mirror of https://github.com/apache/lucene.git
LUCENE-3518: enable sorting by sorted source doc values
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1201440 13f79535-47bb-0310-9956-ffa450edef68
parent 2cdf3fa14d
commit dbd48a72e4
@@ -145,33 +145,33 @@ public class FieldType implements IndexableFieldType {
       if (result.length() > 0)
         result.append(",");
       result.append("indexed");
-    }
       if (tokenized()) {
         if (result.length() > 0)
           result.append(",");
         result.append("tokenized");
       }
       if (storeTermVectors()) {
         if (result.length() > 0)
           result.append(",");
         result.append("termVector");
       }
       if (storeTermVectorOffsets()) {
         if (result.length() > 0)
           result.append(",");
         result.append("termVectorOffsets");
       }
       if (storeTermVectorPositions()) {
         if (result.length() > 0)
           result.append(",");
         result.append("termVectorPosition");
       }
       if (omitNorms()) {
         result.append(",omitNorms");
       }
       if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
         result.append(",indexOptions=");
         result.append(indexOptions);
       }
+    }
 
     return result.toString();

@@ -32,17 +32,17 @@ import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.ByteBlockPool.Allocator;
-import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
-import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
 import org.apache.lucene.util.packed.PackedInts;
 
 /**

@@ -586,7 +586,11 @@ public final class Bytes {
       this.idxIn = idxIn;
       ordToOffsetIndex = hasOffsets ? PackedInts.getReader(idxIn) : null;
       docToOrdIndex = PackedInts.getReader(idxIn);
+    }
+
+    @Override
+    public PackedInts.Reader getDocToOrd() {
+      return docToOrdIndex;
     }
 
     @Override

@@ -194,6 +194,11 @@ class FixedSortedBytesImpl {
       return (int) docToOrdIndex.get(docID);
     }
 
+    @Override
+    public PackedInts.Reader getDocToOrd() {
+      return docToOrdIndex;
+    }
+
     @Override
     public BytesRef getByOrd(int ord, BytesRef bytesRef) {
       try {

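A minimal sketch (not part of the patch) of how the doc-to-ord mapping exposed by the new getDocToOrd() methods resolves a document's sorted value. SortedSource, PackedInts.Reader, get() and getByOrd() come from the code above; the helper class itself is hypothetical.

import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.PackedInts;

// Hypothetical helper: resolve a document's sorted byte[] value in two steps.
final class DocToOrdLookup {
  static BytesRef valueForDoc(SortedSource source, int docID, BytesRef spare) {
    PackedInts.Reader docToOrd = source.getDocToOrd(); // packed doc -> ord map (added by this patch)
    int ord = (int) docToOrd.get(docID);               // look up the document's ord
    return source.getByOrd(ord, spare);                // materialize the value for that ord
  }
}
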
@@ -26,6 +26,7 @@ import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.codecs.DocValuesFormat;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * {@link IndexDocValues} provides a dense per-document typed storage for fast

@@ -223,7 +224,7 @@ public abstract class IndexDocValues implements Closeable {
       return null;
     }
   }
 
   /**
    * A sorted variant of {@link Source} for <tt>byte[]</tt> values per document.
    * <p>

@@ -257,6 +258,18 @@ public abstract class IndexDocValues implements Closeable {
     /** Returns value for specified ord. */
     public abstract BytesRef getByOrd(int ord, BytesRef bytesRef);
 
+    /**
+     * Returns the PackedInts.Reader impl that maps document to ord.
+     */
+    public abstract PackedInts.Reader getDocToOrd();
+
+    /**
+     * Returns the comparator used to order the BytesRefs.
+     */
+    public Comparator<BytesRef> getComparator() {
+      return comparator;
+    }
+
     /**
      * Performs a lookup by value.
      *

@@ -304,4 +317,98 @@ public abstract class IndexDocValues implements Closeable {
      */
     public abstract int getValueCount();
   }
+
+  /** Returns a Source that always returns default (missing)
+   *  values for all documents. */
+  public static Source getDefaultSource(final ValueType type) {
+    return new Source(type) {
+      @Override
+      public long getInt(int docID) {
+        return 0;
+      }
+
+      @Override
+      public double getFloat(int docID) {
+        return 0.0;
+      }
+
+      @Override
+      public BytesRef getBytes(int docID, BytesRef ref) {
+        ref.length = 0;
+        return ref;
+      }
+    };
+  }
+
+  /** Returns a SortedSource that always returns default (missing)
+   *  values for all documents. */
+  public static SortedSource getDefaultSortedSource(final ValueType type, final int size) {
+
+    final PackedInts.Reader docToOrd = new PackedInts.Reader() {
+      @Override
+      public long get(int index) {
+        return 0;
+      }
+
+      @Override
+      public int getBitsPerValue() {
+        return 0;
+      }
+
+      @Override
+      public int size() {
+        return size;
+      }
+
+      @Override
+      public boolean hasArray() {
+        return false;
+      }
+
+      @Override
+      public Object getArray() {
+        return null;
+      }
+    };
+
+    return new SortedSource(type, BytesRef.getUTF8SortedAsUnicodeComparator()) {
+
+      @Override
+      public BytesRef getBytes(int docID, BytesRef ref) {
+        ref.length = 0;
+        return ref;
+      }
+
+      @Override
+      public int ord(int docID) {
+        return 0;
+      }
+
+      @Override
+      public BytesRef getByOrd(int ord, BytesRef bytesRef) {
+        assert ord == 0;
+        bytesRef.length = 0;
+        return bytesRef;
+      }
+
+      @Override
+      public PackedInts.Reader getDocToOrd() {
+        return docToOrd;
+      }
+
+      @Override
+      public int getByValue(BytesRef value, BytesRef spare) {
+        if (value.length == 0) {
+          return 0;
+        } else {
+          return -1;
+        }
+      }
+
+      @Override
+      public int getValueCount() {
+        return 1;
+      }
+    };
+  }
 }

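A hedged usage sketch, not part of the patch, of the default (missing-value) sources added above: a segment without doc values for the field sees every document at ord 0 holding the empty BytesRef. The method names are those introduced in the hunk; the demo class is hypothetical and the comments state the expected results.

import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.IndexDocValues.SortedSource;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;

public class DefaultSortedSourceDemo {
  public static void main(String[] args) {
    // Ten documents, none of which has the field: all share the single empty value.
    SortedSource missing =
        IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, 10);

    BytesRef spare = new BytesRef();
    System.out.println(missing.ord(5));                                // 0: every doc maps to ord 0
    System.out.println(missing.getByOrd(0, spare).length);             // 0: ord 0 is the empty value
    System.out.println(missing.getValueCount());                       // 1: exactly one (empty) value
    System.out.println(missing.getByValue(new BytesRef("x"), spare));  // -1: "x" is not present
  }
}
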
@@ -299,6 +299,11 @@ final class SortedBytesMergeUtils {
       return bytesRef;
     }
 
+    @Override
+    public PackedInts.Reader getDocToOrd() {
+      return null;
+    }
+
     @Override
     public int getValueCount() {
       return 1;

@@ -214,6 +214,11 @@ final class VarSortedBytesImpl {
       return (int) docToOrdIndex.get(docID);
     }
 
+    @Override
+    public PackedInts.Reader getDocToOrd() {
+      return docToOrdIndex;
+    }
+
     @Override
     public BytesRef getByOrd(int ord, BytesRef bytesRef) {
       try {

@@ -18,10 +18,14 @@ package org.apache.lucene.search;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues.SortedSource;
 import org.apache.lucene.index.values.IndexDocValues.Source;
 import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValueType;
 import org.apache.lucene.search.FieldCache.ByteParser;
 import org.apache.lucene.search.FieldCache.DocTerms;
 import org.apache.lucene.search.FieldCache.DocTermsIndex;

@@ -399,6 +403,8 @@ public abstract class FieldComparator<T> {
       final IndexDocValues docValues = context.reader.docValues(field);
       if (docValues != null) {
         currentReaderValues = docValues.getSource();
+      } else {
+        currentReaderValues = IndexDocValues.getDefaultSource(ValueType.FLOAT_64);
       }
       return this;
     }

@@ -690,6 +696,8 @@ public abstract class FieldComparator<T> {
       IndexDocValues docValues = context.reader.docValues(field);
       if (docValues != null) {
         currentReaderValues = docValues.getSource();
+      } else {
+        currentReaderValues = IndexDocValues.getDefaultSource(ValueType.FIXED_INTS_64);
       }
       return this;
     }

@@ -911,30 +919,53 @@ public abstract class FieldComparator<T> {
    * than {@link TermValComparator}. For very small
    * result sets it may be slower. */
   public static final class TermOrdValComparator extends FieldComparator<BytesRef> {
-    /** @lucene.internal */
+    /* Ords for each slot.
+       @lucene.internal */
     final int[] ords;
-    /** @lucene.internal */
+
+    /* Values for each slot.
+       @lucene.internal */
     final BytesRef[] values;
-    /** @lucene.internal */
+
+    /* Which reader last copied a value into the slot. When
+       we compare two slots, we just compare-by-ord if the
+       readerGen is the same; else we must compare the
+       values (slower).
+       @lucene.internal */
     final int[] readerGen;
 
-    /** @lucene.internal */
+    /* Gen of current reader we are on.
+       @lucene.internal */
     int currentReaderGen = -1;
-    private DocTermsIndex termsIndex;
+
+    /* Current reader's doc ord/values.
+       @lucene.internal */
+    DocTermsIndex termsIndex;
+
     private final String field;
 
-    /** @lucene.internal */
+    /* Bottom slot, or -1 if queue isn't full yet
+       @lucene.internal */
     int bottomSlot = -1;
-    /** @lucene.internal */
+
+    /* Bottom ord (same as ords[bottomSlot] once bottomSlot
+       is set). Cached for faster compares.
+       @lucene.internal */
     int bottomOrd;
-    /** @lucene.internal */
+
+    /* True if current bottom slot matches the current
+       reader.
+       @lucene.internal */
     boolean bottomSameReader;
-    /** @lucene.internal */
+
+    /* Bottom value (same as values[bottomSlot] once
+       bottomSlot is set). Cached for faster compares.
+       @lucene.internal */
     BytesRef bottomValue;
-    /** @lucene.internal */
+
     final BytesRef tempBR = new BytesRef();
 
-    public TermOrdValComparator(int numHits, String field, int sortPos, boolean reversed) {
+    public TermOrdValComparator(int numHits, String field) {
       ords = new int[numHits];
       values = new BytesRef[numHits];
       readerGen = new int[numHits];

@@ -1325,6 +1356,396 @@ public abstract class FieldComparator<T> {
     }
   }
 
+  /** Sorts by field's natural Term sort order, using
+   *  ordinals; this is just like {@link
+   *  TermOrdValComparator} except it uses DocValues to
+   *  retrieve the sort ords saved during indexing. */
+  public static final class TermOrdValDocValuesComparator extends FieldComparator<BytesRef> {
+    /* Ords for each slot.
+       @lucene.internal */
+    final int[] ords;
+
+    /* Values for each slot.
+       @lucene.internal */
+    final BytesRef[] values;
+
+    /* Which reader last copied a value into the slot. When
+       we compare two slots, we just compare-by-ord if the
+       readerGen is the same; else we must compare the
+       values (slower).
+       @lucene.internal */
+    final int[] readerGen;
+
+    /* Gen of current reader we are on.
+       @lucene.internal */
+    int currentReaderGen = -1;
+
+    /* Current reader's doc ord/values.
+       @lucene.internal */
+    SortedSource termsIndex;
+
+    /* Comparator for comparing by value.
+       @lucene.internal */
+    Comparator<BytesRef> comp;
+
+    private final String field;
+
+    /* Bottom slot, or -1 if queue isn't full yet
+       @lucene.internal */
+    int bottomSlot = -1;
+
+    /* Bottom ord (same as ords[bottomSlot] once bottomSlot
+       is set). Cached for faster compares.
+       @lucene.internal */
+    int bottomOrd;
+
+    /* True if current bottom slot matches the current
+       reader.
+       @lucene.internal */
+    boolean bottomSameReader;
+
+    /* Bottom value (same as values[bottomSlot] once
+       bottomSlot is set). Cached for faster compares.
+       @lucene.internal */
+    BytesRef bottomValue;
+
+    /** @lucene.internal */
+    final BytesRef tempBR = new BytesRef();
+
+    public TermOrdValDocValuesComparator(int numHits, String field) {
+      ords = new int[numHits];
+      values = new BytesRef[numHits];
+      readerGen = new int[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      if (readerGen[slot1] == readerGen[slot2]) {
+        return ords[slot1] - ords[slot2];
+      }
+
+      final BytesRef val1 = values[slot1];
+      final BytesRef val2 = values[slot2];
+      if (val1 == null) {
+        if (val2 == null) {
+          return 0;
+        }
+        return -1;
+      } else if (val2 == null) {
+        return 1;
+      }
+      return comp.compare(val1, val2);
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      throw new UnsupportedOperationException();
+    }
+
+    // TODO: would be nice to share these specialized impls
+    // w/ TermOrdValComparator
+
+    /** Base class for specialized (per bit width of the
+     * ords) per-segment comparator. NOTE: this is messy;
+     * we do this only because hotspot can't reliably inline
+     * the underlying array access when looking up doc->ord
+     * @lucene.internal
+     */
+    abstract class PerSegmentComparator extends FieldComparator<BytesRef> {
+
+      @Override
+      public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+        return TermOrdValDocValuesComparator.this.setNextReader(context);
+      }
+
+      @Override
+      public int compare(int slot1, int slot2) {
+        return TermOrdValDocValuesComparator.this.compare(slot1, slot2);
+      }
+
+      @Override
+      public void setBottom(final int bottom) {
+        TermOrdValDocValuesComparator.this.setBottom(bottom);
+      }
+
+      @Override
+      public BytesRef value(int slot) {
+        return TermOrdValDocValuesComparator.this.value(slot);
+      }
+
+      @Override
+      public int compareValues(BytesRef val1, BytesRef val2) {
+        assert val1 != null;
+        assert val2 != null;
+        return comp.compare(val1, val2);
+      }
+    }
+
+    // Used per-segment when bit width of doc->ord is 8:
+    private final class ByteOrdComparator extends PerSegmentComparator {
+      private final byte[] readerOrds;
+      private final SortedSource termsIndex;
+      private final int docBase;
+
+      public ByteOrdComparator(byte[] readerOrds, SortedSource termsIndex, int docBase) {
+        this.readerOrds = readerOrds;
+        this.termsIndex = termsIndex;
+        this.docBase = docBase;
+      }
+
+      @Override
+      public int compareBottom(int doc) {
+        assert bottomSlot != -1;
+        if (bottomSameReader) {
+          // ord is precisely comparable, even in the equal case
+          return bottomOrd - (readerOrds[doc]&0xFF);
+        } else {
+          // ord is only approx comparable: if they are not
+          // equal, we can use that; if they are equal, we
+          // must fallback to compare by value
+          final int order = readerOrds[doc]&0xFF;
+          final int cmp = bottomOrd - order;
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          termsIndex.getByOrd(order, tempBR);
+          return comp.compare(bottomValue, tempBR);
+        }
+      }
+
+      @Override
+      public void copy(int slot, int doc) {
+        final int ord = readerOrds[doc]&0xFF;
+        ords[slot] = ord;
+        if (values[slot] == null) {
+          values[slot] = new BytesRef();
+        }
+        termsIndex.getByOrd(ord, values[slot]);
+        readerGen[slot] = currentReaderGen;
+      }
+    }
+
+    // Used per-segment when bit width of doc->ord is 16:
+    private final class ShortOrdComparator extends PerSegmentComparator {
+      private final short[] readerOrds;
+      private final SortedSource termsIndex;
+      private final int docBase;
+
+      public ShortOrdComparator(short[] readerOrds, SortedSource termsIndex, int docBase) {
+        this.readerOrds = readerOrds;
+        this.termsIndex = termsIndex;
+        this.docBase = docBase;
+      }
+
+      @Override
+      public int compareBottom(int doc) {
+        assert bottomSlot != -1;
+        if (bottomSameReader) {
+          // ord is precisely comparable, even in the equal case
+          return bottomOrd - (readerOrds[doc]&0xFFFF);
+        } else {
+          // ord is only approx comparable: if they are not
+          // equal, we can use that; if they are equal, we
+          // must fallback to compare by value
+          final int order = readerOrds[doc]&0xFFFF;
+          final int cmp = bottomOrd - order;
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          termsIndex.getByOrd(order, tempBR);
+          return comp.compare(bottomValue, tempBR);
+        }
+      }
+
+      @Override
+      public void copy(int slot, int doc) {
+        final int ord = readerOrds[doc]&0xFFFF;
+        ords[slot] = ord;
+        if (values[slot] == null) {
+          values[slot] = new BytesRef();
+        }
+        termsIndex.getByOrd(ord, values[slot]);
+        readerGen[slot] = currentReaderGen;
+      }
+    }
+
+    // Used per-segment when bit width of doc->ord is 32:
+    private final class IntOrdComparator extends PerSegmentComparator {
+      private final int[] readerOrds;
+      private final SortedSource termsIndex;
+      private final int docBase;
+
+      public IntOrdComparator(int[] readerOrds, SortedSource termsIndex, int docBase) {
+        this.readerOrds = readerOrds;
+        this.termsIndex = termsIndex;
+        this.docBase = docBase;
+      }
+
+      @Override
+      public int compareBottom(int doc) {
+        assert bottomSlot != -1;
+        if (bottomSameReader) {
+          // ord is precisely comparable, even in the equal case
+          return bottomOrd - readerOrds[doc];
+        } else {
+          // ord is only approx comparable: if they are not
+          // equal, we can use that; if they are equal, we
+          // must fallback to compare by value
+          final int order = readerOrds[doc];
+          final int cmp = bottomOrd - order;
+          if (cmp != 0) {
+            return cmp;
+          }
+          termsIndex.getByOrd(order, tempBR);
+          return comp.compare(bottomValue, tempBR);
+        }
+      }
+
+      @Override
+      public void copy(int slot, int doc) {
+        final int ord = readerOrds[doc];
+        ords[slot] = ord;
+        if (values[slot] == null) {
+          values[slot] = new BytesRef();
+        }
+        termsIndex.getByOrd(ord, values[slot]);
+        readerGen[slot] = currentReaderGen;
+      }
+    }
+
+    // Used per-segment when bit width is not a native array
+    // size (8, 16, 32):
+    private final class AnyOrdComparator extends PerSegmentComparator {
+      private final PackedInts.Reader readerOrds;
+      private final int docBase;
+
+      public AnyOrdComparator(PackedInts.Reader readerOrds, int docBase) {
+        this.readerOrds = readerOrds;
+        this.docBase = docBase;
+      }
+
+      @Override
+      public int compareBottom(int doc) {
+        assert bottomSlot != -1;
+        if (bottomSameReader) {
+          // ord is precisely comparable, even in the equal case
+          return bottomOrd - (int) readerOrds.get(doc);
+        } else {
+          // ord is only approx comparable: if they are not
+          // equal, we can use that; if they are equal, we
+          // must fallback to compare by value
+          final int order = (int) readerOrds.get(doc);
+          final int cmp = bottomOrd - order;
+          if (cmp != 0) {
+            return cmp;
+          }
+          termsIndex.getByOrd(order, tempBR);
+          return comp.compare(bottomValue, tempBR);
+        }
+      }
+
+      @Override
+      public void copy(int slot, int doc) {
+        final int ord = (int) readerOrds.get(doc);
+        ords[slot] = ord;
+        if (values[slot] == null) {
+          values[slot] = new BytesRef();
+        }
+        termsIndex.getByOrd(ord, values[slot]);
+        readerGen[slot] = currentReaderGen;
+      }
+    }
+
+    @Override
+    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+      final int docBase = context.docBase;
+
+      final IndexDocValues dv = context.reader.docValues(field);
+      if (dv == null) {
+        termsIndex = IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, context.reader.maxDoc());
+      } else {
+        termsIndex = dv.getSource().asSortedSource();
+        if (termsIndex == null) {
+          termsIndex = IndexDocValues.getDefaultSortedSource(ValueType.BYTES_VAR_SORTED, context.reader.maxDoc());
+        }
+      }
+
+      comp = termsIndex.getComparator();
+
+      FieldComparator perSegComp = null;
+      final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
+      if (docToOrd.hasArray()) {
+        final Object arr = docToOrd.getArray();
+        assert arr != null;
+        if (arr instanceof byte[]) {
+          // 8 bit packed
+          perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
+        } else if (arr instanceof short[]) {
+          // 16 bit packed
+          perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase);
+        } else if (arr instanceof int[]) {
+          // 32 bit packed
+          perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase);
+        }
+      }
+
+      if (perSegComp == null) {
+        perSegComp = new AnyOrdComparator(docToOrd, docBase);
+      }
+
+      currentReaderGen++;
+      if (bottomSlot != -1) {
+        perSegComp.setBottom(bottomSlot);
+      }
+
+      return perSegComp;
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      bottomSlot = bottom;
+
+      bottomValue = values[bottomSlot];
+      if (currentReaderGen == readerGen[bottomSlot]) {
+        bottomOrd = ords[bottomSlot];
+        bottomSameReader = true;
+      } else {
+        if (bottomValue == null) {
+          // 0 ord is null for all segments
+          assert ords[bottomSlot] == 0;
+          bottomOrd = 0;
+          bottomSameReader = true;
+          readerGen[bottomSlot] = currentReaderGen;
+        } else {
+          final int index = termsIndex.getByValue(bottomValue, tempBR);
+          if (index < 0) {
+            bottomOrd = -index - 2;
+            bottomSameReader = false;
+          } else {
+            bottomOrd = index;
+            // exact value match
+            bottomSameReader = true;
+            readerGen[bottomSlot] = currentReaderGen;
+            ords[bottomSlot] = bottomOrd;
+          }
+        }
+      }
+    }
+
+    @Override
+    public BytesRef value(int slot) {
+      return values[slot];
+    }
+  }
+
   /** Sorts by field's natural Term sort order. All
    * comparisons are done using BytesRef.compareTo, which is
    * slow for medium to large result sets but possibly

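The heart of the comparator added above is its ord-versus-value strategy: within one segment ords are directly comparable, across segments only the copied values are. A standalone illustrative sketch of that rule (not taken from the patch) follows; the helper class name is hypothetical.

import java.util.Comparator;
import org.apache.lucene.util.BytesRef;

final class OrdThenValueCompare {
  // gen = which reader last filled the slot; ord/val = that slot's ord and copied value.
  static int compare(int gen1, int ord1, BytesRef val1,
                     int gen2, int ord2, BytesRef val2,
                     Comparator<BytesRef> byValue) {
    if (gen1 == gen2) {
      // Same segment: the doc-to-ord mapping is shared, so ords order the values exactly.
      return ord1 - ord2;
    }
    // Different segments: ords are not comparable, fall back to the materialized values.
    if (val1 == null) {
      return val2 == null ? 0 : -1;
    } else if (val2 == null) {
      return 1;
    }
    return byValue.compare(val1, val2);
  }
}
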
@@ -1410,6 +1831,74 @@ public abstract class FieldComparator<T> {
     }
   }
 
+  /** Sorts by field's natural Term sort order. All
+   *  comparisons are done using BytesRef.compareTo, which is
+   *  slow for medium to large result sets but possibly
+   *  very fast for very small results sets. The BytesRef
+   *  values are obtained using {@link IndexReader#docValues}. */
+  public static final class TermValDocValuesComparator extends FieldComparator<BytesRef> {
+
+    private BytesRef[] values;
+    private Source docTerms;
+    private final String field;
+    private BytesRef bottom;
+    private final BytesRef tempBR = new BytesRef();
+
+    TermValDocValuesComparator(int numHits, String field) {
+      values = new BytesRef[numHits];
+      this.field = field;
+    }
+
+    @Override
+    public int compare(int slot1, int slot2) {
+      assert values[slot1] != null;
+      assert values[slot2] != null;
+      return values[slot1].compareTo(values[slot2]);
+    }
+
+    @Override
+    public int compareBottom(int doc) {
+      assert bottom != null;
+      return bottom.compareTo(docTerms.getBytes(doc, tempBR));
+    }
+
+    @Override
+    public void copy(int slot, int doc) {
+      if (values[slot] == null) {
+        values[slot] = new BytesRef();
+      }
+      docTerms.getBytes(doc, values[slot]);
+    }
+
+    @Override
+    public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
+      final IndexDocValues dv = context.reader.docValues(field);
+      if (dv != null) {
+        docTerms = dv.getSource();
+      } else {
+        docTerms = IndexDocValues.getDefaultSource(ValueType.BYTES_VAR_DEREF);
+      }
+      return this;
+    }
+
+    @Override
+    public void setBottom(final int bottom) {
+      this.bottom = values[bottom];
+    }
+
+    @Override
+    public BytesRef value(int slot) {
+      return values[slot];
+    }
+
+    @Override
+    public int compareValues(BytesRef val1, BytesRef val2) {
+      assert val1 != null;
+      assert val2 != null;
+      return val1.compareTo(val2);
+    }
+  }
+
   final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) {
     return binarySearch(br, a, key, 1, a.numOrd()-1);
   }

@@ -254,6 +254,7 @@ public class SortField {
   @Override
   public String toString() {
     StringBuilder buffer = new StringBuilder();
+    String dv = useIndexValues ? " [dv]" : "";
     switch (type) {
       case SCORE:
         buffer.append("<score>");

@@ -264,11 +265,11 @@ public class SortField {
         break;
 
       case STRING:
-        buffer.append("<string: \"").append(field).append("\">");
+        buffer.append("<string" + dv + ": \"").append(field).append("\">");
         break;
 
       case STRING_VAL:
-        buffer.append("<string_val: \"").append(field).append("\">");
+        buffer.append("<string_val" + dv + ": \"").append(field).append("\">");
         break;
 
       case BYTE:

@@ -280,7 +281,7 @@ public class SortField {
         break;
 
       case INT:
-        buffer.append("<int: \"").append(field).append("\">");
+        buffer.append("<int" + dv + ": \"").append(field).append("\">");
         break;
 
       case LONG:

@@ -288,11 +289,11 @@ public class SortField {
         break;
 
       case FLOAT:
-        buffer.append("<float: \"").append(field).append("\">");
+        buffer.append("<float" + dv + ": \"").append(field).append("\">");
         break;
 
       case DOUBLE:
-        buffer.append("<double: \"").append(field).append("\">");
+        buffer.append("<double" + dv + ": \"").append(field).append("\">");
         break;
 
       case CUSTOM:

@@ -415,10 +416,18 @@ public class SortField {
       return comparatorSource.newComparator(field, numHits, sortPos, reverse);
 
     case STRING:
-      return new FieldComparator.TermOrdValComparator(numHits, field, sortPos, reverse);
+      if (useIndexValues) {
+        return new FieldComparator.TermOrdValDocValuesComparator(numHits, field);
+      } else {
+        return new FieldComparator.TermOrdValComparator(numHits, field);
+      }
 
     case STRING_VAL:
-      return new FieldComparator.TermValComparator(numHits, field);
+      if (useIndexValues) {
+        return new FieldComparator.TermValDocValuesComparator(numHits, field);
+      } else {
+        return new FieldComparator.TermValComparator(numHits, field);
+      }
 
     case REWRITEABLE:
       throw new IllegalStateException("SortField needs to be rewritten through Sort.rewrite(..) and SortField.rewrite(..)");

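A hedged end-to-end sketch of how a caller might reach the new comparators: build a STRING SortField backed by doc values, much as the test helper useDocValues(...) below does. The setUseIndexValues(true) call is an assumption inferred from the useIndexValues flag shown in SortField above; the remaining names come from the diff or the Lucene API of that era.

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

final class DocValuesSortSketch {
  static TopDocs sortByDocValuesString(IndexReader reader) throws Exception {
    SortField sf = new SortField("string", SortField.Type.STRING);
    sf.setUseIndexValues(true); // assumed setter; routes getComparator() to TermOrdValDocValuesComparator
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
      return searcher.search(new MatchAllDocsQuery(), 10, new Sort(sf, SortField.FIELD_DOC));
    } finally {
      searcher.close();
    }
  }
}
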
@@ -81,6 +81,7 @@ public class TestSort extends LuceneTestCase {
   public static void beforeClass() throws Exception {
     NUM_STRINGS = atLeast(6000);
   }
 
   // document data:
   // the tracer field is used to determine which document was hit
   // the contents field is used to search and sort by relevance

@@ -111,7 +112,7 @@ public class TestSort extends LuceneTestCase {
     { "c", "m", "5", "5.0", "5", null, null, "5", "5", "5", "5", null},
     { "d", "m", null, null, null, null, null, null, null, null, null, null}
   };
 
   // create an index of all the documents, or just the x, or just the y documents
   private IndexSearcher getIndex (boolean even, boolean odd)
   throws IOException {

@@ -119,6 +120,21 @@ public class TestSort extends LuceneTestCase {
     dirs.add(indexStore);
     RandomIndexWriter writer = new RandomIndexWriter(random, indexStore, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
 
+    final ValueType stringDVType;
+    if (dvStringSorted) {
+      // Index sorted
+      stringDVType = random.nextBoolean() ? ValueType.BYTES_VAR_SORTED : ValueType.BYTES_FIXED_SORTED;
+    } else {
+      // Index non-sorted
+      if (random.nextBoolean()) {
+        // Fixed
+        stringDVType = random.nextBoolean() ? ValueType.BYTES_FIXED_STRAIGHT : ValueType.BYTES_FIXED_DEREF;
+      } else {
+        // Var
+        stringDVType = random.nextBoolean() ? ValueType.BYTES_VAR_STRAIGHT : ValueType.BYTES_VAR_DEREF;
+      }
+    }
+
     FieldType ft1 = new FieldType();
     ft1.setStored(true);
     FieldType ft2 = new FieldType();

@@ -142,7 +158,13 @@ public class TestSort extends LuceneTestCase {
         }
         doc.add(f);
       }
-      if (data[i][4] != null) doc.add (new StringField ("string", data[i][4]));
+      if (data[i][4] != null) {
+        Field f = new StringField ("string", data[i][4]);
+        if (supportsDocValues) {
+          f = IndexDocValuesField.build(f, stringDVType);
+        }
+        doc.add(f);
+      }
       if (data[i][5] != null) doc.add (new StringField ("custom", data[i][5]));
       if (data[i][6] != null) doc.add (new StringField ("i18n", data[i][6]));
       if (data[i][7] != null) doc.add (new StringField ("long", data[i][7]));

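For the indexing side, a condensed sketch of the pattern the test uses above: wrap a regular StringField so its value is also recorded as sorted doc values. IndexDocValuesField.build, StringField and ValueType come from the diff itself; the import paths and the writer plumbing are best-effort assumptions.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.values.ValueType;

final class SortedDocValuesIndexing {
  static void addDoc(IndexWriter writer, String value) throws Exception {
    Document doc = new Document();
    Field f = new StringField("string", value);
    // Attach doc values of a sorted type so the field can later be sorted by ord:
    f = IndexDocValuesField.build(f, ValueType.BYTES_VAR_SORTED);
    doc.add(f);
    writer.addDocument(doc);
  }
}
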
@@ -185,21 +207,52 @@ public class TestSort extends LuceneTestCase {
         setMaxBufferedDocs(4).
         setMergePolicy(newLogMergePolicy(97))
     );
-    FieldType customType = new FieldType();
-    customType.setStored(true);
+    FieldType onlyStored = new FieldType();
+    onlyStored.setStored(true);
+    final int fixedLen = getRandomNumber(2, 8);
+    final int fixedLen2 = getRandomNumber(1, 4);
     for (int i=0; i<NUM_STRINGS; i++) {
       Document doc = new Document();
       String num = getRandomCharString(getRandomNumber(2, 8), 48, 52);
-      doc.add (new Field ("tracer", num, customType));
+      doc.add (new Field ("tracer", num, onlyStored));
       //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
-      doc.add (new StringField ("string", num));
-      String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50);
-      doc.add (new StringField ("string2", num2));
-      doc.add (new Field ("tracer2", num2, customType));
-      for(IndexableField f : doc.getFields()) {
-        ((Field) f).setBoost(2.0f);
-      }
-      writer.addDocument (doc);
+      Field f = new StringField("string", num);
+      if (supportsDocValues) {
+        f = IndexDocValuesField.build(f, ValueType.BYTES_VAR_SORTED);
+      }
+      doc.add (f);
+      String num2 = getRandomCharString(getRandomNumber(1, 4), 48, 50);
+      f = new StringField ("string2", num2);
+      if (supportsDocValues) {
+        f = IndexDocValuesField.build(f, ValueType.BYTES_VAR_SORTED);
+      }
+      doc.add (f);
+      doc.add (new Field ("tracer2", num2, onlyStored));
+      for(IndexableField f2 : doc.getFields()) {
+        ((Field) f2).setBoost(2.0f);
+      }
+
+      String numFixed = getRandomCharString(fixedLen, 48, 52);
+      doc.add (new Field ("fixed_tracer", numFixed, onlyStored));
+      //doc.add (new Field ("contents", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
+      f = new StringField("string_fixed", numFixed);
+      if (supportsDocValues) {
+        f = IndexDocValuesField.build(f, ValueType.BYTES_FIXED_SORTED);
+      }
+      doc.add (f);
+      String num2Fixed = getRandomCharString(fixedLen2, 48, 52);
+      f = new StringField ("string2_fixed", num2Fixed);
+      if (supportsDocValues) {
+        f = IndexDocValuesField.build(f, ValueType.BYTES_FIXED_SORTED);
+      }
+      doc.add (f);
+      doc.add (new Field ("tracer2_fixed", num2Fixed, onlyStored));
+
+      for(IndexableField f2 : doc.getFields()) {
+        ((Field) f2).setBoost(2.0f);
+      }
+
+      writer.addDocument (doc);
     }
     //writer.forceMerge(1);
     //System.out.println(writer.getSegmentCount());

@@ -249,10 +302,15 @@ public class TestSort extends LuceneTestCase {
     return getIndex (false, false);
   }
 
+  // Set to true if the DV "string" field is indexed as a
+  // sorted source:
+  private boolean dvStringSorted;
+
   @Override
   public void setUp() throws Exception {
     super.setUp();
 
+    dvStringSorted = random.nextBoolean();
     full = getFullIndex();
     searchX = getXIndex();
     searchY = getYIndex();

@@ -339,6 +397,20 @@ public class TestSort extends LuceneTestCase {
       sort.setSort (useDocValues(new SortField ("double", SortField.Type.DOUBLE)), SortField.FIELD_DOC );
       assertMatches (full, queryX, sort, "AGICE");
       assertMatches (full, queryY, sort, "DJHBF");
+
+      sort.setSort (useDocValues(new SortField ("string", getDVStringSortType())), SortField.FIELD_DOC );
+      assertMatches (full, queryX, sort, "AIGEC");
+      assertMatches (full, queryY, sort, "DJHFB");
+    }
+  }
+
+  private SortField.Type getDVStringSortType() {
+    if (dvStringSorted) {
+      // If you index as sorted source you can still sort by
+      // value instead:
+      return random.nextBoolean() ? SortField.Type.STRING : SortField.Type.STRING_VAL;
+    } else {
+      return SortField.Type.STRING_VAL;
     }
   }
 
@@ -405,42 +477,72 @@ public class TestSort extends LuceneTestCase {
   /**
    * Test String sorting: small queue to many matches, multi field sort, reverse sort
    */
-  public void testStringSort() throws IOException {
-    ScoreDoc[] result = null;
-    IndexSearcher searcher = getFullStrings();
+  public void testStringSort() throws Exception {
+    // Normal string field, var length
     sort.setSort(
         new SortField("string", SortField.Type.STRING),
         new SortField("string2", SortField.Type.STRING, true),
         SortField.FIELD_DOC);
+    verifyStringSort(sort);
 
-    result = searcher.search(new MatchAllDocsQuery(), null, 500, sort).scoreDocs;
+    // Normal string field, fixed length
+    sort.setSort(
+        new SortField("string_fixed", SortField.Type.STRING),
+        new SortField("string2_fixed", SortField.Type.STRING, true),
+        SortField.FIELD_DOC);
+    verifyStringSort(sort);
+
+    // Doc values field, var length
+    assumeFalse("cannot work with preflex codec",
+                "Lucene3x".equals(Codec.getDefault().getName()));
+    sort.setSort(
+        useDocValues(new SortField("string", getDVStringSortType())),
+        useDocValues(new SortField("string2", getDVStringSortType(), true)),
+        SortField.FIELD_DOC);
+    verifyStringSort(sort);
+
+    // Doc values field, fixed length
+    sort.setSort(
+        useDocValues(new SortField("string_fixed", getDVStringSortType())),
+        useDocValues(new SortField("string2_fixed", getDVStringSortType(), true)),
+        SortField.FIELD_DOC);
+    verifyStringSort(sort);
+  }
+
+  private void verifyStringSort(Sort sort) throws Exception {
+    final IndexSearcher searcher = getFullStrings();
+    final ScoreDoc[] result = searcher.search(new MatchAllDocsQuery(), null, _TestUtil.nextInt(random, 500, searcher.getIndexReader().maxDoc()), sort).scoreDocs;
     StringBuilder buff = new StringBuilder();
     int n = result.length;
     String last = null;
     String lastSub = null;
     int lastDocId = 0;
     boolean fail = false;
+    final String fieldSuffix = sort.getSort()[0].getField().endsWith("_fixed") ? "_fixed" : "";
     for (int x = 0; x < n; ++x) {
       Document doc2 = searcher.doc(result[x].doc);
-      IndexableField[] v = doc2.getFields("tracer");
-      IndexableField[] v2 = doc2.getFields("tracer2");
+      IndexableField[] v = doc2.getFields("tracer" + fieldSuffix);
+      IndexableField[] v2 = doc2.getFields("tracer2" + fieldSuffix);
       for (int j = 0; j < v.length; ++j) {
+        buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+")\n");
         if (last != null) {
           int cmp = v[j].stringValue().compareTo(last);
           if (!(cmp >= 0)) { // ensure first field is in order
             fail = true;
             System.out.println("fail:" + v[j] + " < " + last);
+            buff.append("  WRONG tracer\n");
           }
           if (cmp == 0) { // ensure second field is in reverse order
             cmp = v2[j].stringValue().compareTo(lastSub);
             if (cmp > 0) {
               fail = true;
               System.out.println("rev field fail:" + v2[j] + " > " + lastSub);
+              buff.append("  WRONG tracer2\n");
             } else if(cmp == 0) { // ensure docid is in order
               if (result[x].doc < lastDocId) {
                 fail = true;
                 System.out.println("doc fail:" + result[x].doc + " > " + lastDocId);
+                buff.append("  WRONG docID\n");
               }
             }
           }
         }

@@ -448,11 +550,10 @@ public class TestSort extends LuceneTestCase {
         last = v[j].stringValue();
         lastSub = v2[j].stringValue();
         lastDocId = result[x].doc;
-        buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+") ");
       }
     }
-    if(fail) {
-      System.out.println("topn field1(field2)(docID):" + buff);
+    if (fail) {
+      System.out.println("topn field1(field2)(docID):\n" + buff);
     }
     assertFalse("Found sort results out of order", fail);
     searcher.close();

@@ -549,6 +650,16 @@ public class TestSort extends LuceneTestCase {
 
     sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), new SortField ("string", SortField.Type.STRING) );
     assertMatches (empty, queryX, sort, "");
+
+    sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)), SortField.FIELD_DOC );
+    assertMatches (empty, queryX, sort, "");
+
+    sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)),
+                  useDocValues(new SortField ("string", getDVStringSortType())) );
+    assertMatches (empty, queryX, sort, "");
+
+    sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)), useDocValues(new SortField ("string", getDVStringSortType())) );
+    assertMatches (empty, queryX, sort, "");
   }
 
   static class MyFieldComparator extends FieldComparator<Integer> {

@@ -642,11 +753,18 @@ public class TestSort extends LuceneTestCase {
       sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT, true)) );
       assertMatches (full, queryX, sort, "AECIG");
       assertMatches (full, queryY, sort, "BFJHD");
+
+      sort.setSort (useDocValues(new SortField ("string", getDVStringSortType(), true)) );
+      assertMatches (full, queryX, sort, "CEGIA");
+      assertMatches (full, queryY, sort, "BFHJD");
     }
   }
 
   // test sorting when the sort field is empty (undefined) for some of the documents
   public void testEmptyFieldSort() throws Exception {
+
+    // NOTE: do not test DocValues fields here, since you
+    // can't sort when some documents don't have the field
     sort.setSort (new SortField ("string", SortField.Type.STRING) );
     assertMatches (full, queryF, sort, "ZJI");

@@ -662,14 +780,6 @@ public class TestSort extends LuceneTestCase {
     sort.setSort (new SortField ("float", SortField.Type.FLOAT) );
     assertMatches (full, queryF, sort, "ZJI");
 
-    if (supportsDocValues) {
-      sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)) );
-      assertMatches (full, queryF, sort, "IZJ");
-
-      sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)) );
-      assertMatches (full, queryF, sort, "ZJI");
-    }
-
     // using a nonexisting field as first sort key shouldn't make a difference:
     sort.setSort (new SortField ("nosuchfield", SortField.Type.STRING),
                   new SortField ("float", SortField.Type.FLOAT) );

@@ -679,7 +789,6 @@ public class TestSort extends LuceneTestCase {
     assertMatches (full, queryF, sort, "IJZ");
 
     // When a field is null for both documents, the next SortField should be used.
-    // Works for
     sort.setSort (new SortField ("int", SortField.Type.INT),
                   new SortField ("string", SortField.Type.STRING),
                   new SortField ("float", SortField.Type.FLOAT) );

@@ -688,7 +797,7 @@ public class TestSort extends LuceneTestCase {
     // Reverse the last criterium to make sure the test didn't pass by chance
     sort.setSort (new SortField ("int", SortField.Type.INT),
                   new SortField ("string", SortField.Type.STRING),
                   new SortField ("float", SortField.Type.FLOAT, true) );
     assertMatches (full, queryG, sort, "ZYXW");
 
     // Do the same for a ParallelMultiSearcher

@@ -696,13 +805,13 @@ public class TestSort extends LuceneTestCase {
     IndexSearcher parallelSearcher=new IndexSearcher (full.getIndexReader(), exec);
 
     sort.setSort (new SortField ("int", SortField.Type.INT),
                   new SortField ("string", SortField.Type.STRING),
                   new SortField ("float", SortField.Type.FLOAT) );
     assertMatches (parallelSearcher, queryG, sort, "ZWXY");
 
     sort.setSort (new SortField ("int", SortField.Type.INT),
                   new SortField ("string", SortField.Type.STRING),
                   new SortField ("float", SortField.Type.FLOAT, true) );
     assertMatches (parallelSearcher, queryG, sort, "ZYXW");
     parallelSearcher.close();
     exec.shutdown();

@@ -719,6 +828,20 @@ public class TestSort extends LuceneTestCase {
 
     sort.setSort (new SortField ("float", SortField.Type.FLOAT), new SortField ("string", SortField.Type.STRING) );
     assertMatches (full, queryX, sort, "GICEA");
+
+    if (supportsDocValues) {
+      sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT)),
+                    useDocValues(new SortField ("float", SortField.Type.FLOAT)));
+      assertMatches (full, queryX, sort, "IGEAC");
+
+      sort.setSort (useDocValues(new SortField ("int", SortField.Type.INT, true)),
+                    useDocValues(new SortField (null, SortField.Type.DOC, true)));
+      assertMatches (full, queryX, sort, "CEAGI");
+
+      sort.setSort (useDocValues(new SortField ("float", SortField.Type.FLOAT)),
+                    useDocValues(new SortField ("string", getDVStringSortType())));
+      assertMatches (full, queryX, sort, "GICEA");
+    }
   }
 
   // test a variety of sorts using a parallel multisearcher

@@ -1064,6 +1187,21 @@ public class TestSort extends LuceneTestCase {
 
     sort.setSort(useDocValues(new SortField ("int", SortField.Type.INT, true)));
     assertMatches(multi, queryF, sort, "JZI");
+
+    sort.setSort(useDocValues(new SortField("string", getDVStringSortType())));
+    assertMatches(multi, queryA, sort, "DJAIHGFEBC");
+
+    sort.setSort(useDocValues(new SortField("string", getDVStringSortType(), true)));
+    assertMatches(multi, queryA, sort, "CBEFGHIAJD");
+
+    sort.setSort(useDocValues(new SortField("float", SortField.Type.FLOAT)),useDocValues(new SortField("string", getDVStringSortType())));
+    assertMatches(multi, queryA, sort, "GDHJICEFAB");
+
+    sort.setSort(useDocValues(new SortField ("string", getDVStringSortType())));
+    assertMatches(multi, queryF, sort, "ZJI");
+
+    sort.setSort(useDocValues(new SortField ("string", getDVStringSortType(), true)));
+    assertMatches(multi, queryF, sort, "IJZ");
   }
 
   // up to this point, all of the searches should have "sane"