LUCENE-7537: Index time sorting now supports multi-valued sorts using selectors (MIN, MAX, etc.)

This commit is contained in:
Mike McCandless 2016-11-15 16:22:51 -05:00
parent 7af454ad76
commit 6c3c6bc379
9 changed files with 1220 additions and 172 deletions

View File

@ -90,6 +90,9 @@ Improvements
which can be overridden to return false to eek out more speed in some cases.
(Timothy M. Rodriguez, David Smiley)
* LUCENE-7537: Index time sorting now supports multi-valued sorts
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
Other
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

View File

@ -33,9 +33,14 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@ -64,6 +69,7 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final static BytesRef SI_SORT = new BytesRef(" sort ");
final static BytesRef SI_SORT_FIELD = new BytesRef(" field ");
final static BytesRef SI_SORT_TYPE = new BytesRef(" type ");
final static BytesRef SI_SELECTOR_TYPE = new BytesRef(" selector ");
final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse ");
final static BytesRef SI_SORT_MISSING = new BytesRef(" missing ");
@ -158,6 +164,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
final SortField.Type type;
SortedSetSelector.Type selectorSet = null;
SortedNumericSelector.Type selectorNumeric = null;
switch (typeAsString) {
case "string":
type = SortField.Type.STRING;
@ -174,6 +182,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
case "float":
type = SortField.Type.FLOAT;
break;
case "multi_valued_string":
type = SortField.Type.STRING;
selectorSet = readSetSelector(input, scratch);
break;
case "multi_valued_long":
type = SortField.Type.LONG;
selectorNumeric = readNumericSelector(input, scratch);
break;
case "multi_valued_int":
type = SortField.Type.INT;
selectorNumeric = readNumericSelector(input, scratch);
break;
case "multi_valued_double":
type = SortField.Type.DOUBLE;
selectorNumeric = readNumericSelector(input, scratch);
break;
case "multi_valued_float":
type = SortField.Type.FLOAT;
selectorNumeric = readNumericSelector(input, scratch);
break;
default:
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
}
@ -245,7 +273,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
default:
throw new AssertionError();
}
sortField[i] = new SortField(field, type, reverse);
if (selectorSet != null) {
sortField[i] = new SortedSetSortField(field, reverse);
} else if (selectorNumeric != null) {
sortField[i] = new SortedNumericSortField(field, type, reverse);
} else {
sortField[i] = new SortField(field, type, reverse);
}
if (missingValue != null) {
sortField[i].setMissingValue(missingValue);
}
@ -265,6 +299,38 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
private String readString(int offset, BytesRefBuilder scratch) {
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
}
private SortedSetSelector.Type readSetSelector(IndexInput input, BytesRefBuilder scratch) throws IOException {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE);
final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch);
switch (selectorAsString) {
case "min":
return SortedSetSelector.Type.MIN;
case "middle_min":
return SortedSetSelector.Type.MIDDLE_MIN;
case "middle_max":
return SortedSetSelector.Type.MIDDLE_MAX;
case "max":
return SortedSetSelector.Type.MAX;
default:
throw new CorruptIndexException("unable to parse SortedSetSelector type: " + selectorAsString, input);
}
}
private SortedNumericSelector.Type readNumericSelector(IndexInput input, BytesRefBuilder scratch) throws IOException {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE);
final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch);
switch (selectorAsString) {
case "min":
return SortedNumericSelector.Type.MIN;
case "max":
return SortedNumericSelector.Type.MAX;
default:
throw new CorruptIndexException("unable to parse SortedNumericSelector type: " + selectorAsString, input);
}
}
@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
@ -352,29 +418,93 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_SORT_TYPE);
final String sortType;
switch (sortField.getType()) {
final String sortTypeString;
final SortField.Type sortType;
final boolean multiValued;
if (sortField instanceof SortedSetSortField) {
sortType = SortField.Type.STRING;
multiValued = true;
} else if (sortField instanceof SortedNumericSortField) {
sortType = ((SortedNumericSortField) sortField).getNumericType();
multiValued = true;
} else {
sortType = sortField.getType();
multiValued = false;
}
switch (sortType) {
case STRING:
sortType = "string";
if (multiValued) {
sortTypeString = "multi_valued_string";
} else {
sortTypeString = "string";
}
break;
case LONG:
sortType = "long";
if (multiValued) {
sortTypeString = "multi_valued_long";
} else {
sortTypeString = "long";
}
break;
case INT:
sortType = "int";
if (multiValued) {
sortTypeString = "multi_valued_int";
} else {
sortTypeString = "int";
}
break;
case DOUBLE:
sortType = "double";
if (multiValued) {
sortTypeString = "multi_valued_double";
} else {
sortTypeString = "double";
}
break;
case FLOAT:
sortType = "float";
if (multiValued) {
sortTypeString = "multi_valued_float";
} else {
sortTypeString = "float";
}
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
SimpleTextUtil.write(output, sortType, scratch);
SimpleTextUtil.write(output, sortTypeString, scratch);
SimpleTextUtil.writeNewline(output);
if (sortField instanceof SortedSetSortField) {
SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector();
final String selectorString;
if (selector == SortedSetSelector.Type.MIN) {
selectorString = "min";
} else if (selector == SortedSetSelector.Type.MIDDLE_MIN) {
selectorString = "middle_min";
} else if (selector == SortedSetSelector.Type.MIDDLE_MAX) {
selectorString = "middle_max";
} else if (selector == SortedSetSelector.Type.MAX) {
selectorString = "max";
} else {
throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector);
}
SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
SimpleTextUtil.write(output, selectorString, scratch);
SimpleTextUtil.writeNewline(output);
} else if (sortField instanceof SortedNumericSortField) {
SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector();
final String selectorString;
if (selector == SortedNumericSelector.Type.MIN) {
selectorString = "min";
} else if (selector == SortedNumericSelector.Type.MAX) {
selectorString = "max";
} else {
throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector);
}
SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
SimpleTextUtil.write(output, selectorString, scratch);
SimpleTextUtil.writeNewline(output);
}
SimpleTextUtil.write(output, SI_SORT_REVERSE);
SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
SimpleTextUtil.writeNewline(output);

View File

@ -29,6 +29,10 @@ import org.apache.lucene.index.SegmentInfo; // javadocs
import org.apache.lucene.index.SegmentInfos; // javadocs
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput; // javadocs
import org.apache.lucene.store.Directory;
@ -69,7 +73,7 @@ import org.apache.lucene.util.Version;
* addIndexes), etc.</li>
* <li>Files is a list of files referred to by this segment.</li>
* </ul>
*
*
* @see SegmentInfos
* @lucene.experimental
*/
@ -78,7 +82,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
/** Sole constructor. */
public Lucene62SegmentInfoFormat() {
}
@Override
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
@ -91,13 +95,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
Lucene62SegmentInfoFormat.VERSION_CURRENT,
segmentID, "");
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
final int docCount = input.readInt();
if (docCount < 0) {
throw new CorruptIndexException("invalid docCount: " + docCount, input);
}
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readMapOfStrings();
final Set<String> files = input.readSetOfStrings();
final Map<String,String> attributes = input.readMapOfStrings();
@ -110,6 +114,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
String fieldName = input.readString();
int sortTypeID = input.readVInt();
SortField.Type sortType;
SortedSetSelector.Type sortedSetSelector = null;
SortedNumericSelector.Type sortedNumericSelector = null;
switch(sortTypeID) {
case 0:
sortType = SortField.Type.STRING;
@ -126,6 +132,43 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case 4:
sortType = SortField.Type.FLOAT;
break;
case 5:
sortType = SortField.Type.STRING;
byte selector = input.readByte();
if (selector == 0) {
sortedSetSelector = SortedSetSelector.Type.MIN;
} else if (selector == 1) {
sortedSetSelector = SortedSetSelector.Type.MAX;
} else if (selector == 2) {
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
} else if (selector == 3) {
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
} else {
throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
}
break;
case 6:
byte type = input.readByte();
if (type == 0) {
sortType = SortField.Type.LONG;
} else if (type == 1) {
sortType = SortField.Type.INT;
} else if (type == 2) {
sortType = SortField.Type.DOUBLE;
} else if (type == 3) {
sortType = SortField.Type.FLOAT;
} else {
throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
}
byte numericSelector = input.readByte();
if (numericSelector == 0) {
sortedNumericSelector = SortedNumericSelector.Type.MIN;
} else if (numericSelector == 1) {
sortedNumericSelector = SortedNumericSelector.Type.MAX;
} else {
throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
}
break;
default:
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
}
@ -139,7 +182,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
}
sortFields[i] = new SortField(fieldName, sortType, reverse);
if (sortedSetSelector != null) {
sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
} else if (sortedNumericSelector != null) {
sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
} else {
sortFields[i] = new SortField(fieldName, sortType, reverse);
}
Object missingValue;
b = input.readByte();
@ -194,7 +243,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
} else {
indexSort = null;
}
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
si.setFiles(files);
} catch (Throwable exception) {
@ -213,8 +262,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
// Only add the file once we've successfully created it, else IFD assert can trip:
si.addFile(fileName);
CodecUtil.writeIndexHeader(output,
Lucene62SegmentInfoFormat.CODEC_NAME,
CodecUtil.writeIndexHeader(output,
Lucene62SegmentInfoFormat.CODEC_NAME,
Lucene62SegmentInfoFormat.VERSION_CURRENT,
si.getId(),
"");
@ -245,6 +294,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeVInt(numSortFields);
for (int i = 0; i < numSortFields; ++i) {
SortField sortField = indexSort.getSort()[i];
SortField.Type sortType = sortField.getType();
output.writeString(sortField.getField());
int sortTypeID;
switch (sortField.getType()) {
@ -263,10 +313,55 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case FLOAT:
sortTypeID = 4;
break;
case CUSTOM:
if (sortField instanceof SortedSetSortField) {
sortTypeID = 5;
sortType = SortField.Type.STRING;
} else if (sortField instanceof SortedNumericSortField) {
sortTypeID = 6;
sortType = ((SortedNumericSortField) sortField).getNumericType();
} else {
throw new IllegalStateException("Unexpected SortedNumericSortField " + sortField);
}
break;
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
output.writeVInt(sortTypeID);
if (sortTypeID == 5) {
SortedSetSortField ssf = (SortedSetSortField) sortField;
if (ssf.getSelector() == SortedSetSelector.Type.MIN) {
output.writeByte((byte) 0);
} else if (ssf.getSelector() == SortedSetSelector.Type.MAX) {
output.writeByte((byte) 1);
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MIN) {
output.writeByte((byte) 2);
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MAX) {
output.writeByte((byte) 3);
} else {
throw new IllegalStateException("Unexpected SortedSetSelector type: " + ssf.getSelector());
}
} else if (sortTypeID == 6) {
SortedNumericSortField snsf = (SortedNumericSortField) sortField;
if (snsf.getNumericType() == SortField.Type.LONG) {
output.writeByte((byte) 0);
} else if (snsf.getNumericType() == SortField.Type.INT) {
output.writeByte((byte) 1);
} else if (snsf.getNumericType() == SortField.Type.DOUBLE) {
output.writeByte((byte) 2);
} else if (snsf.getNumericType() == SortField.Type.FLOAT) {
output.writeByte((byte) 3);
} else {
throw new IllegalStateException("Unexpected SortedNumericSelector type: " + snsf.getNumericType());
}
if (snsf.getSelector() == SortedNumericSelector.Type.MIN) {
output.writeByte((byte) 0);
} else if (snsf.getSelector() == SortedNumericSelector.Type.MAX) {
output.writeByte((byte) 1);
} else {
throw new IllegalStateException("Unexpected sorted numeric selector type: " + snsf.getSelector());
}
}
output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
// write missing value
@ -274,7 +369,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
if (missingValue == null) {
output.writeByte((byte) 0);
} else {
switch(sortField.getType()) {
switch(sortType) {
case STRING:
if (missingValue == SortField.STRING_LAST) {
output.writeByte((byte) 1);
@ -305,7 +400,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
}
}
}
CodecUtil.writeFooter(output);
}
}
@ -314,5 +409,6 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
public final static String SI_EXTENSION = "si";
static final String CODEC_NAME = "Lucene62SegmentInfo";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int VERSION_MULTI_VALUED_SORT = 1;
static final int VERSION_CURRENT = VERSION_MULTI_VALUED_SORT;
}

View File

@ -468,7 +468,8 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
*/
public IndexWriterConfig setIndexSort(Sort sort) {
for(SortField sortField : sort.getSort()) {
if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) {
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
if (ALLOWED_INDEX_SORT_TYPES.contains(sortType) == false) {
throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField);
}
}

View File

@ -141,33 +141,25 @@ final class MultiSorter {
private static ComparableProvider[] getComparableProviders(List<CodecReader> readers, SortField sortField) throws IOException {
ComparableProvider[] providers = new ComparableProvider[readers.size()];
final int reverseMul = sortField.getReverse() ? -1 : 1;
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
switch(sortField.getType()) {
switch(sortType) {
case STRING:
{
// this uses the efficient segment-local ordinal map:
final SortedDocValues[] values = new SortedDocValues[readers.size()];
for(int i=0;i<readers.size();i++) {
SortedDocValues v = readers.get(i).getSortedDocValues(sortField.getField());
if (v == null) {
v = DocValues.emptySorted();
}
values[i] = v;
final SortedDocValues sorted = Sorter.getOrWrapSorted(readers.get(i), sortField);
values[i] = sorted;
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = Integer.MAX_VALUE;
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
} else {
missingOrd = Integer.MIN_VALUE;
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
@ -205,13 +197,6 @@ final class MultiSorter {
case LONG:
{
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
final Long missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Long) sortField.getMissingValue();
@ -220,8 +205,8 @@ final class MultiSorter {
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
providers[readerIndex] = new ComparableProvider() {
// used only by assert:
int lastDocID = -1;
@ -243,7 +228,7 @@ final class MultiSorter {
if (readerDocID == docID) {
return reverseMul * values.longValue();
} else {
return missingValue;
return reverseMul * missingValue;
}
}
};
@ -253,13 +238,6 @@ final class MultiSorter {
case INT:
{
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
final Integer missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Integer) sortField.getMissingValue();
@ -268,7 +246,7 @@ final class MultiSorter {
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
providers[readerIndex] = new ComparableProvider() {
// used only by assert:
@ -291,7 +269,7 @@ final class MultiSorter {
if (readerDocID == docID) {
return reverseMul * (int) values.longValue();
} else {
return missingValue;
return reverseMul * missingValue;
}
}
};
@ -301,13 +279,6 @@ final class MultiSorter {
case DOUBLE:
{
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
final Double missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Double) sortField.getMissingValue();
@ -316,7 +287,7 @@ final class MultiSorter {
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
providers[readerIndex] = new ComparableProvider() {
// used only by assert:
@ -339,7 +310,7 @@ final class MultiSorter {
if (readerDocID == docID) {
return reverseMul * Double.longBitsToDouble(values.longValue());
} else {
return missingValue;
return reverseMul * missingValue;
}
}
};
@ -349,13 +320,6 @@ final class MultiSorter {
case FLOAT:
{
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
final Float missingValue;
if (sortField.getMissingValue() != null) {
missingValue = (Float) sortField.getMissingValue();
@ -364,7 +328,7 @@ final class MultiSorter {
}
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
providers[readerIndex] = new ComparableProvider() {
// used only by assert:
@ -387,7 +351,7 @@ final class MultiSorter {
if (readerDocID == docID) {
return reverseMul * Float.intBitsToFloat((int) values.longValue());
} else {
return missingValue;
return reverseMul * missingValue;
}
}
};

View File

@ -25,6 +25,10 @@ import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSelector;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.util.TimSorter;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@ -200,18 +204,55 @@ final class Sorter {
};
}
/** Returns the native sort type for {@link SortedSetSortField} and {@link SortedNumericSortField},
* {@link SortField#getType()} otherwise */
static SortField.Type getSortFieldType(SortField sortField) {
if (sortField instanceof SortedSetSortField) {
return SortField.Type.STRING;
} else if (sortField instanceof SortedNumericSortField) {
return ((SortedNumericSortField) sortField).getNumericType();
} else {
return sortField.getType();
}
}
/** Wraps a {@link SortedNumericDocValues} as a single-valued view if the field is an instance of {@link SortedNumericSortField},
* returns {@link NumericDocValues} for the field otherwise. */
static NumericDocValues getOrWrapNumeric(LeafReader reader, SortField sortField) throws IOException {
if (sortField instanceof SortedNumericSortField) {
SortedNumericSortField sf = (SortedNumericSortField) sortField;
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(reader, sf.getField()), sf.getSelector(), sf.getNumericType());
} else {
return DocValues.getNumeric(reader, sortField.getField());
}
}
/** Wraps a {@link SortedSetDocValues} as a single-valued view if the field is an instance of {@link SortedSetSortField},
* returns {@link SortedDocValues} for the field otherwise. */
static SortedDocValues getOrWrapSorted(LeafReader reader, SortField sortField) throws IOException {
if (sortField instanceof SortedSetSortField) {
SortedSetSortField sf = (SortedSetSortField) sortField;
return SortedSetSelector.wrap(DocValues.getSortedSet(reader, sf.getField()), sf.getSelector());
} else {
return DocValues.getSorted(reader, sortField.getField());
}
}
/** We cannot use the {@link FieldComparator} API because that API requires that you send it docIDs in order. Note that this API
* allocates arrays[maxDoc] to hold the native values needed for comparison, but 1) they are transient (only alive while sorting this one
* segment), and 2) in the typical index sorting case, they are only used to sort newly flushed segments, which will be smaller than
* merged segments. */
private static DocComparator getDocComparator(LeafReader reader, SortField sortField) throws IOException {
int maxDoc = reader.maxDoc();
switch(sortField.getType()) {
final int maxDoc = reader.maxDoc();
final int reverseMul = sortField.getReverse() ? -1 : 1;
final SortField.Type sortType = getSortFieldType(sortField);
switch(sortType) {
case STRING:
{
final SortedDocValues sorted = getOrWrapSorted(reader, sortField);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = Integer.MAX_VALUE;
@ -221,18 +262,10 @@ final class Sorter {
final int[] ords = new int[reader.maxDoc()];
Arrays.fill(ords, missingOrd);
SortedDocValues sorted = DocValues.getSorted(reader, sortField.getField());
int docID;
while ((docID = sorted.nextDoc()) != NO_MORE_DOCS) {
ords[docID] = sorted.ordValue();
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
return new DocComparator() {
@Override
@ -244,9 +277,8 @@ final class Sorter {
case LONG:
{
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
long[] values = new long[maxDoc];
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
if (sortField.getMissingValue() != null) {
Arrays.fill(values, (Long) sortField.getMissingValue());
}
@ -258,13 +290,6 @@ final class Sorter {
values[docID] = dvs.longValue();
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
return new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
@ -275,13 +300,12 @@ final class Sorter {
case INT:
{
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
int[] values = new int[maxDoc];
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
if (sortField.getMissingValue() != null) {
Arrays.fill(values, (Integer) sortField.getMissingValue());
}
while (true) {
int docID = dvs.nextDoc();
if (docID == NO_MORE_DOCS) {
@ -290,13 +314,6 @@ final class Sorter {
values[docID] = (int) dvs.longValue();
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
return new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
@ -307,9 +324,8 @@ final class Sorter {
case DOUBLE:
{
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
double[] values = new double[maxDoc];
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
if (sortField.getMissingValue() != null) {
Arrays.fill(values, (Double) sortField.getMissingValue());
}
@ -320,13 +336,6 @@ final class Sorter {
}
values[docID] = Double.longBitsToDouble(dvs.longValue());
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
return new DocComparator() {
@Override
@ -338,9 +347,8 @@ final class Sorter {
case FLOAT:
{
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
float[] values = new float[maxDoc];
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
if (sortField.getMissingValue() != null) {
Arrays.fill(values, (Float) sortField.getMissingValue());
}
@ -352,13 +360,6 @@ final class Sorter {
values[docID] = Float.intBitsToFloat((int) dvs.longValue());
}
final int reverseMul;
if (sortField.getReverse()) {
reverseMul = -1;
} else {
reverseMul = 1;
}
return new DocComparator() {
@Override
public int compare(int docID1, int docID2) {
@ -371,7 +372,7 @@ final class Sorter {
throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
}
}
/**
* Returns a mapping from the old document ID to its new location in the
* sorted index. Implementations can use the auxiliary
@ -386,7 +387,6 @@ final class Sorter {
*/
DocMap sort(LeafReader reader) throws IOException {
SortField fields[] = sort.getSort();
final int reverseMul[] = new int[fields.length];
final DocComparator comparators[] = new DocComparator[fields.length];
for (int i = 0; i < fields.length; i++) {
@ -424,7 +424,7 @@ final class Sorter {
public String toString() {
return getID();
}
static final Scorer FAKESCORER = new Scorer(null) {
float score;

View File

@ -82,6 +82,11 @@ public class SortedNumericSortField extends SortField {
this.selector = selector;
this.type = type;
}
/** Returns the numeric type in use for this sort */
public SortField.Type getNumericType() {
return type;
}
/** Returns the selector in use for this sort */
public SortedNumericSelector.Type getSelector() {

View File

@ -28,6 +28,8 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
@ -167,6 +169,78 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
return true;
}
private SortField randomIndexSortField() {
boolean reversed = random().nextBoolean();
SortField sortField;
switch(random().nextInt(10)) {
case 0:
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.INT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextInt());
}
break;
case 1:
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.INT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextInt());
}
break;
case 2:
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.LONG, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextLong());
}
break;
case 3:
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.LONG, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextLong());
}
break;
case 4:
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.FLOAT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextFloat());
}
break;
case 5:
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.FLOAT, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextFloat());
}
break;
case 6:
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.DOUBLE, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextDouble());
}
break;
case 7:
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.DOUBLE, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(random().nextDouble());
}
break;
case 8:
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.STRING, reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(SortField.STRING_LAST);
}
break;
case 9:
sortField = new SortedSetSortField(TestUtil.randomSimpleString(random()), reversed);
if (random().nextBoolean()) {
sortField.setMissingValue(SortField.STRING_LAST);
}
break;
default:
sortField = null;
fail();
}
return sortField;
}
/** Test sort */
public void testSort() throws IOException {
assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort());
@ -180,22 +254,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
final int numSortFields = TestUtil.nextInt(random(), 1, 3);
SortField[] sortFields = new SortField[numSortFields];
for (int j = 0; j < numSortFields; ++j) {
sortFields[j] = new SortField(
TestUtil.randomSimpleString(random()),
random().nextBoolean() ? SortField.Type.LONG : SortField.Type.STRING,
random().nextBoolean());
if (random().nextBoolean()) {
switch (sortFields[j].getType()) {
case LONG:
sortFields[j].setMissingValue(random().nextLong());
break;
case STRING:
sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST);
break;
default:
fail();
}
}
sortFields[j] = randomIndexSortField();
}
sort = new Sort(sortFields);
}