mirror of https://github.com/apache/lucene.git
LUCENE-7537: Index time sorting now supports multi-valued sorts using selectors (MIN, MAX, etc.)
This commit is contained in:
parent
7af454ad76
commit
6c3c6bc379
|
@ -90,6 +90,9 @@ Improvements
|
|||
which can be overridden to return false to eke out more speed in some cases.
|
||||
(Timothy M. Rodriguez, David Smiley)
|
||||
|
||||
* LUCENE-7537: Index time sorting now supports multi-valued sorts
|
||||
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file
|
||||
|
|
|
@ -33,9 +33,14 @@ import org.apache.lucene.index.IndexFileNames;
|
|||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSelector;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
@ -64,6 +69,7 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
|||
final static BytesRef SI_SORT = new BytesRef(" sort ");
|
||||
final static BytesRef SI_SORT_FIELD = new BytesRef(" field ");
|
||||
final static BytesRef SI_SORT_TYPE = new BytesRef(" type ");
|
||||
final static BytesRef SI_SELECTOR_TYPE = new BytesRef(" selector ");
|
||||
final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse ");
|
||||
final static BytesRef SI_SORT_MISSING = new BytesRef(" missing ");
|
||||
|
||||
|
@ -158,6 +164,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
|||
final String typeAsString = readString(SI_SORT_TYPE.length, scratch);
|
||||
|
||||
final SortField.Type type;
|
||||
SortedSetSelector.Type selectorSet = null;
|
||||
SortedNumericSelector.Type selectorNumeric = null;
|
||||
switch (typeAsString) {
|
||||
case "string":
|
||||
type = SortField.Type.STRING;
|
||||
|
@ -174,6 +182,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
|||
case "float":
|
||||
type = SortField.Type.FLOAT;
|
||||
break;
|
||||
case "multi_valued_string":
|
||||
type = SortField.Type.STRING;
|
||||
selectorSet = readSetSelector(input, scratch);
|
||||
break;
|
||||
case "multi_valued_long":
|
||||
type = SortField.Type.LONG;
|
||||
selectorNumeric = readNumericSelector(input, scratch);
|
||||
break;
|
||||
case "multi_valued_int":
|
||||
type = SortField.Type.INT;
|
||||
selectorNumeric = readNumericSelector(input, scratch);
|
||||
break;
|
||||
case "multi_valued_double":
|
||||
type = SortField.Type.DOUBLE;
|
||||
selectorNumeric = readNumericSelector(input, scratch);
|
||||
break;
|
||||
case "multi_valued_float":
|
||||
type = SortField.Type.FLOAT;
|
||||
selectorNumeric = readNumericSelector(input, scratch);
|
||||
break;
|
||||
default:
|
||||
throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input);
|
||||
}
|
||||
|
@ -245,7 +273,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
|||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
sortField[i] = new SortField(field, type, reverse);
|
||||
if (selectorSet != null) {
|
||||
sortField[i] = new SortedSetSortField(field, reverse);
|
||||
} else if (selectorNumeric != null) {
|
||||
sortField[i] = new SortedNumericSortField(field, type, reverse);
|
||||
} else {
|
||||
sortField[i] = new SortField(field, type, reverse);
|
||||
}
|
||||
if (missingValue != null) {
|
||||
sortField[i].setMissingValue(missingValue);
|
||||
}
|
||||
|
@ -265,6 +299,38 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
|||
private String readString(int offset, BytesRefBuilder scratch) {
|
||||
return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
private SortedSetSelector.Type readSetSelector(IndexInput input, BytesRefBuilder scratch) throws IOException {
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE);
|
||||
final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch);
|
||||
switch (selectorAsString) {
|
||||
case "min":
|
||||
return SortedSetSelector.Type.MIN;
|
||||
case "middle_min":
|
||||
return SortedSetSelector.Type.MIDDLE_MIN;
|
||||
case "middle_max":
|
||||
return SortedSetSelector.Type.MIDDLE_MAX;
|
||||
case "max":
|
||||
return SortedSetSelector.Type.MAX;
|
||||
default:
|
||||
throw new CorruptIndexException("unable to parse SortedSetSelector type: " + selectorAsString, input);
|
||||
}
|
||||
}
|
||||
|
||||
private SortedNumericSelector.Type readNumericSelector(IndexInput input, BytesRefBuilder scratch) throws IOException {
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE);
|
||||
final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch);
|
||||
switch (selectorAsString) {
|
||||
case "min":
|
||||
return SortedNumericSelector.Type.MIN;
|
||||
case "max":
|
||||
return SortedNumericSelector.Type.MAX;
|
||||
default:
|
||||
throw new CorruptIndexException("unable to parse SortedNumericSelector type: " + selectorAsString, input);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
|
||||
|
@ -352,29 +418,93 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat {
|
|||
SimpleTextUtil.writeNewline(output);
|
||||
|
||||
SimpleTextUtil.write(output, SI_SORT_TYPE);
|
||||
final String sortType;
|
||||
switch (sortField.getType()) {
|
||||
final String sortTypeString;
|
||||
final SortField.Type sortType;
|
||||
final boolean multiValued;
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
sortType = SortField.Type.STRING;
|
||||
multiValued = true;
|
||||
} else if (sortField instanceof SortedNumericSortField) {
|
||||
sortType = ((SortedNumericSortField) sortField).getNumericType();
|
||||
multiValued = true;
|
||||
} else {
|
||||
sortType = sortField.getType();
|
||||
multiValued = false;
|
||||
}
|
||||
switch (sortType) {
|
||||
case STRING:
|
||||
sortType = "string";
|
||||
if (multiValued) {
|
||||
sortTypeString = "multi_valued_string";
|
||||
} else {
|
||||
sortTypeString = "string";
|
||||
}
|
||||
break;
|
||||
case LONG:
|
||||
sortType = "long";
|
||||
if (multiValued) {
|
||||
sortTypeString = "multi_valued_long";
|
||||
} else {
|
||||
sortTypeString = "long";
|
||||
}
|
||||
break;
|
||||
case INT:
|
||||
sortType = "int";
|
||||
if (multiValued) {
|
||||
sortTypeString = "multi_valued_int";
|
||||
} else {
|
||||
sortTypeString = "int";
|
||||
}
|
||||
break;
|
||||
case DOUBLE:
|
||||
sortType = "double";
|
||||
if (multiValued) {
|
||||
sortTypeString = "multi_valued_double";
|
||||
} else {
|
||||
sortTypeString = "double";
|
||||
}
|
||||
break;
|
||||
case FLOAT:
|
||||
sortType = "float";
|
||||
if (multiValued) {
|
||||
sortTypeString = "multi_valued_float";
|
||||
} else {
|
||||
sortTypeString = "float";
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
|
||||
}
|
||||
SimpleTextUtil.write(output, sortType, scratch);
|
||||
SimpleTextUtil.write(output, sortTypeString, scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector();
|
||||
final String selectorString;
|
||||
if (selector == SortedSetSelector.Type.MIN) {
|
||||
selectorString = "min";
|
||||
} else if (selector == SortedSetSelector.Type.MIDDLE_MIN) {
|
||||
selectorString = "middle_min";
|
||||
} else if (selector == SortedSetSelector.Type.MIDDLE_MAX) {
|
||||
selectorString = "middle_max";
|
||||
} else if (selector == SortedSetSelector.Type.MAX) {
|
||||
selectorString = "max";
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector);
|
||||
}
|
||||
SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
|
||||
SimpleTextUtil.write(output, selectorString, scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
} else if (sortField instanceof SortedNumericSortField) {
|
||||
SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector();
|
||||
final String selectorString;
|
||||
if (selector == SortedNumericSelector.Type.MIN) {
|
||||
selectorString = "min";
|
||||
} else if (selector == SortedNumericSelector.Type.MAX) {
|
||||
selectorString = "max";
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector);
|
||||
}
|
||||
SimpleTextUtil.write(output, SI_SELECTOR_TYPE);
|
||||
SimpleTextUtil.write(output, selectorString, scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
}
|
||||
|
||||
SimpleTextUtil.write(output, SI_SORT_REVERSE);
|
||||
SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch);
|
||||
SimpleTextUtil.writeNewline(output);
|
||||
|
|
|
@ -29,6 +29,10 @@ import org.apache.lucene.index.SegmentInfo; // javadocs
|
|||
import org.apache.lucene.index.SegmentInfos; // javadocs
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSelector;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataOutput; // javadocs
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -69,7 +73,7 @@ import org.apache.lucene.util.Version;
|
|||
* addIndexes), etc.</li>
|
||||
* <li>Files is a list of files referred to by this segment.</li>
|
||||
* </ul>
|
||||
*
|
||||
*
|
||||
* @see SegmentInfos
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
@ -78,7 +82,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
/** Sole constructor. */
|
||||
public Lucene62SegmentInfoFormat() {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public SegmentInfo read(Directory dir, String segment, byte[] segmentID, IOContext context) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene62SegmentInfoFormat.SI_EXTENSION);
|
||||
|
@ -91,13 +95,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
Lucene62SegmentInfoFormat.VERSION_CURRENT,
|
||||
segmentID, "");
|
||||
final Version version = Version.fromBits(input.readInt(), input.readInt(), input.readInt());
|
||||
|
||||
|
||||
final int docCount = input.readInt();
|
||||
if (docCount < 0) {
|
||||
throw new CorruptIndexException("invalid docCount: " + docCount, input);
|
||||
}
|
||||
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
|
||||
|
||||
|
||||
final Map<String,String> diagnostics = input.readMapOfStrings();
|
||||
final Set<String> files = input.readSetOfStrings();
|
||||
final Map<String,String> attributes = input.readMapOfStrings();
|
||||
|
@ -110,6 +114,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
String fieldName = input.readString();
|
||||
int sortTypeID = input.readVInt();
|
||||
SortField.Type sortType;
|
||||
SortedSetSelector.Type sortedSetSelector = null;
|
||||
SortedNumericSelector.Type sortedNumericSelector = null;
|
||||
switch(sortTypeID) {
|
||||
case 0:
|
||||
sortType = SortField.Type.STRING;
|
||||
|
@ -126,6 +132,43 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
case 4:
|
||||
sortType = SortField.Type.FLOAT;
|
||||
break;
|
||||
case 5:
|
||||
sortType = SortField.Type.STRING;
|
||||
byte selector = input.readByte();
|
||||
if (selector == 0) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MIN;
|
||||
} else if (selector == 1) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MAX;
|
||||
} else if (selector == 2) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MIN;
|
||||
} else if (selector == 3) {
|
||||
sortedSetSelector = SortedSetSelector.Type.MIDDLE_MAX;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index SortedSetSelector ID: " + selector, input);
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
byte type = input.readByte();
|
||||
if (type == 0) {
|
||||
sortType = SortField.Type.LONG;
|
||||
} else if (type == 1) {
|
||||
sortType = SortField.Type.INT;
|
||||
} else if (type == 2) {
|
||||
sortType = SortField.Type.DOUBLE;
|
||||
} else if (type == 3) {
|
||||
sortType = SortField.Type.FLOAT;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index SortedNumericSortField type ID: " + type, input);
|
||||
}
|
||||
byte numericSelector = input.readByte();
|
||||
if (numericSelector == 0) {
|
||||
sortedNumericSelector = SortedNumericSelector.Type.MIN;
|
||||
} else if (numericSelector == 1) {
|
||||
sortedNumericSelector = SortedNumericSelector.Type.MAX;
|
||||
} else {
|
||||
throw new CorruptIndexException("invalid index SortedNumericSelector ID: " + numericSelector, input);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new CorruptIndexException("invalid index sort field type ID: " + sortTypeID, input);
|
||||
}
|
||||
|
@ -139,7 +182,13 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
throw new CorruptIndexException("invalid index sort reverse: " + b, input);
|
||||
}
|
||||
|
||||
sortFields[i] = new SortField(fieldName, sortType, reverse);
|
||||
if (sortedSetSelector != null) {
|
||||
sortFields[i] = new SortedSetSortField(fieldName, reverse, sortedSetSelector);
|
||||
} else if (sortedNumericSelector != null) {
|
||||
sortFields[i] = new SortedNumericSortField(fieldName, sortType, reverse, sortedNumericSelector);
|
||||
} else {
|
||||
sortFields[i] = new SortField(fieldName, sortType, reverse);
|
||||
}
|
||||
|
||||
Object missingValue;
|
||||
b = input.readByte();
|
||||
|
@ -194,7 +243,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
} else {
|
||||
indexSort = null;
|
||||
}
|
||||
|
||||
|
||||
si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, attributes, indexSort);
|
||||
si.setFiles(files);
|
||||
} catch (Throwable exception) {
|
||||
|
@ -213,8 +262,8 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
|
||||
// Only add the file once we've successfully created it, else IFD assert can trip:
|
||||
si.addFile(fileName);
|
||||
CodecUtil.writeIndexHeader(output,
|
||||
Lucene62SegmentInfoFormat.CODEC_NAME,
|
||||
CodecUtil.writeIndexHeader(output,
|
||||
Lucene62SegmentInfoFormat.CODEC_NAME,
|
||||
Lucene62SegmentInfoFormat.VERSION_CURRENT,
|
||||
si.getId(),
|
||||
"");
|
||||
|
@ -245,6 +294,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
output.writeVInt(numSortFields);
|
||||
for (int i = 0; i < numSortFields; ++i) {
|
||||
SortField sortField = indexSort.getSort()[i];
|
||||
SortField.Type sortType = sortField.getType();
|
||||
output.writeString(sortField.getField());
|
||||
int sortTypeID;
|
||||
switch (sortField.getType()) {
|
||||
|
@ -263,10 +313,55 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
case FLOAT:
|
||||
sortTypeID = 4;
|
||||
break;
|
||||
case CUSTOM:
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
sortTypeID = 5;
|
||||
sortType = SortField.Type.STRING;
|
||||
} else if (sortField instanceof SortedNumericSortField) {
|
||||
sortTypeID = 6;
|
||||
sortType = ((SortedNumericSortField) sortField).getNumericType();
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSortField " + sortField);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
|
||||
}
|
||||
output.writeVInt(sortTypeID);
|
||||
if (sortTypeID == 5) {
|
||||
SortedSetSortField ssf = (SortedSetSortField) sortField;
|
||||
if (ssf.getSelector() == SortedSetSelector.Type.MIN) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MAX) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MIN) {
|
||||
output.writeByte((byte) 2);
|
||||
} else if (ssf.getSelector() == SortedSetSelector.Type.MIDDLE_MAX) {
|
||||
output.writeByte((byte) 3);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedSetSelector type: " + ssf.getSelector());
|
||||
}
|
||||
} else if (sortTypeID == 6) {
|
||||
SortedNumericSortField snsf = (SortedNumericSortField) sortField;
|
||||
if (snsf.getNumericType() == SortField.Type.LONG) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (snsf.getNumericType() == SortField.Type.INT) {
|
||||
output.writeByte((byte) 1);
|
||||
} else if (snsf.getNumericType() == SortField.Type.DOUBLE) {
|
||||
output.writeByte((byte) 2);
|
||||
} else if (snsf.getNumericType() == SortField.Type.FLOAT) {
|
||||
output.writeByte((byte) 3);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected SortedNumericSelector type: " + snsf.getNumericType());
|
||||
}
|
||||
if (snsf.getSelector() == SortedNumericSelector.Type.MIN) {
|
||||
output.writeByte((byte) 0);
|
||||
} else if (snsf.getSelector() == SortedNumericSelector.Type.MAX) {
|
||||
output.writeByte((byte) 1);
|
||||
} else {
|
||||
throw new IllegalStateException("Unexpected sorted numeric selector type: " + snsf.getSelector());
|
||||
}
|
||||
}
|
||||
output.writeByte((byte) (sortField.getReverse() ? 0 : 1));
|
||||
|
||||
// write missing value
|
||||
|
@ -274,7 +369,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
if (missingValue == null) {
|
||||
output.writeByte((byte) 0);
|
||||
} else {
|
||||
switch(sortField.getType()) {
|
||||
switch(sortType) {
|
||||
case STRING:
|
||||
if (missingValue == SortField.STRING_LAST) {
|
||||
output.writeByte((byte) 1);
|
||||
|
@ -305,7 +400,7 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
CodecUtil.writeFooter(output);
|
||||
}
|
||||
}
|
||||
|
@ -314,5 +409,6 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
|
|||
public final static String SI_EXTENSION = "si";
|
||||
static final String CODEC_NAME = "Lucene62SegmentInfo";
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION_CURRENT = VERSION_START;
|
||||
static final int VERSION_MULTI_VALUED_SORT = 1;
|
||||
static final int VERSION_CURRENT = VERSION_MULTI_VALUED_SORT;
|
||||
}
|
||||
|
|
|
@ -468,7 +468,8 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
|
|||
*/
|
||||
public IndexWriterConfig setIndexSort(Sort sort) {
|
||||
for(SortField sortField : sort.getSort()) {
|
||||
if (ALLOWED_INDEX_SORT_TYPES.contains(sortField.getType()) == false) {
|
||||
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
|
||||
if (ALLOWED_INDEX_SORT_TYPES.contains(sortType) == false) {
|
||||
throw new IllegalArgumentException("invalid SortField type: must be one of " + ALLOWED_INDEX_SORT_TYPES + " but got: " + sortField);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,33 +141,25 @@ final class MultiSorter {
|
|||
private static ComparableProvider[] getComparableProviders(List<CodecReader> readers, SortField sortField) throws IOException {
|
||||
|
||||
ComparableProvider[] providers = new ComparableProvider[readers.size()];
|
||||
final int reverseMul = sortField.getReverse() ? -1 : 1;
|
||||
final SortField.Type sortType = Sorter.getSortFieldType(sortField);
|
||||
|
||||
switch(sortField.getType()) {
|
||||
switch(sortType) {
|
||||
|
||||
case STRING:
|
||||
{
|
||||
// this uses the efficient segment-local ordinal map:
|
||||
final SortedDocValues[] values = new SortedDocValues[readers.size()];
|
||||
for(int i=0;i<readers.size();i++) {
|
||||
SortedDocValues v = readers.get(i).getSortedDocValues(sortField.getField());
|
||||
if (v == null) {
|
||||
v = DocValues.emptySorted();
|
||||
}
|
||||
values[i] = v;
|
||||
final SortedDocValues sorted = Sorter.getOrWrapSorted(readers.get(i), sortField);
|
||||
values[i] = sorted;
|
||||
}
|
||||
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
|
||||
final int missingOrd;
|
||||
if (sortField.getMissingValue() == SortField.STRING_LAST) {
|
||||
missingOrd = Integer.MAX_VALUE;
|
||||
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;
|
||||
} else {
|
||||
missingOrd = Integer.MIN_VALUE;
|
||||
}
|
||||
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
missingOrd = sortField.getReverse() ? Integer.MAX_VALUE : Integer.MIN_VALUE;
|
||||
}
|
||||
|
||||
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
|
||||
|
@ -205,13 +197,6 @@ final class MultiSorter {
|
|||
|
||||
case LONG:
|
||||
{
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
final Long missingValue;
|
||||
if (sortField.getMissingValue() != null) {
|
||||
missingValue = (Long) sortField.getMissingValue();
|
||||
|
@ -220,8 +205,8 @@ final class MultiSorter {
|
|||
}
|
||||
|
||||
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
|
||||
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
|
||||
|
||||
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
|
||||
|
||||
providers[readerIndex] = new ComparableProvider() {
|
||||
// used only by assert:
|
||||
int lastDocID = -1;
|
||||
|
@ -243,7 +228,7 @@ final class MultiSorter {
|
|||
if (readerDocID == docID) {
|
||||
return reverseMul * values.longValue();
|
||||
} else {
|
||||
return missingValue;
|
||||
return reverseMul * missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -253,13 +238,6 @@ final class MultiSorter {
|
|||
|
||||
case INT:
|
||||
{
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
final Integer missingValue;
|
||||
if (sortField.getMissingValue() != null) {
|
||||
missingValue = (Integer) sortField.getMissingValue();
|
||||
|
@ -268,7 +246,7 @@ final class MultiSorter {
|
|||
}
|
||||
|
||||
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
|
||||
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
|
||||
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
|
||||
|
||||
providers[readerIndex] = new ComparableProvider() {
|
||||
// used only by assert:
|
||||
|
@ -291,7 +269,7 @@ final class MultiSorter {
|
|||
if (readerDocID == docID) {
|
||||
return reverseMul * (int) values.longValue();
|
||||
} else {
|
||||
return missingValue;
|
||||
return reverseMul * missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -301,13 +279,6 @@ final class MultiSorter {
|
|||
|
||||
case DOUBLE:
|
||||
{
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
final Double missingValue;
|
||||
if (sortField.getMissingValue() != null) {
|
||||
missingValue = (Double) sortField.getMissingValue();
|
||||
|
@ -316,7 +287,7 @@ final class MultiSorter {
|
|||
}
|
||||
|
||||
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
|
||||
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
|
||||
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
|
||||
|
||||
providers[readerIndex] = new ComparableProvider() {
|
||||
// used only by assert:
|
||||
|
@ -339,7 +310,7 @@ final class MultiSorter {
|
|||
if (readerDocID == docID) {
|
||||
return reverseMul * Double.longBitsToDouble(values.longValue());
|
||||
} else {
|
||||
return missingValue;
|
||||
return reverseMul * missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -349,13 +320,6 @@ final class MultiSorter {
|
|||
|
||||
case FLOAT:
|
||||
{
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
final Float missingValue;
|
||||
if (sortField.getMissingValue() != null) {
|
||||
missingValue = (Float) sortField.getMissingValue();
|
||||
|
@ -364,7 +328,7 @@ final class MultiSorter {
|
|||
}
|
||||
|
||||
for(int readerIndex=0;readerIndex<readers.size();readerIndex++) {
|
||||
final NumericDocValues values = DocValues.getNumeric(readers.get(readerIndex), sortField.getField());
|
||||
final NumericDocValues values = Sorter.getOrWrapNumeric(readers.get(readerIndex), sortField);
|
||||
|
||||
providers[readerIndex] = new ComparableProvider() {
|
||||
// used only by assert:
|
||||
|
@ -387,7 +351,7 @@ final class MultiSorter {
|
|||
if (readerDocID == docID) {
|
||||
return reverseMul * Float.intBitsToFloat((int) values.longValue());
|
||||
} else {
|
||||
return missingValue;
|
||||
return reverseMul * missingValue;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -25,6 +25,10 @@ import org.apache.lucene.search.FieldComparator;
|
|||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSelector;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.SortedSetSelector;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.util.TimSorter;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.apache.lucene.util.packed.PackedLongValues;
|
||||
|
@ -200,18 +204,55 @@ final class Sorter {
|
|||
};
|
||||
}
|
||||
|
||||
/** Returns the native sort type for {@link SortedSetSortField} and {@link SortedNumericSortField},
|
||||
* {@link SortField#getType()} otherwise */
|
||||
static SortField.Type getSortFieldType(SortField sortField) {
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
return SortField.Type.STRING;
|
||||
} else if (sortField instanceof SortedNumericSortField) {
|
||||
return ((SortedNumericSortField) sortField).getNumericType();
|
||||
} else {
|
||||
return sortField.getType();
|
||||
}
|
||||
}
|
||||
|
||||
/** Wraps a {@link SortedNumericDocValues} as a single-valued view if the field is an instance of {@link SortedNumericSortField},
|
||||
* returns {@link NumericDocValues} for the field otherwise. */
|
||||
static NumericDocValues getOrWrapNumeric(LeafReader reader, SortField sortField) throws IOException {
|
||||
if (sortField instanceof SortedNumericSortField) {
|
||||
SortedNumericSortField sf = (SortedNumericSortField) sortField;
|
||||
return SortedNumericSelector.wrap(DocValues.getSortedNumeric(reader, sf.getField()), sf.getSelector(), sf.getNumericType());
|
||||
} else {
|
||||
return DocValues.getNumeric(reader, sortField.getField());
|
||||
}
|
||||
}
|
||||
|
||||
/** Wraps a {@link SortedSetDocValues} as a single-valued view if the field is an instance of {@link SortedSetSortField},
|
||||
* returns {@link SortedDocValues} for the field otherwise. */
|
||||
static SortedDocValues getOrWrapSorted(LeafReader reader, SortField sortField) throws IOException {
|
||||
if (sortField instanceof SortedSetSortField) {
|
||||
SortedSetSortField sf = (SortedSetSortField) sortField;
|
||||
return SortedSetSelector.wrap(DocValues.getSortedSet(reader, sf.getField()), sf.getSelector());
|
||||
} else {
|
||||
return DocValues.getSorted(reader, sortField.getField());
|
||||
}
|
||||
}
|
||||
|
||||
/** We cannot use the {@link FieldComparator} API because that API requires that you send it docIDs in order. Note that this API
|
||||
* allocates arrays[maxDoc] to hold the native values needed for comparison, but 1) they are transient (only alive while sorting this one
|
||||
* segment), and 2) in the typical index sorting case, they are only used to sort newly flushed segments, which will be smaller than
|
||||
* merged segments. */
|
||||
private static DocComparator getDocComparator(LeafReader reader, SortField sortField) throws IOException {
|
||||
|
||||
int maxDoc = reader.maxDoc();
|
||||
|
||||
switch(sortField.getType()) {
|
||||
final int maxDoc = reader.maxDoc();
|
||||
final int reverseMul = sortField.getReverse() ? -1 : 1;
|
||||
final SortField.Type sortType = getSortFieldType(sortField);
|
||||
|
||||
switch(sortType) {
|
||||
|
||||
case STRING:
|
||||
{
|
||||
final SortedDocValues sorted = getOrWrapSorted(reader, sortField);
|
||||
final int missingOrd;
|
||||
if (sortField.getMissingValue() == SortField.STRING_LAST) {
|
||||
missingOrd = Integer.MAX_VALUE;
|
||||
|
@ -221,18 +262,10 @@ final class Sorter {
|
|||
|
||||
final int[] ords = new int[reader.maxDoc()];
|
||||
Arrays.fill(ords, missingOrd);
|
||||
SortedDocValues sorted = DocValues.getSorted(reader, sortField.getField());
|
||||
int docID;
|
||||
while ((docID = sorted.nextDoc()) != NO_MORE_DOCS) {
|
||||
ords[docID] = sorted.ordValue();
|
||||
}
|
||||
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
return new DocComparator() {
|
||||
@Override
|
||||
|
@ -244,9 +277,8 @@ final class Sorter {
|
|||
|
||||
case LONG:
|
||||
{
|
||||
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
|
||||
long[] values = new long[maxDoc];
|
||||
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
|
||||
|
||||
if (sortField.getMissingValue() != null) {
|
||||
Arrays.fill(values, (Long) sortField.getMissingValue());
|
||||
}
|
||||
|
@ -258,13 +290,6 @@ final class Sorter {
|
|||
values[docID] = dvs.longValue();
|
||||
}
|
||||
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
return new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
|
@ -275,13 +300,12 @@ final class Sorter {
|
|||
|
||||
case INT:
|
||||
{
|
||||
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
|
||||
int[] values = new int[maxDoc];
|
||||
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
|
||||
|
||||
if (sortField.getMissingValue() != null) {
|
||||
Arrays.fill(values, (Integer) sortField.getMissingValue());
|
||||
}
|
||||
|
||||
|
||||
while (true) {
|
||||
int docID = dvs.nextDoc();
|
||||
if (docID == NO_MORE_DOCS) {
|
||||
|
@ -290,13 +314,6 @@ final class Sorter {
|
|||
values[docID] = (int) dvs.longValue();
|
||||
}
|
||||
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
return new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
|
@ -307,9 +324,8 @@ final class Sorter {
|
|||
|
||||
case DOUBLE:
|
||||
{
|
||||
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
|
||||
double[] values = new double[maxDoc];
|
||||
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
|
||||
|
||||
if (sortField.getMissingValue() != null) {
|
||||
Arrays.fill(values, (Double) sortField.getMissingValue());
|
||||
}
|
||||
|
@ -320,13 +336,6 @@ final class Sorter {
|
|||
}
|
||||
values[docID] = Double.longBitsToDouble(dvs.longValue());
|
||||
}
|
||||
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
return new DocComparator() {
|
||||
@Override
|
||||
|
@ -338,9 +347,8 @@ final class Sorter {
|
|||
|
||||
case FLOAT:
|
||||
{
|
||||
final NumericDocValues dvs = getOrWrapNumeric(reader, sortField);
|
||||
float[] values = new float[maxDoc];
|
||||
NumericDocValues dvs = DocValues.getNumeric(reader, sortField.getField());
|
||||
|
||||
if (sortField.getMissingValue() != null) {
|
||||
Arrays.fill(values, (Float) sortField.getMissingValue());
|
||||
}
|
||||
|
@ -352,13 +360,6 @@ final class Sorter {
|
|||
values[docID] = Float.intBitsToFloat((int) dvs.longValue());
|
||||
}
|
||||
|
||||
final int reverseMul;
|
||||
if (sortField.getReverse()) {
|
||||
reverseMul = -1;
|
||||
} else {
|
||||
reverseMul = 1;
|
||||
}
|
||||
|
||||
return new DocComparator() {
|
||||
@Override
|
||||
public int compare(int docID1, int docID2) {
|
||||
|
@ -371,7 +372,7 @@ final class Sorter {
|
|||
throw new IllegalArgumentException("unhandled SortField.getType()=" + sortField.getType());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a mapping from the old document ID to its new location in the
|
||||
* sorted index. Implementations can use the auxiliary
|
||||
|
@ -386,7 +387,6 @@ final class Sorter {
|
|||
*/
|
||||
DocMap sort(LeafReader reader) throws IOException {
|
||||
SortField fields[] = sort.getSort();
|
||||
final int reverseMul[] = new int[fields.length];
|
||||
final DocComparator comparators[] = new DocComparator[fields.length];
|
||||
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
|
@ -424,7 +424,7 @@ final class Sorter {
|
|||
public String toString() {
|
||||
return getID();
|
||||
}
|
||||
|
||||
|
||||
static final Scorer FAKESCORER = new Scorer(null) {
|
||||
|
||||
float score;
|
||||
|
|
|
@ -82,6 +82,11 @@ public class SortedNumericSortField extends SortField {
|
|||
this.selector = selector;
|
||||
this.type = type;
|
||||
}
|
||||
|
||||
/** Returns the numeric type in use for this sort */
|
||||
public SortField.Type getNumericType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
/** Returns the selector in use for this sort */
|
||||
public SortedNumericSelector.Type getSelector() {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -28,6 +28,8 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.SortedNumericSortField;
|
||||
import org.apache.lucene.search.SortedSetSortField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
|
@ -167,6 +169,78 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
|
|||
return true;
|
||||
}
|
||||
|
||||
private SortField randomIndexSortField() {
|
||||
boolean reversed = random().nextBoolean();
|
||||
SortField sortField;
|
||||
switch(random().nextInt(10)) {
|
||||
case 0:
|
||||
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.INT, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextInt());
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.INT, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextInt());
|
||||
}
|
||||
break;
|
||||
|
||||
case 2:
|
||||
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.LONG, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextLong());
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.LONG, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextLong());
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.FLOAT, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextFloat());
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.FLOAT, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextFloat());
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.DOUBLE, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextDouble());
|
||||
}
|
||||
break;
|
||||
case 7:
|
||||
sortField = new SortedNumericSortField(TestUtil.randomSimpleString(random()), SortField.Type.DOUBLE, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(random().nextDouble());
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
sortField = new SortField(TestUtil.randomSimpleString(random()), SortField.Type.STRING, reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(SortField.STRING_LAST);
|
||||
}
|
||||
break;
|
||||
case 9:
|
||||
sortField = new SortedSetSortField(TestUtil.randomSimpleString(random()), reversed);
|
||||
if (random().nextBoolean()) {
|
||||
sortField.setMissingValue(SortField.STRING_LAST);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
sortField = null;
|
||||
fail();
|
||||
}
|
||||
return sortField;
|
||||
}
|
||||
|
||||
/** Test sort */
|
||||
public void testSort() throws IOException {
|
||||
assumeTrue("test requires a codec that can read/write index sort", supportsIndexSort());
|
||||
|
@ -180,22 +254,7 @@ public abstract class BaseSegmentInfoFormatTestCase extends BaseIndexFileFormatT
|
|||
final int numSortFields = TestUtil.nextInt(random(), 1, 3);
|
||||
SortField[] sortFields = new SortField[numSortFields];
|
||||
for (int j = 0; j < numSortFields; ++j) {
|
||||
sortFields[j] = new SortField(
|
||||
TestUtil.randomSimpleString(random()),
|
||||
random().nextBoolean() ? SortField.Type.LONG : SortField.Type.STRING,
|
||||
random().nextBoolean());
|
||||
if (random().nextBoolean()) {
|
||||
switch (sortFields[j].getType()) {
|
||||
case LONG:
|
||||
sortFields[j].setMissingValue(random().nextLong());
|
||||
break;
|
||||
case STRING:
|
||||
sortFields[j].setMissingValue(random().nextBoolean() ? SortField.STRING_FIRST : SortField.STRING_LAST);
|
||||
break;
|
||||
default:
|
||||
fail();
|
||||
}
|
||||
}
|
||||
sortFields[j] = randomIndexSortField();
|
||||
}
|
||||
sort = new Sort(sortFields);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue