diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a6c6dbedaeb..bdc118b95f2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -90,6 +90,9 @@ Improvements which can be overridden to return false to eek out more speed in some cases. (Timothy M. Rodriguez, David Smiley) +* LUCENE-7537: Index time sorting now supports multi-valued sorts + using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless) + Other * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java index 146e92a6a29..3d38d72385f 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java @@ -33,9 +33,14 @@ import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSelector; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -64,6 +69,7 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { final static BytesRef SI_SORT = new BytesRef(" sort "); final static BytesRef SI_SORT_FIELD = new BytesRef(" field "); final static BytesRef SI_SORT_TYPE = new BytesRef(" type "); + final static BytesRef SI_SELECTOR_TYPE = new BytesRef(" selector "); final static BytesRef SI_SORT_REVERSE = new BytesRef(" reverse "); final static BytesRef SI_SORT_MISSING = new BytesRef(" missing "); @@ -158,6 +164,8 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { final String typeAsString = readString(SI_SORT_TYPE.length, scratch); final SortField.Type type; + SortedSetSelector.Type selectorSet = null; + SortedNumericSelector.Type selectorNumeric = null; switch (typeAsString) { case "string": type = SortField.Type.STRING; @@ -174,6 +182,26 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { case "float": type = SortField.Type.FLOAT; break; + case "multi_valued_string": + type = SortField.Type.STRING; + selectorSet = readSetSelector(input, scratch); + break; + case "multi_valued_long": + type = SortField.Type.LONG; + selectorNumeric = readNumericSelector(input, scratch); + break; + case "multi_valued_int": + type = SortField.Type.INT; + selectorNumeric = readNumericSelector(input, scratch); + break; + case "multi_valued_double": + type = SortField.Type.DOUBLE; + selectorNumeric = readNumericSelector(input, scratch); + break; + case "multi_valued_float": + type = SortField.Type.FLOAT; + selectorNumeric = readNumericSelector(input, scratch); + break; default: throw new CorruptIndexException("unable to parse sort type string: " + typeAsString, input); } @@ -245,7 +273,13 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { default: throw new AssertionError(); } - sortField[i] = new SortField(field, type, reverse); + if (selectorSet != null) { + sortField[i] = new SortedSetSortField(field, reverse); + } else if (selectorNumeric != null) { + sortField[i] = new SortedNumericSortField(field, type, reverse); + } else { + sortField[i] = new SortField(field, type, reverse); + } if (missingValue != null) { sortField[i].setMissingValue(missingValue); } @@ -265,6 +299,38 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { private String readString(int offset, BytesRefBuilder scratch) { return new String(scratch.bytes(), offset, scratch.length()-offset, StandardCharsets.UTF_8); } + + private SortedSetSelector.Type readSetSelector(IndexInput input, BytesRefBuilder scratch) throws IOException { + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE); + final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch); + switch (selectorAsString) { + case "min": + return SortedSetSelector.Type.MIN; + case "middle_min": + return SortedSetSelector.Type.MIDDLE_MIN; + case "middle_max": + return SortedSetSelector.Type.MIDDLE_MAX; + case "max": + return SortedSetSelector.Type.MAX; + default: + throw new CorruptIndexException("unable to parse SortedSetSelector type: " + selectorAsString, input); + } + } + + private SortedNumericSelector.Type readNumericSelector(IndexInput input, BytesRefBuilder scratch) throws IOException { + SimpleTextUtil.readLine(input, scratch); + assert StringHelper.startsWith(scratch.get(), SI_SELECTOR_TYPE); + final String selectorAsString = readString(SI_SELECTOR_TYPE.length, scratch); + switch (selectorAsString) { + case "min": + return SortedNumericSelector.Type.MIN; + case "max": + return SortedNumericSelector.Type.MAX; + default: + throw new CorruptIndexException("unable to parse SortedNumericSelector type: " + selectorAsString, input); + } + } @Override public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException { @@ -352,29 +418,93 @@ public class SimpleTextSegmentInfoFormat extends SegmentInfoFormat { SimpleTextUtil.writeNewline(output); SimpleTextUtil.write(output, SI_SORT_TYPE); - final String sortType; - switch (sortField.getType()) { + final String sortTypeString; + final SortField.Type sortType; + final boolean multiValued; + if (sortField instanceof SortedSetSortField) { + sortType = SortField.Type.STRING; + multiValued = true; + } else if (sortField instanceof SortedNumericSortField) { + sortType = ((SortedNumericSortField) sortField).getNumericType(); + multiValued = true; + } else { + sortType = sortField.getType(); + multiValued = false; + } + switch (sortType) { case STRING: - sortType = "string"; + if (multiValued) { + sortTypeString = "multi_valued_string"; + } else { + sortTypeString = "string"; + } break; case LONG: - sortType = "long"; + if (multiValued) { + sortTypeString = "multi_valued_long"; + } else { + sortTypeString = "long"; + } break; case INT: - sortType = "int"; + if (multiValued) { + sortTypeString = "multi_valued_int"; + } else { + sortTypeString = "int"; + } break; case DOUBLE: - sortType = "double"; + if (multiValued) { + sortTypeString = "multi_valued_double"; + } else { + sortTypeString = "double"; + } break; case FLOAT: - sortType = "float"; + if (multiValued) { + sortTypeString = "multi_valued_float"; + } else { + sortTypeString = "float"; + } break; default: throw new IllegalStateException("Unexpected sort type: " + sortField.getType()); } - SimpleTextUtil.write(output, sortType, scratch); + SimpleTextUtil.write(output, sortTypeString, scratch); SimpleTextUtil.writeNewline(output); + if (sortField instanceof SortedSetSortField) { + SortedSetSelector.Type selector = ((SortedSetSortField) sortField).getSelector(); + final String selectorString; + if (selector == SortedSetSelector.Type.MIN) { + selectorString = "min"; + } else if (selector == SortedSetSelector.Type.MIDDLE_MIN) { + selectorString = "middle_min"; + } else if (selector == SortedSetSelector.Type.MIDDLE_MAX) { + selectorString = "middle_max"; + } else if (selector == SortedSetSelector.Type.MAX) { + selectorString = "max"; + } else { + throw new IllegalStateException("Unexpected SortedSetSelector type selector: " + selector); + } + SimpleTextUtil.write(output, SI_SELECTOR_TYPE); + SimpleTextUtil.write(output, selectorString, scratch); + SimpleTextUtil.writeNewline(output); + } else if (sortField instanceof SortedNumericSortField) { + SortedNumericSelector.Type selector = ((SortedNumericSortField) sortField).getSelector(); + final String selectorString; + if (selector == SortedNumericSelector.Type.MIN) { + selectorString = "min"; + } else if (selector == SortedNumericSelector.Type.MAX) { + selectorString = "max"; + } else { + throw new IllegalStateException("Unexpected SortedNumericSelector type selector: " + selector); + } + SimpleTextUtil.write(output, SI_SELECTOR_TYPE); + SimpleTextUtil.write(output, selectorString, scratch); + SimpleTextUtil.writeNewline(output); + } + SimpleTextUtil.write(output, SI_SORT_REVERSE); SimpleTextUtil.write(output, Boolean.toString(sortField.getReverse()), scratch); SimpleTextUtil.writeNewline(output); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java index 1ee52588a55..da6e395e27a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java @@ -29,6 +29,10 @@ import org.apache.lucene.index.SegmentInfo; // javadocs import org.apache.lucene.index.SegmentInfos; // javadocs import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSelector; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SortedSetSortField; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataOutput; // javadocs import org.apache.lucene.store.Directory; @@ -69,7 +73,7 @@ import org.apache.lucene.util.Version; * addIndexes), etc. *