mirror of https://github.com/apache/lucene.git
LUCENE-10633 Disable sort optimization for SortedSetSortField (#3125)
Add ability to SortedSetSortField to disable sort optimization
This commit is contained in:
parent
61ef031f7f
commit
554fabf682
|
@ -93,6 +93,10 @@ New Features
|
|||
|
||||
* LUCENE-10629: Support match set filtering with a query in MatchingFacetSetCounts. (Stefan Vodita, Shai Erera)
|
||||
|
||||
* LUCENE-10633: SortField#setOptimizeSortWithIndexedData and
|
||||
SortField#getOptimizeSortWithIndexedData were introduced to provide
|
||||
an option to disable sort optimization for various sort fields. (Mayya Sharipova)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
* LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani)
|
||||
|
|
|
@ -45,6 +45,10 @@ import org.apache.lucene.util.NumericUtils;
|
|||
* optimization to skip non-competitive documents. This optimization relies on the assumption that
|
||||
* the same data is stored in both the points and the doc values.
|
||||
*
|
||||
* <p>Sorting on a SORTED(_SET) field that is indexed with both doc values and term index may use an
|
||||
* optimization to skip non-competitive documents. This optimization relies on the assumption that
|
||||
* the same data is stored in both the term index and the doc values.
|
||||
*
|
||||
* <p>Created: Feb 11, 2004 1:25:29 PM
|
||||
*
|
||||
* @since lucene 1.4
|
||||
|
@ -131,8 +135,8 @@ public class SortField {
|
|||
// Used for 'sortMissingFirst/Last'
|
||||
protected Object missingValue = null;
|
||||
|
||||
// Indicates if numeric sort should be optimized with Points index. Set to true by default.
|
||||
@Deprecated private boolean optimizeSortWithPoints = true;
|
||||
// Indicates if sort should be optimized with indexed data. Set to true by default.
|
||||
@Deprecated private boolean optimizeSortWithIndexedData = true;
|
||||
|
||||
/**
|
||||
* Creates a sort by terms in the given field with the type of term values explicitly given.
|
||||
|
@ -537,7 +541,10 @@ public class SortField {
|
|||
break;
|
||||
|
||||
case STRING:
|
||||
return new TermOrdValComparator(numHits, field, missingValue == STRING_LAST, reverse);
|
||||
fieldComparator =
|
||||
new TermOrdValComparator(
|
||||
numHits, field, missingValue == STRING_LAST, reverse, enableSkipping);
|
||||
break;
|
||||
|
||||
case STRING_VAL:
|
||||
fieldComparator =
|
||||
|
@ -551,7 +558,7 @@ public class SortField {
|
|||
default:
|
||||
throw new IllegalStateException("Illegal sort type: " + type);
|
||||
}
|
||||
if (getOptimizeSortWithPoints() == false) {
|
||||
if (getOptimizeSortWithIndexedData() == false) {
|
||||
fieldComparator.disableSkipping();
|
||||
}
|
||||
return fieldComparator;
|
||||
|
@ -626,6 +633,39 @@ public class SortField {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enables/disables sort optimization that uses the indexed data.
|
||||
*
|
||||
* <p>Enabled by default. By default, sorting on a numeric field activates point sort optimization
|
||||
* that can efficiently skip over non-competitive hits. Sort optimization has a number of
|
||||
* requirements, one of which is that SortField.Type matches the Point type with which the field
|
||||
* was indexed (e.g. sort on IntPoint field should use SortField.Type.INT). Another requirement is
|
||||
* that the same data is indexed with points and doc values for the field.
|
||||
*
|
||||
* <p>By default, sorting on a SORTED(_SET) field activates sort optimization that can efficiently
|
||||
* skip over non-competitive hits. Sort optimization requires that the same data is indexed with
|
||||
* term index and doc values for the field.
|
||||
*
|
||||
* @param optimizeSortWithIndexedData providing {@code false} disables the optimization, in cases
|
||||
* where these requirements can't be met.
|
||||
* @deprecated should only be used for compatibility with 8.x indices that got created with
|
||||
* inconsistent data across fields, or the wrong sort configuration in the index sort
|
||||
*/
|
||||
@Deprecated // Remove in Lucene 10
|
||||
public void setOptimizeSortWithIndexedData(boolean optimizeSortWithIndexedData) {
|
||||
this.optimizeSortWithIndexedData = optimizeSortWithIndexedData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether sort should be optimized with indexed data.
|
||||
*
|
||||
* @return whether sort should be optimized with indexed data
|
||||
*/
|
||||
@Deprecated // Remove in Lucene 10
|
||||
public boolean getOptimizeSortWithIndexedData() {
|
||||
return optimizeSortWithIndexedData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enables/disables numeric sort optimization to use the Points index.
|
||||
*
|
||||
|
@ -638,20 +678,22 @@ public class SortField {
|
|||
* @param optimizeSortWithPoints providing {@code false} disables the optimization, in cases where
|
||||
* these requirements can't be met.
|
||||
* @deprecated should only be used for compatibility with 8.x indices that got created with
|
||||
* inconsistent data across fields, or the wrong sort configuration in the index sort
|
||||
* inconsistent data across fields, or the wrong sort configuration in the index sort. This is
|
||||
* a duplicate method for {@code SortField#setOptimizeSortWithIndexedData}.
|
||||
*/
|
||||
@Deprecated // Remove in Lucene 10
|
||||
public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) {
|
||||
this.optimizeSortWithPoints = optimizeSortWithPoints;
|
||||
setOptimizeSortWithIndexedData(optimizeSortWithPoints);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether sort should be optimized with the points index.
|
||||
*
|
||||
* @return whether sort should be optimized with the points index
|
||||
* @deprecated This is a duplicate method for {@code SortField#getOptimizeSortWithIndexedData}.
|
||||
*/
|
||||
@Deprecated // Remove in Lucene 10
|
||||
public boolean getOptimizeSortWithPoints() {
|
||||
return optimizeSortWithPoints;
|
||||
return getOptimizeSortWithIndexedData();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -333,7 +333,7 @@ public class SortedNumericSortField extends SortField {
|
|||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
if (getOptimizeSortWithPoints() == false) {
|
||||
if (getOptimizeSortWithIndexedData() == false) {
|
||||
fieldComparator.disableSkipping();
|
||||
}
|
||||
return fieldComparator;
|
||||
|
|
|
@ -179,7 +179,9 @@ public class SortedSetSortField extends SortField {
|
|||
|
||||
@Override
|
||||
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
|
||||
return new TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST, reverse) {
|
||||
boolean finalEnableSkipping = enableSkipping && getOptimizeSortWithIndexedData();
|
||||
return new TermOrdValComparator(
|
||||
numHits, getField(), missingValue == STRING_LAST, reverse, finalEnableSkipping) {
|
||||
@Override
|
||||
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
||||
throws IOException {
|
||||
|
|
|
@ -97,7 +97,9 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
|
|||
* Creates this, with control over how missing values are sorted. Pass sortMissingLast=true to put
|
||||
* missing values at the end.
|
||||
*/
|
||||
public TermOrdValComparator(int numHits, String field, boolean sortMissingLast, boolean reverse) {
|
||||
public TermOrdValComparator(
|
||||
int numHits, String field, boolean sortMissingLast, boolean reverse, boolean enableSkipping) {
|
||||
canSkipDocuments = enableSkipping;
|
||||
ords = new int[numHits];
|
||||
values = new BytesRef[numHits];
|
||||
tempBRs = new BytesRefBuilder[numHits];
|
||||
|
|
|
@ -679,7 +679,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
IllegalArgumentException.class,
|
||||
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField)));
|
||||
// assert that when sort optimization is disabled we can use LONG sort on int field
|
||||
longSortOnIntField.setOptimizeSortWithPoints(false);
|
||||
longSortOnIntField.setOptimizeSortWithIndexedData(false);
|
||||
searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField));
|
||||
|
||||
SortField intSortOnLongField = new SortField("longField", SortField.Type.INT);
|
||||
|
@ -687,7 +687,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
IllegalArgumentException.class,
|
||||
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField)));
|
||||
// assert that when sort optimization is disabled we can use INT sort on long field
|
||||
intSortOnLongField.setOptimizeSortWithPoints(false);
|
||||
intSortOnLongField.setOptimizeSortWithIndexedData(false);
|
||||
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField));
|
||||
|
||||
SortField intSortOnIntRangeField = new SortField("intRange", SortField.Type.INT);
|
||||
|
@ -695,7 +695,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
IllegalArgumentException.class,
|
||||
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField)));
|
||||
// assert that when sort optimization is disabled we can use INT sort on intRange field
|
||||
intSortOnIntRangeField.setOptimizeSortWithPoints(false);
|
||||
intSortOnIntRangeField.setOptimizeSortWithIndexedData(false);
|
||||
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField));
|
||||
|
||||
reader.close();
|
||||
|
@ -823,7 +823,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
boolean reverse = random().nextBoolean();
|
||||
final SortField sortField =
|
||||
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
|
||||
sortField.setOptimizeSortWithPoints(false);
|
||||
sortField.setOptimizeSortWithIndexedData(false);
|
||||
final Sort sort = new Sort(sortField); // sort without sort optimization
|
||||
final SortField sortField2 =
|
||||
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
|
||||
|
@ -901,6 +901,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
final DirectoryReader reader = DirectoryReader.open(writer);
|
||||
writer.close();
|
||||
doTestStringSortOptimization(reader);
|
||||
doTestStringSortOptimizationDisabled(reader);
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
@ -1025,6 +1026,27 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void doTestStringSortOptimizationDisabled(DirectoryReader reader) throws IOException {
|
||||
SortField sortField =
|
||||
random().nextBoolean()
|
||||
? new SortedSetSortField("my_field", false)
|
||||
: new SortField("my_field", SortField.Type.STRING);
|
||||
sortField.setMissingValue(SortField.STRING_LAST);
|
||||
sortField.setOptimizeSortWithIndexedData(false);
|
||||
|
||||
Sort sort = new Sort(sortField);
|
||||
final int numDocs = reader.numDocs();
|
||||
final int numHits = 5;
|
||||
final int totalHitsThreshold = 5;
|
||||
|
||||
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
|
||||
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
|
||||
IndexSearcher searcher =
|
||||
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
|
||||
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
|
||||
assertEquals(numDocs, topDocs.totalHits.value);
|
||||
}
|
||||
|
||||
private TopDocs assertSort(DirectoryReader reader, Sort sort, int n, FieldDoc after)
|
||||
throws IOException {
|
||||
TopDocs topDocs = assertSearchHits(reader, sort, n, after);
|
||||
|
|
|
@ -135,25 +135,22 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||
}
|
||||
|
||||
private FieldComparator<?> getStringComparator(int numHits) {
|
||||
FieldComparator<?> cmp =
|
||||
new TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST, getReverse()) {
|
||||
|
||||
@Override
|
||||
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
||||
throws IOException {
|
||||
SortedSetDocValues sortedSet = DocValues.getSortedSet(context.reader(), field);
|
||||
final BlockJoinSelector.Type type =
|
||||
order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
|
||||
final BitSet parents = parentFilter.getBitSet(context);
|
||||
final BitSet children = childFilter.getBitSet(context);
|
||||
if (children == null) {
|
||||
return DocValues.emptySorted();
|
||||
}
|
||||
return BlockJoinSelector.wrap(sortedSet, type, parents, toIter(children));
|
||||
}
|
||||
};
|
||||
cmp.disableSkipping();
|
||||
return cmp;
|
||||
return new TermOrdValComparator(
|
||||
numHits, getField(), missingValue == STRING_LAST, getReverse(), false) {
|
||||
@Override
|
||||
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
||||
throws IOException {
|
||||
SortedSetDocValues sortedSet = DocValues.getSortedSet(context.reader(), field);
|
||||
final BlockJoinSelector.Type type =
|
||||
order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
|
||||
final BitSet parents = parentFilter.getBitSet(context);
|
||||
final BitSet children = childFilter.getBitSet(context);
|
||||
if (children == null) {
|
||||
return DocValues.emptySorted();
|
||||
}
|
||||
return BlockJoinSelector.wrap(sortedSet, type, parents, toIter(children));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private FieldComparator<?> getIntComparator(int numHits) {
|
||||
|
|
Loading…
Reference in New Issue