mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 11:35:14 +00:00
LUCENE-10633 Disable sort optimization for SortedSetSortField (#3125)
Add ability to SortedSetSortField to disable sort optimization
This commit is contained in:
parent
61ef031f7f
commit
554fabf682
@ -93,6 +93,10 @@ New Features
|
|||||||
|
|
||||||
* LUCENE-10629: Support match set filtering with a query in MatchingFacetSetCounts. (Stefan Vodita, Shai Erera)
|
* LUCENE-10629: Support match set filtering with a query in MatchingFacetSetCounts. (Stefan Vodita, Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-10633: SortField#setOptimizeSortWithIndexedData and
|
||||||
|
SortField#getOptimizeSortWithIndexedData were introduced to provide
|
||||||
|
an option to disable sort optimization for various sort fields. (Mayya Sharipova)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
---------------------
|
---------------------
|
||||||
* LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani)
|
* LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani)
|
||||||
|
@ -45,6 +45,10 @@ import org.apache.lucene.util.NumericUtils;
|
|||||||
* optimization to skip non-competitive documents. This optimization relies on the assumption that
|
* optimization to skip non-competitive documents. This optimization relies on the assumption that
|
||||||
* the same data is stored in these points and doc values.
|
* the same data is stored in these points and doc values.
|
||||||
*
|
*
|
||||||
|
* <p>Sorting on a SORTED(_SET) field that is indexed with both doc values and term index may use an
|
||||||
|
* optimization to skip non-competitive documents. This optimization relies on the assumption that
|
||||||
|
* the same data is stored in the term index and doc values.
|
||||||
|
*
|
||||||
* <p>Created: Feb 11, 2004 1:25:29 PM
|
* <p>Created: Feb 11, 2004 1:25:29 PM
|
||||||
*
|
*
|
||||||
* @since lucene 1.4
|
* @since lucene 1.4
|
||||||
@ -131,8 +135,8 @@ public class SortField {
|
|||||||
// Used for 'sortMissingFirst/Last'
|
// Used for 'sortMissingFirst/Last'
|
||||||
protected Object missingValue = null;
|
protected Object missingValue = null;
|
||||||
|
|
||||||
// Indicates if numeric sort should be optimized with Points index. Set to true by default.
|
// Indicates if sort should be optimized with indexed data. Set to true by default.
|
||||||
@Deprecated private boolean optimizeSortWithPoints = true;
|
@Deprecated private boolean optimizeSortWithIndexedData = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a sort by terms in the given field with the type of term values explicitly given.
|
* Creates a sort by terms in the given field with the type of term values explicitly given.
|
||||||
@ -537,7 +541,10 @@ public class SortField {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case STRING:
|
case STRING:
|
||||||
return new TermOrdValComparator(numHits, field, missingValue == STRING_LAST, reverse);
|
fieldComparator =
|
||||||
|
new TermOrdValComparator(
|
||||||
|
numHits, field, missingValue == STRING_LAST, reverse, enableSkipping);
|
||||||
|
break;
|
||||||
|
|
||||||
case STRING_VAL:
|
case STRING_VAL:
|
||||||
fieldComparator =
|
fieldComparator =
|
||||||
@ -551,7 +558,7 @@ public class SortField {
|
|||||||
default:
|
default:
|
||||||
throw new IllegalStateException("Illegal sort type: " + type);
|
throw new IllegalStateException("Illegal sort type: " + type);
|
||||||
}
|
}
|
||||||
if (getOptimizeSortWithPoints() == false) {
|
if (getOptimizeSortWithIndexedData() == false) {
|
||||||
fieldComparator.disableSkipping();
|
fieldComparator.disableSkipping();
|
||||||
}
|
}
|
||||||
return fieldComparator;
|
return fieldComparator;
|
||||||
@ -626,6 +633,39 @@ public class SortField {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enables/disables sort optimization that uses the indexed data.
|
||||||
|
*
|
||||||
|
* <p>Enabled by default. By default, sorting on a numeric field activates point sort optimization
|
||||||
|
* that can efficiently skip over non-competitive hits. Sort optimization has a number of
|
||||||
|
* requirements, one of which is that SortField.Type matches the Point type with which the field
|
||||||
|
* was indexed (e.g. sort on IntPoint field should use SortField.Type.INT). Another requirement is
|
||||||
|
* that the same data is indexed with points and doc values for the field.
|
||||||
|
*
|
||||||
|
* <p>By default, sorting on a SORTED(_SET) field activates sort optimization that can efficiently
|
||||||
|
* skip over non-competitive hits. Sort optimization requires that the same data is indexed with
|
||||||
|
* term index and doc values for the field.
|
||||||
|
*
|
||||||
|
* @param optimizeSortWithIndexedData providing {@code false} disables the optimization, in cases
|
||||||
|
* where these requirements can't be met.
|
||||||
|
* @deprecated should only be used for compatibility with 8.x indices that got created with
|
||||||
|
* inconsistent data across fields, or the wrong sort configuration in the index sort
|
||||||
|
*/
|
||||||
|
@Deprecated // Remove in Lucene 10
|
||||||
|
public void setOptimizeSortWithIndexedData(boolean optimizeSortWithIndexedData) {
|
||||||
|
this.optimizeSortWithIndexedData = optimizeSortWithIndexedData;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether sorting should be optimized with indexed data
|
||||||
|
*
|
||||||
|
* @return whether sorting should be optimized with indexed data
|
||||||
|
*/
|
||||||
|
@Deprecated // Remove in Lucene 10
|
||||||
|
public boolean getOptimizeSortWithIndexedData() {
|
||||||
|
return optimizeSortWithIndexedData;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Enables/disables numeric sort optimization to use the Points index.
|
* Enables/disables numeric sort optimization to use the Points index.
|
||||||
*
|
*
|
||||||
@ -638,20 +678,22 @@ public class SortField {
|
|||||||
* @param optimizeSortWithPoints providing {@code false} disables the optimization, in cases where
|
* @param optimizeSortWithPoints providing {@code false} disables the optimization, in cases where
|
||||||
* these requirements can't be met.
|
* these requirements can't be met.
|
||||||
* @deprecated should only be used for compatibility with 8.x indices that got created with
|
* @deprecated should only be used for compatibility with 8.x indices that got created with
|
||||||
* inconsistent data across fields, or the wrong sort configuration in the index sort
|
* inconsistent data across fields, or the wrong sort configuration in the index sort. This is
|
||||||
|
* a duplicate method for {@code SortField#setOptimizeSortWithIndexedData}.
|
||||||
*/
|
*/
|
||||||
@Deprecated // Remove in Lucene 10
|
@Deprecated // Remove in Lucene 10
|
||||||
public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) {
|
public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) {
|
||||||
this.optimizeSortWithPoints = optimizeSortWithPoints;
|
setOptimizeSortWithIndexedData(optimizeSortWithPoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns whether sort optimization should be optimized with points index
|
* Returns whether sort optimization should be optimized with points index
|
||||||
*
|
*
|
||||||
* @return whether sort optimization should be optimized with points index
|
* @return whether sort optimization should be optimized with points index
|
||||||
|
* @deprecated This is a duplicate method for {@code SortField#getOptimizeSortWithIndexedData}.
|
||||||
*/
|
*/
|
||||||
@Deprecated // Remove in Lucene 10
|
@Deprecated // Remove in Lucene 10
|
||||||
public boolean getOptimizeSortWithPoints() {
|
public boolean getOptimizeSortWithPoints() {
|
||||||
return optimizeSortWithPoints;
|
return getOptimizeSortWithIndexedData();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -333,7 +333,7 @@ public class SortedNumericSortField extends SortField {
|
|||||||
default:
|
default:
|
||||||
throw new AssertionError();
|
throw new AssertionError();
|
||||||
}
|
}
|
||||||
if (getOptimizeSortWithPoints() == false) {
|
if (getOptimizeSortWithIndexedData() == false) {
|
||||||
fieldComparator.disableSkipping();
|
fieldComparator.disableSkipping();
|
||||||
}
|
}
|
||||||
return fieldComparator;
|
return fieldComparator;
|
||||||
|
@ -179,7 +179,9 @@ public class SortedSetSortField extends SortField {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
|
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
|
||||||
return new TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST, reverse) {
|
boolean finalEnableSkipping = enableSkipping && getOptimizeSortWithIndexedData();
|
||||||
|
return new TermOrdValComparator(
|
||||||
|
numHits, getField(), missingValue == STRING_LAST, reverse, finalEnableSkipping) {
|
||||||
@Override
|
@Override
|
||||||
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -97,7 +97,9 @@ public class TermOrdValComparator extends FieldComparator<BytesRef> {
|
|||||||
* Creates this, with control over how missing values are sorted. Pass sortMissingLast=true to put
|
* Creates this, with control over how missing values are sorted. Pass sortMissingLast=true to put
|
||||||
* missing values at the end.
|
* missing values at the end.
|
||||||
*/
|
*/
|
||||||
public TermOrdValComparator(int numHits, String field, boolean sortMissingLast, boolean reverse) {
|
public TermOrdValComparator(
|
||||||
|
int numHits, String field, boolean sortMissingLast, boolean reverse, boolean enableSkipping) {
|
||||||
|
canSkipDocuments = enableSkipping;
|
||||||
ords = new int[numHits];
|
ords = new int[numHits];
|
||||||
values = new BytesRef[numHits];
|
values = new BytesRef[numHits];
|
||||||
tempBRs = new BytesRefBuilder[numHits];
|
tempBRs = new BytesRefBuilder[numHits];
|
||||||
|
@ -679,7 +679,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField)));
|
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField)));
|
||||||
// assert that when sort optimization is disabled we can use LONG sort on int field
|
// assert that when sort optimization is disabled we can use LONG sort on int field
|
||||||
longSortOnIntField.setOptimizeSortWithPoints(false);
|
longSortOnIntField.setOptimizeSortWithIndexedData(false);
|
||||||
searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField));
|
searcher.search(new MatchAllDocsQuery(), 1, new Sort(longSortOnIntField));
|
||||||
|
|
||||||
SortField intSortOnLongField = new SortField("longField", SortField.Type.INT);
|
SortField intSortOnLongField = new SortField("longField", SortField.Type.INT);
|
||||||
@ -687,7 +687,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField)));
|
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField)));
|
||||||
// assert that when sort optimization is disabled we can use INT sort on long field
|
// assert that when sort optimization is disabled we can use INT sort on long field
|
||||||
intSortOnLongField.setOptimizeSortWithPoints(false);
|
intSortOnLongField.setOptimizeSortWithIndexedData(false);
|
||||||
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField));
|
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnLongField));
|
||||||
|
|
||||||
SortField intSortOnIntRangeField = new SortField("intRange", SortField.Type.INT);
|
SortField intSortOnIntRangeField = new SortField("intRange", SortField.Type.INT);
|
||||||
@ -695,7 +695,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||||||
IllegalArgumentException.class,
|
IllegalArgumentException.class,
|
||||||
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField)));
|
() -> searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField)));
|
||||||
// assert that when sort optimization is disabled we can use INT sort on intRange field
|
// assert that when sort optimization is disabled we can use INT sort on intRange field
|
||||||
intSortOnIntRangeField.setOptimizeSortWithPoints(false);
|
intSortOnIntRangeField.setOptimizeSortWithIndexedData(false);
|
||||||
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField));
|
searcher.search(new MatchAllDocsQuery(), 1, new Sort(intSortOnIntRangeField));
|
||||||
|
|
||||||
reader.close();
|
reader.close();
|
||||||
@ -823,7 +823,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||||||
boolean reverse = random().nextBoolean();
|
boolean reverse = random().nextBoolean();
|
||||||
final SortField sortField =
|
final SortField sortField =
|
||||||
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
|
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
|
||||||
sortField.setOptimizeSortWithPoints(false);
|
sortField.setOptimizeSortWithIndexedData(false);
|
||||||
final Sort sort = new Sort(sortField); // sort without sort optimization
|
final Sort sort = new Sort(sortField); // sort without sort optimization
|
||||||
final SortField sortField2 =
|
final SortField sortField2 =
|
||||||
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
|
new SortedNumericSortField("my_field", SortField.Type.LONG, reverse, type);
|
||||||
@ -901,6 +901,7 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||||||
final DirectoryReader reader = DirectoryReader.open(writer);
|
final DirectoryReader reader = DirectoryReader.open(writer);
|
||||||
writer.close();
|
writer.close();
|
||||||
doTestStringSortOptimization(reader);
|
doTestStringSortOptimization(reader);
|
||||||
|
doTestStringSortOptimizationDisabled(reader);
|
||||||
reader.close();
|
reader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
@ -1025,6 +1026,27 @@ public class TestSortOptimization extends LuceneTestCase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void doTestStringSortOptimizationDisabled(DirectoryReader reader) throws IOException {
|
||||||
|
SortField sortField =
|
||||||
|
random().nextBoolean()
|
||||||
|
? new SortedSetSortField("my_field", false)
|
||||||
|
: new SortField("my_field", SortField.Type.STRING);
|
||||||
|
sortField.setMissingValue(SortField.STRING_LAST);
|
||||||
|
sortField.setOptimizeSortWithIndexedData(false);
|
||||||
|
|
||||||
|
Sort sort = new Sort(sortField);
|
||||||
|
final int numDocs = reader.numDocs();
|
||||||
|
final int numHits = 5;
|
||||||
|
final int totalHitsThreshold = 5;
|
||||||
|
|
||||||
|
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
|
||||||
|
TopFieldCollector.createSharedManager(sort, numHits, null, totalHitsThreshold);
|
||||||
|
IndexSearcher searcher =
|
||||||
|
newSearcher(reader, random().nextBoolean(), random().nextBoolean(), false);
|
||||||
|
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), manager);
|
||||||
|
assertEquals(numDocs, topDocs.totalHits.value);
|
||||||
|
}
|
||||||
|
|
||||||
private TopDocs assertSort(DirectoryReader reader, Sort sort, int n, FieldDoc after)
|
private TopDocs assertSort(DirectoryReader reader, Sort sort, int n, FieldDoc after)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
TopDocs topDocs = assertSearchHits(reader, sort, n, after);
|
TopDocs topDocs = assertSearchHits(reader, sort, n, after);
|
||||||
|
@ -135,25 +135,22 @@ public class ToParentBlockJoinSortField extends SortField {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private FieldComparator<?> getStringComparator(int numHits) {
|
private FieldComparator<?> getStringComparator(int numHits) {
|
||||||
FieldComparator<?> cmp =
|
return new TermOrdValComparator(
|
||||||
new TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST, getReverse()) {
|
numHits, getField(), missingValue == STRING_LAST, getReverse(), false) {
|
||||||
|
@Override
|
||||||
@Override
|
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
||||||
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
|
throws IOException {
|
||||||
throws IOException {
|
SortedSetDocValues sortedSet = DocValues.getSortedSet(context.reader(), field);
|
||||||
SortedSetDocValues sortedSet = DocValues.getSortedSet(context.reader(), field);
|
final BlockJoinSelector.Type type =
|
||||||
final BlockJoinSelector.Type type =
|
order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
|
||||||
order ? BlockJoinSelector.Type.MAX : BlockJoinSelector.Type.MIN;
|
final BitSet parents = parentFilter.getBitSet(context);
|
||||||
final BitSet parents = parentFilter.getBitSet(context);
|
final BitSet children = childFilter.getBitSet(context);
|
||||||
final BitSet children = childFilter.getBitSet(context);
|
if (children == null) {
|
||||||
if (children == null) {
|
return DocValues.emptySorted();
|
||||||
return DocValues.emptySorted();
|
}
|
||||||
}
|
return BlockJoinSelector.wrap(sortedSet, type, parents, toIter(children));
|
||||||
return BlockJoinSelector.wrap(sortedSet, type, parents, toIter(children));
|
}
|
||||||
}
|
};
|
||||||
};
|
|
||||||
cmp.disableSkipping();
|
|
||||||
return cmp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private FieldComparator<?> getIntComparator(int numHits) {
|
private FieldComparator<?> getIntComparator(int numHits) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user