diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9b9f6d37d2f..ff2713ba6a5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -93,6 +93,10 @@ New Features * LUCENE-10629: Support match set filtering with a query in MatchingFacetSetCounts. (Stefan Vodita, Shai Erera) +* LUCENE-10633: SortField#setOptimizeSortWithIndexedData and + SortField#getOptimizeSortWithIndexedData were introduced to provide + an option to disable sort optimization for various sort fields. (Mayya Sharipova) + Improvements --------------------- * LUCENE-10592: Build HNSW Graph on indexing. (Mayya Sharipova, Adrien Grand, Julie Tibshirani) diff --git a/lucene/core/src/java/org/apache/lucene/search/SortField.java b/lucene/core/src/java/org/apache/lucene/search/SortField.java index 0240dc32d45..953e6ff7740 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SortField.java +++ b/lucene/core/src/java/org/apache/lucene/search/SortField.java @@ -45,6 +45,10 @@ import org.apache.lucene.util.NumericUtils; * optimization to skip non-competitive documents. This optimization relies on the assumption that * the same data is stored in these points and doc values. * + *
Sorting on a SORTED(_SET) field that is indexed with both doc values and term index may use an + * optimization to skip non-competitive documents. This optimization relies on the assumption that + * the same data is stored in the term index and doc values. + * *
Created: Feb 11, 2004 1:25:29 PM * * @since lucene 1.4 @@ -131,8 +135,8 @@ public class SortField { // Used for 'sortMissingFirst/Last' protected Object missingValue = null; - // Indicates if numeric sort should be optimized with Points index. Set to true by default. - @Deprecated private boolean optimizeSortWithPoints = true; + // Indicates if sort should be optimized with indexed data. Set to true by default. + @Deprecated private boolean optimizeSortWithIndexedData = true; /** * Creates a sort by terms in the given field with the type of term values explicitly given. @@ -537,7 +541,10 @@ public class SortField { break; case STRING: - return new TermOrdValComparator(numHits, field, missingValue == STRING_LAST, reverse); + fieldComparator = + new TermOrdValComparator( + numHits, field, missingValue == STRING_LAST, reverse, enableSkipping); + break; case STRING_VAL: fieldComparator = @@ -551,7 +558,7 @@ public class SortField { default: throw new IllegalStateException("Illegal sort type: " + type); } - if (getOptimizeSortWithPoints() == false) { + if (getOptimizeSortWithIndexedData() == false) { fieldComparator.disableSkipping(); } return fieldComparator; @@ -626,6 +633,39 @@ public class SortField { } } + /** + * Enables/disables sort optimization to use the indexed data. + * + *
Enabled by default. By default, sorting on a numeric field activates point sort optimization + * that can efficiently skip over non-competitive hits. Sort optimization has a number of + * requirements, one of which is that SortField.Type matches the Point type with which the field + * was indexed (e.g. sort on IntPoint field should use SortField.Type.INT). Another requirement is + * that the same data is indexed with points and doc values for the field. + * + *
By default, sorting on a SORTED(_SET) field activates sort optimization that can efficiently
+ * skip over non-competitive hits. Sort optimization requires that the same data is indexed with
+ * term index and doc values for the field.
+ *
+ * @param optimizeSortWithIndexedData providing {@code false} disables the optimization, in cases
+ * where these requirements can't be met.
+ * @deprecated should only be used for compatibility with 8.x indices that got created with
+ * inconsistent data across fields, or the wrong sort configuration in the index sort
+ */
+ @Deprecated // Remove in Lucene 10
+ public void setOptimizeSortWithIndexedData(boolean optimizeSortWithIndexedData) {
+ this.optimizeSortWithIndexedData = optimizeSortWithIndexedData;
+ }
+
+ /**
+ * Returns whether sorting may be optimized with indexed data. Enabled by default.
+ *
+ * @return {@code true} if sorting may use indexed data to skip non-competitive hits
+ */
+ @Deprecated // Remove in Lucene 10
+ public boolean getOptimizeSortWithIndexedData() {
+ return optimizeSortWithIndexedData;
+ }
+
/**
* Enables/disables numeric sort optimization to use the Points index.
*
@@ -638,20 +678,22 @@ public class SortField {
* @param optimizeSortWithPoints providing {@code false} disables the optimization, in cases where
* these requirements can't be met.
* @deprecated should only be used for compatibility with 8.x indices that got created with
- * inconsistent data across fields, or the wrong sort configuration in the index sort
+ * inconsistent data across fields, or the wrong sort configuration in the index sort. Use
+ * {@link #setOptimizeSortWithIndexedData(boolean)} instead; this method delegates to it.
*/
@Deprecated // Remove in Lucene 10
public void setOptimizeSortWithPoints(boolean optimizeSortWithPoints) {
- this.optimizeSortWithPoints = optimizeSortWithPoints;
+ setOptimizeSortWithIndexedData(optimizeSortWithPoints);
}
/**
* Returns whether sort optimization should be optimized with points index
*
* @return whether sort optimization should be optimized with points index
+ * @deprecated Use {@link #getOptimizeSortWithIndexedData()} instead.
*/
@Deprecated // Remove in Lucene 10
public boolean getOptimizeSortWithPoints() {
- return optimizeSortWithPoints;
+ return getOptimizeSortWithIndexedData();
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java b/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
index 38de65414eb..069ecdccd50 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SortedNumericSortField.java
@@ -333,7 +333,7 @@ public class SortedNumericSortField extends SortField {
default:
throw new AssertionError();
}
- if (getOptimizeSortWithPoints() == false) {
+ if (getOptimizeSortWithIndexedData() == false) {
fieldComparator.disableSkipping();
}
return fieldComparator;
diff --git a/lucene/core/src/java/org/apache/lucene/search/SortedSetSortField.java b/lucene/core/src/java/org/apache/lucene/search/SortedSetSortField.java
index 80db1ebecc9..5d1736a8ca4 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SortedSetSortField.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SortedSetSortField.java
@@ -179,7 +179,9 @@ public class SortedSetSortField extends SortField {
@Override
public FieldComparator> getComparator(int numHits, boolean enableSkipping) {
- return new TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST, reverse) {
+ boolean finalEnableSkipping = enableSkipping && getOptimizeSortWithIndexedData();
+ return new TermOrdValComparator(
+ numHits, getField(), missingValue == STRING_LAST, reverse, finalEnableSkipping) {
@Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
index cff3f846162..b0f2260148c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/TermOrdValComparator.java
@@ -97,7 +97,9 @@ public class TermOrdValComparator extends FieldComparator