LUCENE-9599 Disable sort optim on index sort (#2075)

Disable sort optimization in comparators on index sort.

Currently, if search sort is equal or a part of the index sort, we have
an early termination in TopFieldCollector.
But comparators are not aware of the index sort, and may run
sort optimization even if the search sort is congruent with
the index sort.

This patch:
- adds `disableSkipping` method to `FieldComparator`,
 This method is called by `TopFieldCollector`, and currently called 
  when  the search sort is congruent with the index sort,
  but more conditions can be added. 
- disables sort optimization in comparators in this case.
- removes a private  `MultiComparatorLeafCollector` class, because the only
  class that extends `MultiComparatorLeafCollector` was `TopFieldLeafCollector`.
  The logic of the deleted `TopFieldLeafCollector` is added to `TopFieldLeafCollector`.

Relates to #1351
This commit is contained in:
Mayya Sharipova 2020-12-03 12:08:46 -05:00 committed by GitHub
parent f24b497c72
commit 69de1a490f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 42 deletions

View File

@ -142,6 +142,17 @@ public abstract class FieldComparator<T> {
public void setSingleSort() {
}
/**
* Informs the comparator that the skipping of documents should be disabled.
* This function is called by TopFieldCollector in cases when the skipping functionality
* should not be applied or not necessary. An example could be when
* search sort is a part of the index sort, and can be already efficiently
* handled by TopFieldCollector, and doing extra work for skipping in the comparator
* is redundant.
*/
public void disableSkipping() {
}
/** Sorts by descending relevance. NOTE: if you are
* sorting only by descending relevance and then
* secondarily by ascending docID, performance is faster

View File

@ -131,5 +131,5 @@ public interface LeafFieldComparator {
*/
default void setHitsThresholdReached() throws IOException{
}
}

View File

@ -46,38 +46,31 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
// always compare lower than a real hit; this would
// save having to check queueFull on each insert
private static abstract class MultiComparatorLeafCollector implements LeafCollector {
private abstract class TopFieldLeafCollector implements LeafCollector {
final LeafFieldComparator comparator;
final int reverseMul;
Scorable scorer;
MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
if (comparators.length == 1) {
this.reverseMul = reverseMul[0];
this.comparator = comparators[0];
} else {
this.reverseMul = 1;
this.comparator = new MultiLeafFieldComparator(comparators, reverseMul);
}
}
@Override
public void setScorer(Scorable scorer) throws IOException {
comparator.setScorer(scorer);
this.scorer = scorer;
}
}
private abstract class TopFieldLeafCollector extends MultiComparatorLeafCollector {
final boolean canEarlyTerminate;
boolean collectedAllCompetitiveHits = false;
TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
super(queue.getComparators(context), queue.getReverseMul());
final Sort indexSort = context.reader().getMetaData().getSort();
canEarlyTerminate = canEarlyTerminate(sort, indexSort);
// as all segments are sorted in the same way, enough to check only the 1st segment for indexSort
if (searchSortPartOfIndexSort == null) {
final Sort indexSort = context.reader().getMetaData().getSort();
searchSortPartOfIndexSort = canEarlyTerminate(sort, indexSort);
if (searchSortPartOfIndexSort) {
firstComparator.disableSkipping();
}
}
LeafFieldComparator[] comparators = queue.getComparators(context);
int[] reverseMuls = queue.getReverseMul();
if (comparators.length == 1) {
this.reverseMul = reverseMuls[0];
this.comparator = comparators[0];
} else {
this.reverseMul = 1;
this.comparator = new MultiLeafFieldComparator(comparators, reverseMuls);
}
}
void countHit(int doc) throws IOException {
@ -100,7 +93,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is largest than anything else in the queue, and
// therefore not competitive.
if (canEarlyTerminate) {
if (searchSortPartOfIndexSort) {
if (hitsThresholdChecker.isThresholdReached()) {
totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
throw new CollectionTerminatedException();
@ -139,7 +132,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
this.scorer = scorer;
comparator.setScorer(scorer);
minCompetitiveScore = 0f;
updateMinCompetitiveScore(scorer);
if (minScoreAcc != null) {
@ -154,8 +148,6 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
// TODO: remove this code when all bulk scores similar to {@code DefaultBulkScorer} use collectors' iterator,
// as early termination should be implemented in their respective comparators and removed from a collector
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
return canEarlyTerminateOnDocId(searchSort) ||
canEarlyTerminateOnPrefix(searchSort, indexSort);
@ -286,9 +278,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final int numHits;
final HitsThresholdChecker hitsThresholdChecker;
final FieldComparator.RelevanceComparator relevanceComparator;
final FieldComparator<?> firstComparator;
final boolean canSetMinScore;
Boolean searchSortPartOfIndexSort = null; // shows if Search Sort if a part of the Index Sort
// an accumulator that maintains the maximum of the segment's minimum competitive scores
final MaxScoreAccumulator minScoreAcc;
// the current local minimum competitive score already propagated to the underlying scorer
@ -314,17 +308,15 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
this.numHits = numHits;
this.hitsThresholdChecker = hitsThresholdChecker;
this.numComparators = pq.getComparators().length;
FieldComparator<?> firstComparator = pq.getComparators()[0];
this.firstComparator = pq.getComparators()[0];
int reverseMul = pq.reverseMul[0];
if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
&& reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
&& hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
relevanceComparator = (FieldComparator.RelevanceComparator) firstComparator;
scoreMode = ScoreMode.TOP_SCORES;
canSetMinScore = true;
} else {
relevanceComparator = null;
canSetMinScore = false;
if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
@ -360,8 +352,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
if (canSetMinScore
&& queueFull
&& hitsThresholdChecker.isThresholdReached()) {
assert bottom != null && relevanceComparator != null;
float minScore = relevanceComparator.value(bottom.slot);
assert bottom != null;
float minScore = (float) firstComparator.value(bottom.slot);
if (minScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(minScore);
minCompetitiveScore = minScore;

View File

@ -39,19 +39,19 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
protected final T missingValue;
protected final String field;
protected final boolean reverse;
protected final boolean primarySort;
private final int bytesCount; // how many bytes are used to encode this number
protected boolean topValueSet;
protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
protected boolean hitsThresholdReached;
protected boolean queueFull;
private boolean canSkipDocuments;
protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
this.field = field;
this.missingValue = missingValue;
this.reverse = reverse;
this.primarySort = (sortPos == 0);
this.canSkipDocuments = (sortPos == 0); // skipping functionality is only relevant for primary sort
this.bytesCount = bytesCount;
}
@ -65,17 +65,22 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
singleSort = true;
}
@Override
public void disableSkipping() {
canSkipDocuments = false;
}
/**
* Leaf comparator for {@link NumericComparator} that provides skipping functionality
*/
public abstract class NumericLeafComparator implements LeafFieldComparator {
protected final NumericDocValues docValues;
private final PointValues pointValues;
private final boolean enableSkipping; // if skipping functionality should be enabled
private final boolean enableSkipping; // if skipping functionality should be enabled on this segment
private final int maxDoc;
private final byte[] minValueAsBytes;
private final byte[] maxValueAsBytes;
private DocIdSetIterator competitiveIterator;
private long iteratorCost;
private int maxDocVisited = 0;
@ -83,9 +88,9 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
public NumericLeafComparator(LeafReaderContext context) throws IOException {
this.docValues = getNumericDocValues(context, field);
this.pointValues = primarySort ? context.reader().getPointValues(field) : null;
this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null;
if (pointValues != null) {
this.enableSkipping = true; // skipping is enabled on primarySort and when points are available
this.enableSkipping = true; // skipping is enabled when points are available
this.maxDoc = context.reader().maxDoc();
this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;