From 69de1a490f4b78ca6479f944f495f4f4beb80771 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Thu, 3 Dec 2020 12:08:46 -0500 Subject: [PATCH] LUCENE-9599 Disable sort optim on index sort (#2075) Disable sort optimization in comparators on index sort. Currently, if search sort is equal or a part of the index sort, we have an early termination in TopFieldCollector. But comparators are not aware of the index sort, and may run sort optimization even if the search sort is congruent with the index sort. This patch: - adds `disableSkipping` method to `FieldComparator`, This method is called by `TopFieldCollector`, and currently called when the search sort is congruent with the index sort, but more conditions can be added. - disables sort optimization in comparators in this case. - removes a private `MultiComparatorLeafCollector` class, because the only class that extends `MultiComparatorLeafCollector` was `TopFieldLeafCollector`. The logic of the deleted `TopFieldLeafCollector` is added to `TopFieldLeafCollector`. Relates to #1351 --- .../apache/lucene/search/FieldComparator.java | 11 ++++ .../lucene/search/LeafFieldComparator.java | 2 +- .../lucene/search/TopFieldCollector.java | 62 ++++++++----------- .../search/comparators/NumericComparator.java | 17 +++-- 4 files changed, 50 insertions(+), 42 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java index d0222389adf..95349983c85 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java @@ -142,6 +142,17 @@ public abstract class FieldComparator { public void setSingleSort() { } + /** + * Informs the comparator that the skipping of documents should be disabled. + * This function is called by TopFieldCollector in cases when the skipping functionality + * should not be applied or not necessary. An example could be when + * search sort is a part of the index sort, and can be already efficiently + * handled by TopFieldCollector, and doing extra work for skipping in the comparator + * is redundant. + */ + public void disableSkipping() { + } + /** Sorts by descending relevance. NOTE: if you are * sorting only by descending relevance and then * secondarily by ascending docID, performance is faster diff --git a/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java b/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java index 97f745c2f5e..6a93658a966 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/LeafFieldComparator.java @@ -131,5 +131,5 @@ public interface LeafFieldComparator { */ default void setHitsThresholdReached() throws IOException{ } - + } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index e529892e6e7..a47c7bb253c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -46,38 +46,31 @@ public abstract class TopFieldCollector extends TopDocsCollector { // always compare lower than a real hit; this would // save having to check queueFull on each insert - private static abstract class MultiComparatorLeafCollector implements LeafCollector { + private abstract class TopFieldLeafCollector implements LeafCollector { final LeafFieldComparator comparator; final int reverseMul; Scorable scorer; - - MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) { - if (comparators.length == 1) { - this.reverseMul = reverseMul[0]; - this.comparator = comparators[0]; - } else { - this.reverseMul = 1; - this.comparator = new MultiLeafFieldComparator(comparators, reverseMul); - } - } - - @Override - public void setScorer(Scorable scorer) throws IOException { - comparator.setScorer(scorer); - this.scorer = scorer; - } - } - - private abstract class TopFieldLeafCollector extends MultiComparatorLeafCollector { - - final boolean canEarlyTerminate; boolean collectedAllCompetitiveHits = false; TopFieldLeafCollector(FieldValueHitQueue queue, Sort sort, LeafReaderContext context) throws IOException { - super(queue.getComparators(context), queue.getReverseMul()); - final Sort indexSort = context.reader().getMetaData().getSort(); - canEarlyTerminate = canEarlyTerminate(sort, indexSort); + // as all segments are sorted in the same way, enough to check only the 1st segment for indexSort + if (searchSortPartOfIndexSort == null) { + final Sort indexSort = context.reader().getMetaData().getSort(); + searchSortPartOfIndexSort = canEarlyTerminate(sort, indexSort); + if (searchSortPartOfIndexSort) { + firstComparator.disableSkipping(); + } + } + LeafFieldComparator[] comparators = queue.getComparators(context); + int[] reverseMuls = queue.getReverseMul(); + if (comparators.length == 1) { + this.reverseMul = reverseMuls[0]; + this.comparator = comparators[0]; + } else { + this.reverseMul = 1; + this.comparator = new MultiLeafFieldComparator(comparators, reverseMuls); + } } void countHit(int doc) throws IOException { @@ -100,7 +93,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { // since docs are visited in doc Id order, if compare is 0, it means // this document is largest than anything else in the queue, and // therefore not competitive. - if (canEarlyTerminate) { + if (searchSortPartOfIndexSort) { if (hitsThresholdChecker.isThresholdReached()) { totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO; throw new CollectionTerminatedException(); @@ -139,7 +132,8 @@ public abstract class TopFieldCollector extends TopDocsCollector { @Override public void setScorer(Scorable scorer) throws IOException { - super.setScorer(scorer); + this.scorer = scorer; + comparator.setScorer(scorer); minCompetitiveScore = 0f; updateMinCompetitiveScore(scorer); if (minScoreAcc != null) { @@ -154,8 +148,6 @@ public abstract class TopFieldCollector extends TopDocsCollector { } - // TODO: remove this code when all bulk scores similar to {@code DefaultBulkScorer} use collectors' iterator, - // as early termination should be implemented in their respective comparators and removed from a collector static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) { return canEarlyTerminateOnDocId(searchSort) || canEarlyTerminateOnPrefix(searchSort, indexSort); @@ -286,9 +278,11 @@ public abstract class TopFieldCollector extends TopDocsCollector { final int numHits; final HitsThresholdChecker hitsThresholdChecker; - final FieldComparator.RelevanceComparator relevanceComparator; + final FieldComparator firstComparator; final boolean canSetMinScore; + Boolean searchSortPartOfIndexSort = null; // shows if Search Sort if a part of the Index Sort + // an accumulator that maintains the maximum of the segment's minimum competitive scores final MaxScoreAccumulator minScoreAcc; // the current local minimum competitive score already propagated to the underlying scorer @@ -314,17 +308,15 @@ public abstract class TopFieldCollector extends TopDocsCollector { this.numHits = numHits; this.hitsThresholdChecker = hitsThresholdChecker; this.numComparators = pq.getComparators().length; - FieldComparator firstComparator = pq.getComparators()[0]; + this.firstComparator = pq.getComparators()[0]; int reverseMul = pq.reverseMul[0]; if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class) && reverseMul == 1 // if the natural sort is preserved (sort by descending relevance) && hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) { - relevanceComparator = (FieldComparator.RelevanceComparator) firstComparator; scoreMode = ScoreMode.TOP_SCORES; canSetMinScore = true; } else { - relevanceComparator = null; canSetMinScore = false; if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) { scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS; @@ -360,8 +352,8 @@ public abstract class TopFieldCollector extends TopDocsCollector { if (canSetMinScore && queueFull && hitsThresholdChecker.isThresholdReached()) { - assert bottom != null && relevanceComparator != null; - float minScore = relevanceComparator.value(bottom.slot); + assert bottom != null; + float minScore = (float) firstComparator.value(bottom.slot); if (minScore > minCompetitiveScore) { scorer.setMinCompetitiveScore(minScore); minCompetitiveScore = minScore; diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index fa5f2679cd3..11b186b4184 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -39,19 +39,19 @@ public abstract class NumericComparator extends FieldComparato protected final T missingValue; protected final String field; protected final boolean reverse; - protected final boolean primarySort; private final int bytesCount; // how many bytes are used to encode this number protected boolean topValueSet; protected boolean singleSort; // singleSort is true, if sort is based on a single sort field. protected boolean hitsThresholdReached; protected boolean queueFull; + private boolean canSkipDocuments; protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) { this.field = field; this.missingValue = missingValue; this.reverse = reverse; - this.primarySort = (sortPos == 0); + this.canSkipDocuments = (sortPos == 0); // skipping functionality is only relevant for primary sort this.bytesCount = bytesCount; } @@ -65,17 +65,22 @@ public abstract class NumericComparator extends FieldComparato singleSort = true; } + @Override + public void disableSkipping() { + canSkipDocuments = false; + } + /** * Leaf comparator for {@link NumericComparator} that provides skipping functionality */ public abstract class NumericLeafComparator implements LeafFieldComparator { protected final NumericDocValues docValues; private final PointValues pointValues; - private final boolean enableSkipping; // if skipping functionality should be enabled + private final boolean enableSkipping; // if skipping functionality should be enabled on this segment private final int maxDoc; private final byte[] minValueAsBytes; private final byte[] maxValueAsBytes; - + private DocIdSetIterator competitiveIterator; private long iteratorCost; private int maxDocVisited = 0; @@ -83,9 +88,9 @@ public abstract class NumericComparator extends FieldComparato public NumericLeafComparator(LeafReaderContext context) throws IOException { this.docValues = getNumericDocValues(context, field); - this.pointValues = primarySort ? context.reader().getPointValues(field) : null; + this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null; if (pointValues != null) { - this.enableSkipping = true; // skipping is enabled on primarySort and when points are available + this.enableSkipping = true; // skipping is enabled when points are available this.maxDoc = context.reader().maxDoc(); this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null; this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;