LUCENE-9599 Disable sort optim on index sort (#2075)

Disable sort optimization in comparators on index sort. Currently, if search sort is equal or a part of the index sort, we have an early termination in TopFieldCollector. But comparators are not aware of the index sort, and may run sort optimization even if the search sort is congruent with the index sort. This patch: - adds `disableSkipping` method to `FieldComparator`, This method is called by `TopFieldCollector`, and currently called when the search sort is congruent with the index sort, but more conditions can be added. - disables sort optimization in comparators in this case. - removes a private `MultiComparatorLeafCollector` class, because the only class that extends `MultiComparatorLeafCollector` was `TopFieldLeafCollector`. The logic of the deleted `TopFieldLeafCollector` is added to `TopFieldLeafCollector`. Relates to #1351
2020-12-03 12:08:46 -05:00 · 2020-12-03 12:08:46 -05:00 · 69de1a490f
parent f24b497c72
commit 69de1a490f
4 changed files with 50 additions and 42 deletions
--- a/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
@ -142,6 +142,17 @@ public abstract class FieldComparator<T> {
  public void setSingleSort() {
  }

+  /**
+   * Informs the comparator that the skipping of documents should be disabled.
+   * This function is called by TopFieldCollector in cases when the skipping functionality
+   * should not be applied or not necessary. An example could be when
+   * search sort is a part of the index sort, and can be already efficiently
+   * handled by TopFieldCollector, and doing extra work for skipping in the comparator
+   * is redundant.
+   */
+  public void disableSkipping() {
+  }
+
  /** Sorts by descending relevance.  NOTE: if you are
   *  sorting only by descending relevance and then
   *  secondarily by ascending docID, performance is faster
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@ -46,38 +46,31 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
  // always compare lower than a real hit; this would
  // save having to check queueFull on each insert

-  private static abstract class MultiComparatorLeafCollector implements LeafCollector {
+  private abstract class TopFieldLeafCollector implements LeafCollector  {

    final LeafFieldComparator comparator;
    final int reverseMul;
    Scorable scorer;
-
-    MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
-      if (comparators.length == 1) {
-        this.reverseMul = reverseMul[0];
-        this.comparator = comparators[0];
-      } else {
-        this.reverseMul = 1;
-        this.comparator = new MultiLeafFieldComparator(comparators, reverseMul);
-      }
-    }
-
-    @Override
-    public void setScorer(Scorable scorer) throws IOException {
-      comparator.setScorer(scorer);
-      this.scorer = scorer;
-    }
-  }
-
-  private abstract class TopFieldLeafCollector extends MultiComparatorLeafCollector {
-
-    final boolean canEarlyTerminate;
    boolean collectedAllCompetitiveHits = false;

    TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
-      super(queue.getComparators(context), queue.getReverseMul());
+      // as all segments are sorted in the same way, enough to check only the 1st segment for indexSort
+      if (searchSortPartOfIndexSort == null) {
        final Sort indexSort = context.reader().getMetaData().getSort();
-      canEarlyTerminate = canEarlyTerminate(sort, indexSort);
+        searchSortPartOfIndexSort = canEarlyTerminate(sort, indexSort);
+        if (searchSortPartOfIndexSort) {
+          firstComparator.disableSkipping();
+        }
+      }
+      LeafFieldComparator[] comparators = queue.getComparators(context);
+      int[] reverseMuls = queue.getReverseMul();
+      if (comparators.length == 1) {
+        this.reverseMul = reverseMuls[0];
+        this.comparator = comparators[0];
+      } else {
+        this.reverseMul = 1;
+        this.comparator = new MultiLeafFieldComparator(comparators, reverseMuls);
+      }
    }

    void countHit(int doc) throws IOException {
@ -100,7 +93,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
        // since docs are visited in doc Id order, if compare is 0, it means
        // this document is largest than anything else in the queue, and
        // therefore not competitive.
-        if (canEarlyTerminate) {
+        if (searchSortPartOfIndexSort) {
          if (hitsThresholdChecker.isThresholdReached()) {
            totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
            throw new CollectionTerminatedException();
@ -139,7 +132,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {

    @Override
    public void setScorer(Scorable scorer) throws IOException {
-      super.setScorer(scorer);
+      this.scorer = scorer;
+      comparator.setScorer(scorer);
      minCompetitiveScore = 0f;
      updateMinCompetitiveScore(scorer);
      if (minScoreAcc != null) {
@ -154,8 +148,6 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {

  }

-  // TODO: remove this code when all bulk scores similar to {@code DefaultBulkScorer} use collectors' iterator,
-  // as early termination should be implemented in their respective comparators and removed from a collector
  static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
    return canEarlyTerminateOnDocId(searchSort) ||
           canEarlyTerminateOnPrefix(searchSort, indexSort);
@ -286,9 +278,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {

  final int numHits;
  final HitsThresholdChecker hitsThresholdChecker;
-  final FieldComparator.RelevanceComparator relevanceComparator;
+  final FieldComparator<?> firstComparator;
  final boolean canSetMinScore;

+  Boolean searchSortPartOfIndexSort = null; // shows if Search Sort if a part of the Index Sort
+
  // an accumulator that maintains the maximum of the segment's minimum competitive scores
  final MaxScoreAccumulator minScoreAcc;
  // the current local minimum competitive score already propagated to the underlying scorer
@ -314,17 +308,15 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
    this.numHits = numHits;
    this.hitsThresholdChecker = hitsThresholdChecker;
    this.numComparators = pq.getComparators().length;
-    FieldComparator<?> firstComparator = pq.getComparators()[0];
+    this.firstComparator = pq.getComparators()[0];
    int reverseMul = pq.reverseMul[0];

    if (firstComparator.getClass().equals(FieldComparator.RelevanceComparator.class)
            && reverseMul == 1 // if the natural sort is preserved (sort by descending relevance)
            && hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
-      relevanceComparator = (FieldComparator.RelevanceComparator) firstComparator;
      scoreMode = ScoreMode.TOP_SCORES;
      canSetMinScore = true;
    } else {
-      relevanceComparator = null;
      canSetMinScore = false;
      if (hitsThresholdChecker.getHitsThreshold() != Integer.MAX_VALUE) {
        scoreMode = needsScores ? ScoreMode.TOP_DOCS_WITH_SCORES : ScoreMode.TOP_DOCS;
@ -360,8 +352,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
    if (canSetMinScore
          && queueFull
          && hitsThresholdChecker.isThresholdReached()) {
-      assert bottom != null && relevanceComparator != null;
-      float minScore = relevanceComparator.value(bottom.slot);
+      assert bottom != null;
+      float minScore = (float) firstComparator.value(bottom.slot);
      if (minScore > minCompetitiveScore) {
        scorer.setMinCompetitiveScore(minScore);
        minCompetitiveScore = minScore;
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
@ -39,19 +39,19 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
    protected final T missingValue;
    protected final String field;
    protected final boolean reverse;
-    protected final boolean primarySort;
    private final int bytesCount; // how many bytes are used to encode this number

    protected boolean topValueSet;
    protected boolean singleSort; // singleSort is true, if sort is based on a single sort field.
    protected boolean hitsThresholdReached;
    protected boolean queueFull;
+    private boolean canSkipDocuments;

    protected NumericComparator(String field, T missingValue, boolean reverse, int sortPos, int bytesCount) {
        this.field = field;
        this.missingValue = missingValue;
        this.reverse = reverse;
-        this.primarySort = (sortPos == 0);
+        this.canSkipDocuments = (sortPos == 0); // skipping functionality is only relevant for primary sort
        this.bytesCount = bytesCount;
    }

@ -65,13 +65,18 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato
        singleSort = true;
    }

+    @Override
+    public void disableSkipping() {
+        canSkipDocuments = false;
+    }
+
    /**
     * Leaf comparator for {@link NumericComparator} that provides skipping functionality
     */
    public abstract class NumericLeafComparator implements LeafFieldComparator {
        protected final NumericDocValues docValues;
        private final PointValues pointValues;
-        private final boolean enableSkipping; // if skipping functionality should be enabled
+        private final boolean enableSkipping; // if skipping functionality should be enabled on this segment
        private final int maxDoc;
        private final byte[] minValueAsBytes;
        private final byte[] maxValueAsBytes;
@ -83,9 +88,9 @@ public abstract class NumericComparator<T extends Number> extends FieldComparato

        public NumericLeafComparator(LeafReaderContext context) throws IOException {
            this.docValues = getNumericDocValues(context, field);
-            this.pointValues = primarySort ? context.reader().getPointValues(field) : null;
+            this.pointValues = canSkipDocuments ? context.reader().getPointValues(field) : null;
            if (pointValues != null) {
-                this.enableSkipping = true; // skipping is enabled on primarySort and when points are available
+                this.enableSkipping = true; // skipping is enabled when points are available
                this.maxDoc = context.reader().maxDoc();
                this.maxValueAsBytes = reverse == false ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;
                this.minValueAsBytes = reverse ? new byte[bytesCount] : topValueSet ? new byte[bytesCount] : null;