From bc4a84e913549144b5bf4907c7dc7f8f1d988efe Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Thu, 5 Sep 2019 17:00:04 +0200
Subject: [PATCH] Revert "LUCENE-8956: QueryRescorer now only sorts the first
 topN hits instead of all initial hits."

This reverts commit 42fc9690bfd545d2a147f306ed08f175a0293fc0.
---
 lucene/CHANGES.txt                            |  3 -
 .../apache/lucene/search/QueryRescorer.java   | 39 ++++----
 .../org/apache/lucene/util/ArrayUtil.java     | 23 ++---
 .../lucene/search/TestQueryRescorer.java      | 91 -------------------
 4 files changed, 25 insertions(+), 131 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9f60dc72c78..054dfa0ce0a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -74,9 +74,6 @@ the total hits is not requested.
 * LUCENE-8755: spatial-extras quad and packed quad prefix trees now index points faster.
   (Chongchen Chen, David Smiley)
 
-* LUCENE-8956: QueryRescorer now only sorts the first topN hits instead of all
-  initial hits. (Paul Sanwald via Adrien Grand)
-
 Bug Fixes
 
 * LUCENE-8755: spatial-extras quad and packed quad prefix trees could throw a
diff --git a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
index ce5b16a213a..b3452f43881 100644
--- a/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.util.ArrayUtil;
 
 /** A {@link Rescorer} that uses a provided Query to assign
  *  scores to the first-pass hits.
@@ -51,7 +50,6 @@ public abstract class QueryRescorer extends Rescorer {
   @Override
   public TopDocs rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) throws IOException {
     ScoreDoc[] hits = firstPassTopDocs.scoreDocs.clone();
-
     Arrays.sort(hits,
                 new Comparator<ScoreDoc>() {
                   @Override
@@ -111,31 +109,32 @@ public abstract class QueryRescorer extends Rescorer {
       hitUpto++;
     }
 
-    Comparator<ScoreDoc> sortDocComparator = new Comparator<ScoreDoc>() {
-      @Override
-      public int compare(ScoreDoc a, ScoreDoc b) {
-        // Sort by score descending, then docID ascending:
-        if (a.score > b.score) {
-          return -1;
-        } else if (a.score < b.score) {
-          return 1;
-        } else {
-          // This subtraction can't overflow int
-          // because docIDs are >= 0:
-          return a.doc - b.doc;
-        }
-      }
-    };
+    // TODO: we should do a partial sort (of only topN)
+    // instead, but typically the number of hits is
+    // smallish:
+    Arrays.sort(hits,
+                new Comparator<ScoreDoc>() {
+                  @Override
+                  public int compare(ScoreDoc a, ScoreDoc b) {
+                    // Sort by score descending, then docID ascending:
+                    if (a.score > b.score) {
+                      return -1;
+                    } else if (a.score < b.score) {
+                      return 1;
+                    } else {
+                      // This subtraction can't overflow int
+                      // because docIDs are >= 0:
+                      return a.doc - b.doc;
+                    }
+                  }
+                });
 
     if (topN < hits.length) {
-      ArrayUtil.select(hits, 0, hits.length, topN, sortDocComparator);
       ScoreDoc[] subset = new ScoreDoc[topN];
       System.arraycopy(hits, 0, subset, 0, topN);
       hits = subset;
     }
 
-    Arrays.sort(hits, sortDocComparator);
-
     return new TopDocs(firstPassTopDocs.totalHits, hits);
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
index 9247aa636a6..f6bab105911 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -479,23 +479,12 @@ public final class ArrayUtil {
     timSort(a, 0, a.length);
   }
 
-  /**
-   * Reorganize {@code arr[from:to[} so that the element at offset k is at the
-   * same position as if {@code arr[from:to]} was sorted, and all elements on
-   * its left are less than or equal to it, and all elements on its right are
-   * greater than or equal to it.
-   *
-   * This runs in linear time on average and in {@code n log(n)} time in the
-   * worst case.
-   *
-   * @param arr Array to be re-organized.
-   * @param from Starting index for re-organization. Elements before this index
-   *             will be left as is.
-   * @param to Ending index. Elements after this index will be left as is.
-   * @param k Index of element to sort from. Value must be less than 'to' and greater than or equal to 'from'.
-   * @param comparator Comparator to use for sorting
-   *
-   */
+  /** Reorganize {@code arr[from:to[} so that the element at offset k is at the
+   *  same position as if {@code arr[from:to[} was sorted, and all elements on
+   *  its left are less than or equal to it, and all elements on its right are
+   *  greater than or equal to it.
+   *  This runs in linear time on average and in {@code n log(n)} time in the
+   *  worst case.*/
   public static <T> void select(T[] arr, int from, int to, int k, Comparator<? super T> comparator) {
     new IntroSelector() {
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java
index f6709265449..c507b6dae21 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java
@@ -21,9 +21,7 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Set;
-import java.util.List;
 
-import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
@@ -33,13 +31,11 @@ import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.similarities.BM25Similarity;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
@@ -59,93 +55,6 @@ public class TestQueryRescorer extends LuceneTestCase {
     return LuceneTestCase.newIndexWriterConfig().setSimilarity(new ClassicSimilarity());
   }
 
-  static List<String> dictionary = Arrays.asList("river","quick","brown","fox","jumped","lazy","fence");
-
-  String randomSentence() {
-    final int length = random().nextInt(10);
-    StringBuilder sentence = new StringBuilder(dictionary.get(0)+" ");
-    for (int i = 0; i < length; i++) {
-      sentence.append(dictionary.get(random().nextInt(dictionary.size()-1))+" ");
-    }
-    return sentence.toString();
-  }
-
-  private IndexReader publishDocs(int numDocs, String fieldName, Directory dir) throws Exception {
-
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());
-    for (int i = 0; i < numDocs; i++) {
-      Document d = new Document();
-      d.add(newStringField("id", Integer.toString(i), Field.Store.YES));
-      d.add(newTextField(fieldName, randomSentence(), Field.Store.NO));
-      w.addDocument(d);
-    }
-    IndexReader reader = w.getReader();
-    w.close();
-    return reader;
-  }
-
-  public void testRescoreOfASubsetOfHits() throws Exception {
-    Directory dir = newDirectory();
-    int numDocs = 100;
-    String fieldName = "field";
-    IndexReader reader = publishDocs(numDocs, fieldName, dir);
-
-    // Construct a query that will get numDocs hits.
-    String wordOne = dictionary.get(0);
-    TermQuery termQuery = new TermQuery(new Term(fieldName, wordOne));
-    IndexSearcher searcher = getSearcher(reader);
-    searcher.setSimilarity(new BM25Similarity());
-    TopDocs hits = searcher.search(termQuery, numDocs);
-
-    // Next, use a more specific phrase query that will return different scores
-    // from the above term query
-    String wordTwo = RandomPicks.randomFrom(random(), dictionary);
-    PhraseQuery phraseQuery = new PhraseQuery(1, fieldName, wordOne, wordTwo);
-
-    // rescore, requesting a smaller topN
-    int topN = random().nextInt(numDocs-1);
-    TopDocs phraseQueryHits = QueryRescorer.rescore(searcher, hits, phraseQuery, 2.0, topN);
-    assertEquals(topN, phraseQueryHits.scoreDocs.length);
-
-    for (int i = 1; i < phraseQueryHits.scoreDocs.length; i++) {
-      assertTrue(phraseQueryHits.scoreDocs[i].score <= phraseQueryHits.scoreDocs[i-1].score);
-    }
-    reader.close();
-    dir.close();
-  }
-
-  public void testRescoreIsIdempotent() throws Exception {
-    Directory dir = newDirectory();
-    int numDocs = 100;
-    String fieldName = "field";
-    IndexReader reader = publishDocs(numDocs, fieldName, dir);
-
-    // Construct a query that will get numDocs hits.
-    String wordOne = dictionary.get(0);
-    TermQuery termQuery = new TermQuery(new Term(fieldName, wordOne));
-    IndexSearcher searcher = getSearcher(reader);
-    searcher.setSimilarity(new BM25Similarity());
-    TopDocs hits = searcher.search(termQuery, numDocs);
-
-    // Next, use a more specific phrase query that will return different scores
-    // from the above term query
-    String wordTwo = RandomPicks.randomFrom(random(), dictionary);
-    PhraseQuery phraseQuery = new PhraseQuery(1, fieldName, wordOne, wordTwo);
-
-    // rescore, requesting the same hits as topN
-    int topN = numDocs;
-    TopDocs firstRescoreHits = QueryRescorer.rescore(searcher, hits, phraseQuery, 2.0, topN);
-
-    // now rescore again, where topN is less than numDocs
-    topN = random().nextInt(numDocs-1);
-    ScoreDoc[] secondRescoreHits = QueryRescorer.rescore(searcher, hits, phraseQuery, 2.0, topN).scoreDocs;
-    ScoreDoc[] expectedTopNScoreDocs = ArrayUtil.copyOfSubArray(firstRescoreHits.scoreDocs, 0, topN);
-    CheckHits.checkEqual(phraseQuery, expectedTopNScoreDocs, secondRescoreHits);
-
-    reader.close();
-    dir.close();
-  }
-
   public void testBasic() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig());