mirror of https://github.com/apache/lucene.git
LUCENE-5515: Improved TopDocs#merge to create a merged ScoreDoc array with a length of at most the specified size, instead of a length of at most from + size as before.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1578262 13f79535-47bb-0310-9956-ffa450edef68
parent 023aa2bf2a
commit 1fd9722f49
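In practice, the new overload lets a sharded search request a single page of the merged result directly. Below is a minimal sketch of the call pattern, assuming the per-shard TopDocs have already been collected elsewhere; the PagedShardSearch wrapper and its arguments are illustrative, not part of this commit. Only TopDocs.merge(Sort, int, int, TopDocs[]) comes from the change itself.

import java.io.IOException;

import org.apache.lucene.search.TopDocs;

class PagedShardSearch {
  // Hypothetical wrapper: merge per-shard results, keeping only one page.
  static TopDocs searchPage(TopDocs[] shardHits, int from, int size) throws IOException {
    // sort == null merges by descending score; the new (start, size) overload
    // slices while merging instead of returning the first from + size hits.
    TopDocs page = TopDocs.merge(null, from, size, shardHits);
    assert page.scoreDocs.length <= size; // the guarantee this commit adds
    return page;
  }
}

Callers that want the old behaviour simply keep using merge(sort, topN, shardHits), which now delegates to the new method with start = 0.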

lucene/core/src/java/org/apache/lucene/search/TopDocs.java

@@ -206,7 +206,14 @@ public class TopDocs {
    *
    * @lucene.experimental */
   public static TopDocs merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException {
+    return merge(sort, 0, topN, shardHits);
+  }
+
+  /**
+   * Same as {@link #merge(Sort, int, TopDocs[])} but also slices the result at the same time, based
+   * on the provided start and size. The returned TopDocs will always have a scoreDocs array with a length of at most size.
+   */
+  public static TopDocs merge(Sort sort, int start, int size, TopDocs[] shardHits) throws IOException {
     final PriorityQueue<ShardRef> queue;
     if (sort == null) {
       queue = new ScoreMergeSortQueue(shardHits);
@@ -234,24 +241,32 @@ public class TopDocs {
       maxScore = Float.NaN;
     }
 
-    final ScoreDoc[] hits = new ScoreDoc[Math.min(topN, availHitCount)];
-    int hitUpto = 0;
-    while(hitUpto < hits.length) {
-      assert queue.size() > 0;
-      ShardRef ref = queue.pop();
-      final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
-      hit.shardIndex = ref.shardIndex;
-      hits[hitUpto] = hit;
-
-      //System.out.println("  hitUpto=" + hitUpto);
-      //System.out.println("    doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
-
-      hitUpto++;
-
-      if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
-        // Not done with these TopDocs yet:
-        queue.add(ref);
-      }
-    }
+    final ScoreDoc[] hits;
+    if (availHitCount <= start) {
+      hits = new ScoreDoc[0];
+    } else {
+      hits = new ScoreDoc[Math.min(size, availHitCount - start)];
+      int requestedResultWindow = start + size;
+      int numIterOnHits = Math.min(availHitCount, requestedResultWindow);
+      int hitUpto = 0;
+      while (hitUpto < numIterOnHits) {
+        assert queue.size() > 0;
+        ShardRef ref = queue.pop();
+        final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
+        hit.shardIndex = ref.shardIndex;
+        if (hitUpto >= start) {
+          hits[hitUpto - start] = hit;
+        }
+
+        //System.out.println("  hitUpto=" + hitUpto);
+        //System.out.println("    doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
+
+        hitUpto++;
+
+        if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
+          // Not done with these TopDocs yet:
+          queue.add(ref);
+        }
+      }
+    }
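The array sizing in the hunk above is the heart of the fix: the merged page is empty when start is at or past the last available hit, and otherwise holds at most size entries, while the loop still walks the first start + size candidates and discards the leading start of them. A standalone illustration of the arithmetic, with made-up numbers:

public class MergeSizingDemo {
  public static void main(String[] args) {
    int availHitCount = 7;    // total hits available across all shards (made up)
    int start = 5, size = 10; // ask for the page starting at hit 5

    int length = availHitCount <= start
        ? 0                                      // page begins past the last hit
        : Math.min(size, availHitCount - start); // min(10, 7 - 5) = 2

    // The old code sized the array Math.min(start + size, availHitCount) = 7,
    // leaving the caller to skip the first 5 entries itself.
    System.out.println(length); // prints 2
  }
}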

lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java

@@ -17,11 +17,6 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatField;
@@ -36,7 +31,11 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
 public class TestTopDocsMerge extends LuceneTestCase {
@@ -62,7 +61,15 @@ public class TestTopDocsMerge extends LuceneTestCase {
     }
   }
 
-  public void testSort() throws Exception {
+  public void testSort_1() throws Exception {
+    testSort(false);
+  }
+
+  public void testSort_2() throws Exception {
+    testSort(true);
+  }
+
+  void testSort(boolean useFrom) throws Exception {
 
     IndexReader reader = null;
     Directory dir = null;
@@ -125,7 +132,7 @@ public class TestTopDocsMerge extends LuceneTestCase {
 
     final ShardSearcher[] subSearchers;
     final int[] docStarts;
-
+
     if (ctx instanceof AtomicReaderContext) {
       subSearchers = new ShardSearcher[1];
       docStarts = new int[1];
@@ -176,23 +183,62 @@ public class TestTopDocsMerge extends LuceneTestCase {
 
       final int numHits = TestUtil.nextInt(random(), 1, numDocs + 5);
       //final int numHits = 5;
 
       if (VERBOSE) {
         System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
       }
 
+      int from = -1;
+      int size = -1;
       // First search on whole index:
       final TopDocs topHits;
       if (sort == null) {
-        topHits = searcher.search(query, numHits);
+        if (useFrom) {
+          TopScoreDocCollector c = TopScoreDocCollector.create(numHits, random().nextBoolean());
+          searcher.search(query, c);
+          from = TestUtil.nextInt(random(), 0, numHits - 1);
+          size = numHits - from;
+          TopDocs tempTopHits = c.topDocs();
+          if (from < tempTopHits.scoreDocs.length) {
+            // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
+            // than TopDocs#merge currently has
+            ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
+            System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
+            tempTopHits.scoreDocs = newScoreDocs;
+            topHits = tempTopHits;
+          } else {
+            topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
+          }
+        } else {
+          topHits = searcher.search(query, numHits);
+        }
       } else {
         final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random().nextBoolean());
         searcher.search(query, c);
-        topHits = c.topDocs(0, numHits);
+        if (useFrom) {
+          from = TestUtil.nextInt(random(), 0, numHits - 1);
+          size = numHits - from;
+          TopDocs tempTopHits = c.topDocs();
+          if (from < tempTopHits.scoreDocs.length) {
+            // Can't use TopDocs#topDocs(start, howMany), since it has different behaviour when start >= hitCount
+            // than TopDocs#merge currently has
+            ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
+            System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
+            tempTopHits.scoreDocs = newScoreDocs;
+            topHits = tempTopHits;
+          } else {
+            topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
+          }
+        } else {
+          topHits = c.topDocs(0, numHits);
+        }
       }
 
       if (VERBOSE) {
-        System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
+        if (useFrom) {
+          System.out.println("from=" + from + " size=" + size);
+        }
+        System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length + " maxScore=" + topHits.getMaxScore()));
         if (topHits.scoreDocs != null) {
           for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
             final ScoreDoc sd = topHits.scoreDocs[hitIDX];
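The slice-by-hand block appears twice in the hunk above because, as its comment notes, TopDocs#topDocs(start, howMany) treats start >= hitCount differently from what TopDocs#merge now does, so the test has to build the expected page manually. Factored into a helper, that computation would look roughly like this (a hypothetical refactoring, not part of the commit):

  // Hypothetical helper mirroring the test's manual slicing of a TopDocs.
  static TopDocs slice(TopDocs hits, int from, int size) {
    if (from >= hits.scoreDocs.length) {
      // merge(sort, from, size, shardHits) returns an empty page in this case,
      // so the expected value must be empty too.
      return new TopDocs(hits.totalHits, new ScoreDoc[0], hits.getMaxScore());
    }
    ScoreDoc[] page = new ScoreDoc[Math.min(size, hits.scoreDocs.length - from)];
    System.arraycopy(hits.scoreDocs, from, page, 0, page.length);
    return new TopDocs(hits.totalHits, page, hits.getMaxScore());
  }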
@@ -228,7 +274,12 @@ public class TestTopDocsMerge extends LuceneTestCase {
       }
 
       // Merge:
-      final TopDocs mergedHits = TopDocs.merge(sort, numHits, shardHits);
+      final TopDocs mergedHits;
+      if (useFrom) {
+        mergedHits = TopDocs.merge(sort, from, size, shardHits);
+      } else {
+        mergedHits = TopDocs.merge(sort, numHits, shardHits);
+      }
 
       if (mergedHits.scoreDocs != null) {
         // Make sure the returned shards are correct: