mirror of https://github.com/apache/lucene.git
LUCENE-5515: Improved TopDocs#merge to create a merged ScoreDoc array whose length is at most the specified size, instead of at most from + size as before.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1578262 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
023aa2bf2a
commit
1fd9722f49
|
@ -206,7 +206,14 @@ public class TopDocs {
|
||||||
*
|
*
|
||||||
* @lucene.experimental */
|
* @lucene.experimental */
|
||||||
public static TopDocs merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException {
|
public static TopDocs merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException {
|
||||||
|
return merge(sort, 0, topN, shardHits);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as {@link #merge(Sort, int, TopDocs[])} but also slices the result at the same time based
|
||||||
|
* on the provided start and size. The returned TopDocs will always have a scoreDocs array of length at most size.
|
||||||
|
*/
|
||||||
|
public static TopDocs merge(Sort sort, int start, int size, TopDocs[] shardHits) throws IOException {
|
||||||
final PriorityQueue<ShardRef> queue;
|
final PriorityQueue<ShardRef> queue;
|
||||||
if (sort == null) {
|
if (sort == null) {
|
||||||
queue = new ScoreMergeSortQueue(shardHits);
|
queue = new ScoreMergeSortQueue(shardHits);
|
||||||
|
@ -234,15 +241,22 @@ public class TopDocs {
|
||||||
maxScore = Float.NaN;
|
maxScore = Float.NaN;
|
||||||
}
|
}
|
||||||
|
|
||||||
final ScoreDoc[] hits = new ScoreDoc[Math.min(topN, availHitCount)];
|
final ScoreDoc[] hits;
|
||||||
|
if (availHitCount <= start) {
|
||||||
|
hits = new ScoreDoc[0];
|
||||||
|
} else {
|
||||||
|
hits = new ScoreDoc[Math.min(size, availHitCount - start)];
|
||||||
|
int requestedResultWindow = start + size;
|
||||||
|
int numIterOnHits = Math.min(availHitCount, requestedResultWindow);
|
||||||
int hitUpto = 0;
|
int hitUpto = 0;
|
||||||
while(hitUpto < hits.length) {
|
while (hitUpto < numIterOnHits) {
|
||||||
assert queue.size() > 0;
|
assert queue.size() > 0;
|
||||||
ShardRef ref = queue.pop();
|
ShardRef ref = queue.pop();
|
||||||
final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
|
final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
|
||||||
hit.shardIndex = ref.shardIndex;
|
hit.shardIndex = ref.shardIndex;
|
||||||
hits[hitUpto] = hit;
|
if (hitUpto >= start) {
|
||||||
|
hits[hitUpto - start] = hit;
|
||||||
|
}
|
||||||
|
|
||||||
//System.out.println(" hitUpto=" + hitUpto);
|
//System.out.println(" hitUpto=" + hitUpto);
|
||||||
//System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
|
//System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
|
||||||
|
@ -254,6 +268,7 @@ public class TopDocs {
|
||||||
queue.add(ref);
|
queue.add(ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (sort == null) {
|
if (sort == null) {
|
||||||
return new TopDocs(totalHitCount, hits, maxScore);
|
return new TopDocs(totalHitCount, hits, maxScore);
|
||||||
|
|
|
@ -17,11 +17,6 @@ package org.apache.lucene.search;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.FloatField;
|
import org.apache.lucene.document.FloatField;
|
||||||
|
@ -36,7 +31,11 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
import org.apache.lucene.util.TestUtil;
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class TestTopDocsMerge extends LuceneTestCase {
|
public class TestTopDocsMerge extends LuceneTestCase {
|
||||||
|
|
||||||
|
@ -62,7 +61,15 @@ public class TestTopDocsMerge extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSort() throws Exception {
|
public void testSort_1() throws Exception {
|
||||||
|
testSort(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSort_2() throws Exception {
|
||||||
|
testSort(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
void testSort(boolean useFrom) throws Exception {
|
||||||
|
|
||||||
IndexReader reader = null;
|
IndexReader reader = null;
|
||||||
Directory dir = null;
|
Directory dir = null;
|
||||||
|
@ -181,18 +188,57 @@ public class TestTopDocsMerge extends LuceneTestCase {
|
||||||
System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
|
System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int from = -1;
|
||||||
|
int size = -1;
|
||||||
// First search on whole index:
|
// First search on whole index:
|
||||||
final TopDocs topHits;
|
final TopDocs topHits;
|
||||||
if (sort == null) {
|
if (sort == null) {
|
||||||
|
if (useFrom) {
|
||||||
|
TopScoreDocCollector c = TopScoreDocCollector.create(numHits, random().nextBoolean());
|
||||||
|
searcher.search(query, c);
|
||||||
|
from = TestUtil.nextInt(random(), 0, numHits - 1);
|
||||||
|
size = numHits - from;
|
||||||
|
TopDocs tempTopHits = c.topDocs();
|
||||||
|
if (from < tempTopHits.scoreDocs.length) {
|
||||||
|
// Can't use TopDocsCollector#topDocs(start, howMany), since it has different behaviour when start >= hitCount
|
||||||
|
// than TopDocs#merge currently has
|
||||||
|
ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
|
||||||
|
System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
|
||||||
|
tempTopHits.scoreDocs = newScoreDocs;
|
||||||
|
topHits = tempTopHits;
|
||||||
|
} else {
|
||||||
|
topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
topHits = searcher.search(query, numHits);
|
topHits = searcher.search(query, numHits);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random().nextBoolean());
|
final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random().nextBoolean());
|
||||||
searcher.search(query, c);
|
searcher.search(query, c);
|
||||||
|
if (useFrom) {
|
||||||
|
from = TestUtil.nextInt(random(), 0, numHits - 1);
|
||||||
|
size = numHits - from;
|
||||||
|
TopDocs tempTopHits = c.topDocs();
|
||||||
|
if (from < tempTopHits.scoreDocs.length) {
|
||||||
|
// Can't use TopDocsCollector#topDocs(start, howMany), since it has different behaviour when start >= hitCount
|
||||||
|
// than TopDocs#merge currently has
|
||||||
|
ScoreDoc[] newScoreDocs = new ScoreDoc[Math.min(size, tempTopHits.scoreDocs.length - from)];
|
||||||
|
System.arraycopy(tempTopHits.scoreDocs, from, newScoreDocs, 0, newScoreDocs.length);
|
||||||
|
tempTopHits.scoreDocs = newScoreDocs;
|
||||||
|
topHits = tempTopHits;
|
||||||
|
} else {
|
||||||
|
topHits = new TopDocs(tempTopHits.totalHits, new ScoreDoc[0], tempTopHits.getMaxScore());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
topHits = c.topDocs(0, numHits);
|
topHits = c.topDocs(0, numHits);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println(" top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
|
if (useFrom) {
|
||||||
|
System.out.println("from=" + from + " size=" + size);
|
||||||
|
}
|
||||||
|
System.out.println(" top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length + " maxScore=" + topHits.getMaxScore()));
|
||||||
if (topHits.scoreDocs != null) {
|
if (topHits.scoreDocs != null) {
|
||||||
for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
|
for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
|
||||||
final ScoreDoc sd = topHits.scoreDocs[hitIDX];
|
final ScoreDoc sd = topHits.scoreDocs[hitIDX];
|
||||||
|
@ -228,7 +274,12 @@ public class TestTopDocsMerge extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge:
|
// Merge:
|
||||||
final TopDocs mergedHits = TopDocs.merge(sort, numHits, shardHits);
|
final TopDocs mergedHits;
|
||||||
|
if (useFrom) {
|
||||||
|
mergedHits = TopDocs.merge(sort, from, size, shardHits);
|
||||||
|
} else {
|
||||||
|
mergedHits = TopDocs.merge(sort, numHits, shardHits);
|
||||||
|
}
|
||||||
|
|
||||||
if (mergedHits.scoreDocs != null) {
|
if (mergedHits.scoreDocs != null) {
|
||||||
// Make sure the returned shards are correct:
|
// Make sure the returned shards are correct:
|
||||||
|
|
Loading…
Reference in New Issue