mirror of https://github.com/apache/lucene.git
LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)
Use the maximum of the two, this is so that relation is EQUAL_TO in the case of the number of hits in a query is less than the collector's numHits
This commit is contained in:
parent
4eb755db18
commit
a11b78e06a
|
@ -149,6 +149,9 @@ Improvements
|
|||
* LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make
|
||||
snapshots incremental on generational files. (Simon Willnauer, Mike Mccandless, Adrien Grant)
|
||||
|
||||
* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsColector's numHits
|
||||
(Tomás Fernández Löbbe)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -477,8 +477,8 @@ public class IndexSearcher {
|
|||
|
||||
final CollectorManager<TopScoreDocCollector, TopDocs> manager = new CollectorManager<TopScoreDocCollector, TopDocs>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
|
||||
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
|
||||
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
|
||||
HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
|
||||
|
||||
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
|
||||
|
||||
|
@ -610,8 +610,8 @@ public class IndexSearcher {
|
|||
|
||||
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
|
||||
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
|
||||
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
|
||||
HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
|
||||
|
||||
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
|
||||
|
||||
|
|
|
@ -411,7 +411,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
|
||||
}
|
||||
|
||||
return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */);
|
||||
return create(sort, numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null /* bottomValueChecker */);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -458,7 +458,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
int totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
|
|
|
@ -217,7 +217,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
* objects.
|
||||
*/
|
||||
public static TopScoreDocCollector create(int numHits, ScoreDoc after, int totalHitsThreshold) {
|
||||
return create(numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null);
|
||||
return create(numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null);
|
||||
}
|
||||
|
||||
static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
|
||||
|
@ -246,7 +246,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
|
|||
int totalHitsThreshold) {
|
||||
return new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
|
||||
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
|
||||
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,6 +27,8 @@ import java.util.concurrent.TimeUnit;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -282,7 +284,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
assertEquals(2, reader.leaves().size());
|
||||
w.close();
|
||||
|
||||
TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
|
||||
TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 2);
|
||||
ScoreAndDoc scorer = new ScoreAndDoc();
|
||||
|
||||
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
|
||||
|
@ -297,35 +299,40 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
scorer.doc = 1;
|
||||
scorer.score = 2;
|
||||
leafCollector.collect(1);
|
||||
assertEquals(Math.nextUp(1f), scorer.minCompetitiveScore, 0f);
|
||||
assertNull(scorer.minCompetitiveScore);
|
||||
|
||||
scorer.doc = 2;
|
||||
scorer.score = 3;
|
||||
leafCollector.collect(2);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 3;
|
||||
scorer.score = 0.5f;
|
||||
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
|
||||
scorer.minCompetitiveScore = Float.NaN;
|
||||
leafCollector.collect(2);
|
||||
leafCollector.collect(3);
|
||||
assertTrue(Float.isNaN(scorer.minCompetitiveScore));
|
||||
|
||||
scorer.doc = 3;
|
||||
scorer.doc = 4;
|
||||
scorer.score = 4;
|
||||
leafCollector.collect(3);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
leafCollector.collect(4);
|
||||
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
|
||||
|
||||
// Make sure the min score is set on scorers on new segments
|
||||
scorer = new ScoreAndDoc();
|
||||
leafCollector = collector.getLeafCollector(reader.leaves().get(1));
|
||||
leafCollector.setScorer(scorer);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 0;
|
||||
scorer.score = 1;
|
||||
leafCollector.collect(0);
|
||||
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 1;
|
||||
scorer.score = 3;
|
||||
scorer.score = 4;
|
||||
leafCollector.collect(1);
|
||||
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(Math.nextUp(4f), scorer.minCompetitiveScore, 0f);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
|
@ -402,6 +409,36 @@ public class TestTopDocsCollector extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testRelationVsTopDocsCount() throws Exception {
|
||||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("f", "foo bar", Store.NO));
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(w)) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10);
|
||||
searcher.search(new TermQuery(new Term("f", "foo")), collector);
|
||||
assertEquals(10, collector.totalHits);
|
||||
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
|
||||
|
||||
collector = TopScoreDocCollector.create(2, null, 2);
|
||||
searcher.search(new TermQuery(new Term("f", "foo")), collector);
|
||||
assertTrue(10 >= collector.totalHits);
|
||||
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
|
||||
|
||||
collector = TopScoreDocCollector.create(10, null, 2);
|
||||
searcher.search(new TermQuery(new Term("f", "foo")), collector);
|
||||
assertEquals(10, collector.totalHits);
|
||||
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testConcurrentMinScore() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));
|
||||
|
|
|
@ -294,7 +294,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
w.close();
|
||||
|
||||
Sort sort = new Sort(FIELD_SCORE, new SortField("foo", SortField.Type.LONG));
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 1);
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 2);
|
||||
ScoreAndDoc scorer = new ScoreAndDoc();
|
||||
|
||||
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
|
||||
|
@ -309,35 +309,40 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
scorer.doc = 1;
|
||||
scorer.score = 2;
|
||||
leafCollector.collect(1);
|
||||
assertEquals(1f, scorer.minCompetitiveScore, 0f);
|
||||
assertNull(scorer.minCompetitiveScore);
|
||||
|
||||
scorer.doc = 2;
|
||||
scorer.score = 3;
|
||||
leafCollector.collect(2);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 3;
|
||||
scorer.score = 0.5f;
|
||||
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
|
||||
scorer.minCompetitiveScore = Float.NaN;
|
||||
leafCollector.collect(2);
|
||||
leafCollector.collect(3);
|
||||
assertTrue(Float.isNaN(scorer.minCompetitiveScore));
|
||||
|
||||
scorer.doc = 3;
|
||||
scorer.doc = 4;
|
||||
scorer.score = 4;
|
||||
leafCollector.collect(3);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
leafCollector.collect(4);
|
||||
assertEquals(3f, scorer.minCompetitiveScore, 0f);
|
||||
|
||||
// Make sure the min score is set on scorers on new segments
|
||||
scorer = new ScoreAndDoc();
|
||||
leafCollector = collector.getLeafCollector(reader.leaves().get(1));
|
||||
leafCollector.setScorer(scorer);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(3f, scorer.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 0;
|
||||
scorer.score = 1;
|
||||
leafCollector.collect(0);
|
||||
assertEquals(2f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(3f, scorer.minCompetitiveScore, 0f);
|
||||
|
||||
scorer.doc = 1;
|
||||
scorer.score = 3;
|
||||
scorer.score = 4;
|
||||
leafCollector.collect(1);
|
||||
assertEquals(3f, scorer.minCompetitiveScore, 0f);
|
||||
assertEquals(4f, scorer.minCompetitiveScore, 0f);
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
|
@ -690,4 +695,35 @@ public class TestTopFieldCollector extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testRelationVsTopDocsCount() throws Exception {
|
||||
Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
|
||||
try (Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("f", "foo bar", Store.NO));
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
|
||||
w.flush();
|
||||
|
||||
try (IndexReader reader = DirectoryReader.open(w)) {
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, 2, 10);
|
||||
searcher.search(new TermQuery(new Term("f", "foo")), collector);
|
||||
assertEquals(10, collector.totalHits);
|
||||
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
|
||||
|
||||
collector = TopFieldCollector.create(sort, 2, 2);
|
||||
searcher.search(new TermQuery(new Term("f", "foo")), collector);
|
||||
assertTrue(10 >= collector.totalHits);
|
||||
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
|
||||
|
||||
collector = TopFieldCollector.create(sort, 10, 2);
|
||||
searcher.search(new TermQuery(new Term("f", "foo")), collector);
|
||||
assertEquals(10, collector.totalHits);
|
||||
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue