LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448)

Use the maximum of the two, this is so that relation is EQUAL_TO in the case of the number of hits in a query is less than the collector's numHits
This commit is contained in:
Tomas Fernandez Lobbe 2020-04-23 12:04:02 -07:00 committed by GitHub
parent 4eb755db18
commit a11b78e06a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 106 additions and 30 deletions

View File

@ -149,6 +149,9 @@ Improvements
* LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make
snapshots incremental on generational files. (Simon Willnauer, Mike Mccandless, Adrien Grant)
* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsColector's numHits
(Tomás Fernández Löbbe)
Optimizations
---------------------

View File

@ -477,8 +477,8 @@ public class IndexSearcher {
final CollectorManager<TopScoreDocCollector, TopDocs> manager = new CollectorManager<TopScoreDocCollector, TopDocs>() {
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();
@ -610,8 +610,8 @@ public class IndexSearcher {
final CollectorManager<TopFieldCollector, TopFieldDocs> manager = new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) :
HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD);
private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) :
HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator();

View File

@ -411,7 +411,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold);
}
return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */);
return create(sort, numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null /* bottomValueChecker */);
}
/**
@ -458,7 +458,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
int totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override

View File

@ -217,7 +217,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
* objects.
*/
public static TopScoreDocCollector create(int numHits, ScoreDoc after, int totalHitsThreshold) {
return create(numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null);
return create(numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null);
}
static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker,
@ -246,7 +246,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
int totalHitsThreshold) {
return new CollectorManager<>() {
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold);
private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits));
private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator();
@Override

View File

@ -27,6 +27,8 @@ import java.util.concurrent.TimeUnit;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -282,7 +284,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
assertEquals(2, reader.leaves().size());
w.close();
TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 2);
ScoreAndDoc scorer = new ScoreAndDoc();
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
@ -297,35 +299,40 @@ public class TestTopDocsCollector extends LuceneTestCase {
scorer.doc = 1;
scorer.score = 2;
leafCollector.collect(1);
assertEquals(Math.nextUp(1f), scorer.minCompetitiveScore, 0f);
assertNull(scorer.minCompetitiveScore);
scorer.doc = 2;
scorer.score = 3;
leafCollector.collect(2);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
scorer.doc = 3;
scorer.score = 0.5f;
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
scorer.minCompetitiveScore = Float.NaN;
leafCollector.collect(2);
leafCollector.collect(3);
assertTrue(Float.isNaN(scorer.minCompetitiveScore));
scorer.doc = 3;
scorer.doc = 4;
scorer.score = 4;
leafCollector.collect(3);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
leafCollector.collect(4);
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
// Make sure the min score is set on scorers on new segments
scorer = new ScoreAndDoc();
leafCollector = collector.getLeafCollector(reader.leaves().get(1));
leafCollector.setScorer(scorer);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
scorer.doc = 0;
scorer.score = 1;
leafCollector.collect(0);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
scorer.doc = 1;
scorer.score = 3;
scorer.score = 4;
leafCollector.collect(1);
assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(4f), scorer.minCompetitiveScore, 0f);
reader.close();
dir.close();
@ -402,6 +409,36 @@ public class TestTopDocsCollector extends LuceneTestCase {
dir.close();
}
public void testRelationVsTopDocsCount() throws Exception {
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
Document doc = new Document();
doc.add(new TextField("f", "foo bar", Store.NO));
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
w.flush();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
w.flush();
try (IndexReader reader = DirectoryReader.open(w)) {
IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10);
searcher.search(new TermQuery(new Term("f", "foo")), collector);
assertEquals(10, collector.totalHits);
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
collector = TopScoreDocCollector.create(2, null, 2);
searcher.search(new TermQuery(new Term("f", "foo")), collector);
assertTrue(10 >= collector.totalHits);
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
collector = TopScoreDocCollector.create(10, null, 2);
searcher.search(new TermQuery(new Term("f", "foo")), collector);
assertEquals(10, collector.totalHits);
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
}
}
}
public void testConcurrentMinScore() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE));

View File

@ -294,7 +294,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
w.close();
Sort sort = new Sort(FIELD_SCORE, new SortField("foo", SortField.Type.LONG));
TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 1);
TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 2);
ScoreAndDoc scorer = new ScoreAndDoc();
LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0));
@ -309,35 +309,40 @@ public class TestTopFieldCollector extends LuceneTestCase {
scorer.doc = 1;
scorer.score = 2;
leafCollector.collect(1);
assertEquals(1f, scorer.minCompetitiveScore, 0f);
assertNull(scorer.minCompetitiveScore);
scorer.doc = 2;
scorer.score = 3;
leafCollector.collect(2);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
scorer.doc = 3;
scorer.score = 0.5f;
// Make sure we do not call setMinCompetitiveScore for non-competitive hits
scorer.minCompetitiveScore = Float.NaN;
leafCollector.collect(2);
leafCollector.collect(3);
assertTrue(Float.isNaN(scorer.minCompetitiveScore));
scorer.doc = 3;
scorer.doc = 4;
scorer.score = 4;
leafCollector.collect(3);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
leafCollector.collect(4);
assertEquals(3f, scorer.minCompetitiveScore, 0f);
// Make sure the min score is set on scorers on new segments
scorer = new ScoreAndDoc();
leafCollector = collector.getLeafCollector(reader.leaves().get(1));
leafCollector.setScorer(scorer);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
assertEquals(3f, scorer.minCompetitiveScore, 0f);
scorer.doc = 0;
scorer.score = 1;
leafCollector.collect(0);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
assertEquals(3f, scorer.minCompetitiveScore, 0f);
scorer.doc = 1;
scorer.score = 3;
scorer.score = 4;
leafCollector.collect(1);
assertEquals(3f, scorer.minCompetitiveScore, 0f);
assertEquals(4f, scorer.minCompetitiveScore, 0f);
reader.close();
dir.close();
@ -690,4 +695,35 @@ public class TestTopFieldCollector extends LuceneTestCase {
dir.close();
}
public void testRelationVsTopDocsCount() throws Exception {
Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC);
try (Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
Document doc = new Document();
doc.add(new TextField("f", "foo bar", Store.NO));
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
w.flush();
w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
w.flush();
try (IndexReader reader = DirectoryReader.open(w)) {
IndexSearcher searcher = new IndexSearcher(reader);
TopFieldCollector collector = TopFieldCollector.create(sort, 2, 10);
searcher.search(new TermQuery(new Term("f", "foo")), collector);
assertEquals(10, collector.totalHits);
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
collector = TopFieldCollector.create(sort, 2, 2);
searcher.search(new TermQuery(new Term("f", "foo")), collector);
assertTrue(10 >= collector.totalHits);
assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
collector = TopFieldCollector.create(sort, 10, 2);
searcher.search(new TermQuery(new Term("f", "foo")), collector);
assertEquals(10, collector.totalHits);
assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
}
}
}
}