From a11b78e06a5947ffb43a9b66b37033ebe64753e0 Mon Sep 17 00:00:00 2001 From: Tomas Fernandez Lobbe Date: Thu, 23 Apr 2020 12:04:02 -0700 Subject: [PATCH] LUCENE-9342: Collector's totalHitsThreshold should not be lower than numHits (#1448) Use the maximum of the two, this is so that relation is EQUAL_TO in the case of the number of hits in a query is less than the collector's numHits --- lucene/CHANGES.txt | 3 + .../apache/lucene/search/IndexSearcher.java | 8 +-- .../lucene/search/TopFieldCollector.java | 4 +- .../lucene/search/TopScoreDocCollector.java | 4 +- .../lucene/search/TestTopDocsCollector.java | 59 +++++++++++++++---- .../lucene/search/TestTopFieldCollector.java | 58 ++++++++++++++---- 6 files changed, 106 insertions(+), 30 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 02b93f64a8d..7e9afc18fe2 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -149,6 +149,9 @@ Improvements * LUCENE-9324: Add an ID to SegmentCommitInfo in order to compare commits for equality and make snapshots incremental on generational files. (Simon Willnauer, Mike Mccandless, Adrien Grant) +* LUCENE-9342: TotalHits' relation will be EQUAL_TO when the number of hits is lower than TopDocsColector's numHits + (Tomás Fernández Löbbe) + Optimizations --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 44fba0a19b8..692cec3e8bc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -477,8 +477,8 @@ public class IndexSearcher { final CollectorManager manager = new CollectorManager() { - private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) : - HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD); + private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) : + HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); @@ -610,8 +610,8 @@ public class IndexSearcher { final CollectorManager manager = new CollectorManager<>() { - private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(TOTAL_HITS_THRESHOLD) : - HitsThresholdChecker.createShared(TOTAL_HITS_THRESHOLD); + private final HitsThresholdChecker hitsThresholdChecker = (executor == null || leafSlices.length <= 1) ? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits)) : + HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits)); private final MaxScoreAccumulator minScoreAcc = (executor == null || leafSlices.length <= 1) ? null : new MaxScoreAccumulator(); diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index 0def79521ae..62ca6f48921 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -411,7 +411,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { throw new IllegalArgumentException("totalHitsThreshold must be >= 0, got " + totalHitsThreshold); } - return create(sort, numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null /* bottomValueChecker */); + return create(sort, numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null /* bottomValueChecker */); } /** @@ -458,7 +458,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { int totalHitsThreshold) { return new CollectorManager<>() { - private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold); + private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits)); private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator(); @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 2c7710733d9..b9051d422a2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -217,7 +217,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { * objects. */ public static TopScoreDocCollector create(int numHits, ScoreDoc after, int totalHitsThreshold) { - return create(numHits, after, HitsThresholdChecker.create(totalHitsThreshold), null); + return create(numHits, after, HitsThresholdChecker.create(Math.max(totalHitsThreshold, numHits)), null); } static TopScoreDocCollector create(int numHits, ScoreDoc after, HitsThresholdChecker hitsThresholdChecker, @@ -246,7 +246,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { int totalHitsThreshold) { return new CollectorManager<>() { - private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(totalHitsThreshold); + private final HitsThresholdChecker hitsThresholdChecker = HitsThresholdChecker.createShared(Math.max(totalHitsThreshold, numHits)); private final MaxScoreAccumulator minScoreAcc = new MaxScoreAccumulator(); @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index addd37cae2c..cf5102aae4f 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -27,6 +27,8 @@ import java.util.concurrent.TimeUnit; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -282,7 +284,7 @@ public class TestTopDocsCollector extends LuceneTestCase { assertEquals(2, reader.leaves().size()); w.close(); - TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1); + TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 2); ScoreAndDoc scorer = new ScoreAndDoc(); LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0)); @@ -297,35 +299,40 @@ public class TestTopDocsCollector extends LuceneTestCase { scorer.doc = 1; scorer.score = 2; leafCollector.collect(1); - assertEquals(Math.nextUp(1f), scorer.minCompetitiveScore, 0f); - + assertNull(scorer.minCompetitiveScore); + scorer.doc = 2; + scorer.score = 3; + leafCollector.collect(2); + assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f); + + scorer.doc = 3; scorer.score = 0.5f; // Make sure we do not call setMinCompetitiveScore for non-competitive hits scorer.minCompetitiveScore = Float.NaN; - leafCollector.collect(2); + leafCollector.collect(3); assertTrue(Float.isNaN(scorer.minCompetitiveScore)); - scorer.doc = 3; + scorer.doc = 4; scorer.score = 4; - leafCollector.collect(3); - assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f); + leafCollector.collect(4); + assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f); // Make sure the min score is set on scorers on new segments scorer = new ScoreAndDoc(); leafCollector = collector.getLeafCollector(reader.leaves().get(1)); leafCollector.setScorer(scorer); - assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f); + assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f); scorer.doc = 0; scorer.score = 1; leafCollector.collect(0); - assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f); + assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f); scorer.doc = 1; - scorer.score = 3; + scorer.score = 4; leafCollector.collect(1); - assertEquals(Math.nextUp(3f), scorer.minCompetitiveScore, 0f); + assertEquals(Math.nextUp(4f), scorer.minCompetitiveScore, 0f); reader.close(); dir.close(); @@ -401,6 +408,36 @@ public class TestTopDocsCollector extends LuceneTestCase { reader.close(); dir.close(); } + + public void testRelationVsTopDocsCount() throws Exception { + try (Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + Document doc = new Document(); + doc.add(new TextField("f", "foo bar", Store.NO)); + w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc)); + w.flush(); + w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc)); + w.flush(); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10); + searcher.search(new TermQuery(new Term("f", "foo")), collector); + assertEquals(10, collector.totalHits); + assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation); + + collector = TopScoreDocCollector.create(2, null, 2); + searcher.search(new TermQuery(new Term("f", "foo")), collector); + assertTrue(10 >= collector.totalHits); + assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation); + + collector = TopScoreDocCollector.create(10, null, 2); + searcher.search(new TermQuery(new Term("f", "foo")), collector); + assertEquals(10, collector.totalHits); + assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation); + } + } + } public void testConcurrentMinScore() throws Exception { Directory dir = newDirectory(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 58869694c54..460bf1ac53b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -294,7 +294,7 @@ public class TestTopFieldCollector extends LuceneTestCase { w.close(); Sort sort = new Sort(FIELD_SCORE, new SortField("foo", SortField.Type.LONG)); - TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 1); + TopFieldCollector collector = TopFieldCollector.create(sort, 2, null, 2); ScoreAndDoc scorer = new ScoreAndDoc(); LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0)); @@ -309,35 +309,40 @@ public class TestTopFieldCollector extends LuceneTestCase { scorer.doc = 1; scorer.score = 2; leafCollector.collect(1); - assertEquals(1f, scorer.minCompetitiveScore, 0f); - + assertNull(scorer.minCompetitiveScore); + scorer.doc = 2; + scorer.score = 3; + leafCollector.collect(2); + assertEquals(2f, scorer.minCompetitiveScore, 0f); + + scorer.doc = 3; scorer.score = 0.5f; // Make sure we do not call setMinCompetitiveScore for non-competitive hits scorer.minCompetitiveScore = Float.NaN; - leafCollector.collect(2); + leafCollector.collect(3); assertTrue(Float.isNaN(scorer.minCompetitiveScore)); - scorer.doc = 3; + scorer.doc = 4; scorer.score = 4; - leafCollector.collect(3); - assertEquals(2f, scorer.minCompetitiveScore, 0f); + leafCollector.collect(4); + assertEquals(3f, scorer.minCompetitiveScore, 0f); // Make sure the min score is set on scorers on new segments scorer = new ScoreAndDoc(); leafCollector = collector.getLeafCollector(reader.leaves().get(1)); leafCollector.setScorer(scorer); - assertEquals(2f, scorer.minCompetitiveScore, 0f); + assertEquals(3f, scorer.minCompetitiveScore, 0f); scorer.doc = 0; scorer.score = 1; leafCollector.collect(0); - assertEquals(2f, scorer.minCompetitiveScore, 0f); + assertEquals(3f, scorer.minCompetitiveScore, 0f); scorer.doc = 1; - scorer.score = 3; + scorer.score = 4; leafCollector.collect(1); - assertEquals(3f, scorer.minCompetitiveScore, 0f); + assertEquals(4f, scorer.minCompetitiveScore, 0f); reader.close(); dir.close(); @@ -689,5 +694,36 @@ public class TestTopFieldCollector extends LuceneTestCase { indexReader.close(); dir.close(); } + + public void testRelationVsTopDocsCount() throws Exception { + Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC); + try (Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) { + Document doc = new Document(); + doc.add(new TextField("f", "foo bar", Store.NO)); + w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc)); + w.flush(); + w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc)); + w.flush(); + + try (IndexReader reader = DirectoryReader.open(w)) { + IndexSearcher searcher = new IndexSearcher(reader); + TopFieldCollector collector = TopFieldCollector.create(sort, 2, 10); + searcher.search(new TermQuery(new Term("f", "foo")), collector); + assertEquals(10, collector.totalHits); + assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation); + + collector = TopFieldCollector.create(sort, 2, 2); + searcher.search(new TermQuery(new Term("f", "foo")), collector); + assertTrue(10 >= collector.totalHits); + assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation); + + collector = TopFieldCollector.create(sort, 10, 2); + searcher.search(new TermQuery(new Term("f", "foo")), collector); + assertEquals(10, collector.totalHits); + assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation); + } + } + } }