From 7eab6ebb2752c7a3ab61a66aaf531e07838b36a9 Mon Sep 17 00:00:00 2001 From: jimczi Date: Tue, 1 Oct 2019 12:32:23 +0200 Subject: [PATCH] refactor updateMinCompetitiveScore and ensures that the recorded min competitive score is resetted on setScorer --- .../lucene/search/TopFieldCollector.java | 69 +++++----- .../lucene/search/TopScoreDocCollector.java | 80 ++++++------ .../lucene/search/TestTopDocsCollector.java | 119 +----------------- .../lucene/search/TestTopFieldCollector.java | 117 +++++++++++++++++ 4 files changed, 195 insertions(+), 190 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index 2f62ba04ac5..6cfef6002d2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -124,7 +124,9 @@ public abstract class TopFieldCollector extends TopDocsCollector { @Override public void setScorer(Scorable scorer) throws IOException { super.setScorer(scorer); - updateMinCompetitiveScore(scorer); + // reset the minimum competitive score + minCompetitiveScore = 0f; + updateMinCompetitiveScore(scorer, true); } @Override @@ -143,10 +145,8 @@ public abstract class TopFieldCollector extends TopDocsCollector { } else { collectedAllCompetitiveHits = true; } - } else if (totalHitsRelation == Relation.EQUAL_TO || shouldUpdateMinScore()) { - // we just reached totalHitsThreshold, we can start setting the min - // competitive score now - updateMinCompetitiveScore(scorer); + } else { + updateMinCompetitiveScore(scorer, totalHitsRelation == Relation.EQUAL_TO); } return; } @@ -155,7 +155,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { comparator.copy(bottom.slot, doc); updateBottom(doc); comparator.setBottom(bottom.slot); - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, true); } else { // Startup transient: queue hasn't gathered numHits yet final int slot = totalHits - 1; @@ -165,7 +165,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { add(slot, doc); if (queueFull) { comparator.setBottom(bottom.slot); - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, true); } } } @@ -214,7 +214,8 @@ public abstract class TopFieldCollector extends TopDocsCollector { @Override public void setScorer(Scorable scorer) throws IOException { super.setScorer(scorer); - updateMinCompetitiveScore(scorer); + minCompetitiveScore = 0f; + updateMinCompetitiveScore(scorer, true); } @Override @@ -238,10 +239,10 @@ public abstract class TopFieldCollector extends TopDocsCollector { } else { collectedAllCompetitiveHits = true; } - } else if (totalHitsRelation == Relation.EQUAL_TO || shouldUpdateMinScore()) { + } else { // we just reached totalHitsThreshold, we can start setting the min // competitive score now - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, totalHitsRelation == Relation.EQUAL_TO); } return; } @@ -260,7 +261,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { updateBottom(doc); comparator.setBottom(bottom.slot); - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, true); } else { collectedHits++; @@ -274,7 +275,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { queueFull = collectedHits == numHits; if (queueFull) { comparator.setBottom(bottom.slot); - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, true); } } } @@ -290,10 +291,9 @@ public abstract class TopFieldCollector extends TopDocsCollector { final BottomValueChecker bottomValueChecker; final FieldComparator.RelevanceComparator firstComparator; final boolean canSetMinScore; - // the minimum score (if canSetMinScore is true) that is currently used by the scorer, - // can be different than pqTop.score if the provided bottomValueChecker reports a minimum - // score that is greater than the local one. - float minScore; + // the minimum competitive score (if canSetMinScore is true) that is currently + // used by the underlying scorer (see setScorer) + float minCompetitiveScore; final int numComparators; FieldValueHitQueue.Entry bottom = null; boolean queueFull; @@ -335,28 +335,31 @@ public abstract class TopFieldCollector extends TopDocsCollector { return scoreMode; } - protected boolean shouldUpdateMinScore() { - return bottomValueChecker != null ? bottomValueChecker.getBottomValue() > minScore : false; - } - - protected void updateMinCompetitiveScore(Scorable scorer) throws IOException { - if (canSetMinScore && hitsThresholdChecker.isThresholdReached() - && (queueFull || (bottomValueChecker != null && bottomValueChecker.getBottomValue() > 0f))) { - float maxMinScore = Float.NEGATIVE_INFINITY; - if (queueFull) { + protected void updateMinCompetitiveScore(Scorable scorer, boolean checkQueue) throws IOException { + if (canSetMinScore && hitsThresholdChecker.isThresholdReached()) { + boolean hasChanged = false; + if (checkQueue && queueFull) { assert bottom != null && firstComparator != null; - maxMinScore = firstComparator.value(bottom.slot); - if (bottomValueChecker != null) { - bottomValueChecker.updateThreadLocalBottomValue(maxMinScore); + float localMinScore = firstComparator.value(bottom.slot); + if (localMinScore > minCompetitiveScore) { + hasChanged = true; + minCompetitiveScore = localMinScore; + if (bottomValueChecker != null) { + bottomValueChecker.updateThreadLocalBottomValue(minCompetitiveScore); + } } } if (bottomValueChecker != null) { - maxMinScore = Math.max(maxMinScore, bottomValueChecker.getBottomValue()); + float globalMinScore = bottomValueChecker.getBottomValue(); + if (globalMinScore > minCompetitiveScore) { + hasChanged = true; + minCompetitiveScore = globalMinScore; + } + } + if (hasChanged) { + scorer.setMinCompetitiveScore(minCompetitiveScore); + totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; } - assert maxMinScore > 0f; - scorer.setMinCompetitiveScore(maxMinScore); - minScore = maxMinScore; - totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index a08e969579f..2504cf7badd 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -62,7 +62,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { @Override public void setScorer(Scorable scorer) throws IOException { super.setScorer(scorer); - updateMinCompetitiveScore(scorer); + // reset the minimum competitive score + minCompetitiveScore = 0f; + updateMinCompetitiveScore(scorer, true); } @Override @@ -76,12 +78,10 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { hitsThresholdChecker.incrementHitCount(); if (score <= pqTop.score) { - if ((totalHitsRelation == TotalHits.Relation.EQUAL_TO || shouldUpdateMinScore()) - && hitsThresholdChecker.isThresholdReached()) { - // we just reached totalHitsThreshold, we can start setting the min - // competitive score now - updateMinCompetitiveScore(scorer); - } + // the document is not competitive but we need to update the minimum competitive score + // if we just reached the total hits threshold or if the global minimum score (bottomValueChecker) + // has been updated + updateMinCompetitiveScore(scorer, totalHitsRelation == TotalHits.Relation.EQUAL_TO); // Since docs are returned in-order (i.e., increasing doc Id), a document // with equal score to pqTop.score cannot compete since HitQueue favors // documents with lower doc Ids. Therefore reject those docs too. @@ -90,7 +90,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, true); } }; @@ -140,12 +140,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { if (score > after.score || (score == after.score && doc <= afterDoc)) { // hit was collected on a previous page - if ((totalHitsRelation == TotalHits.Relation.EQUAL_TO || shouldUpdateMinScore()) - && hitsThresholdChecker.isThresholdReached()) { - // we just reached totalHitsThreshold, we can start setting the min - // competitive score now - updateMinCompetitiveScore(scorer); - } + updateMinCompetitiveScore(scorer, totalHitsRelation == TotalHits.Relation.EQUAL_TO); return; } @@ -159,7 +154,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { pqTop.doc = doc + docBase; pqTop.score = score; pqTop = pq.updateTop(); - updateMinCompetitiveScore(scorer); + updateMinCompetitiveScore(scorer, true); } }; } @@ -249,10 +244,9 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { } ScoreDoc pqTop; - // the minimum score that is currently used by the scorer, can be different than pqTop.score - // if the provided bottomValueChecker reports a minimum score that is greater than the local - // one. - float minScore; + // the minimum competitive score that is currently + // used by the underlying scorer (see setScorer) + float minCompetitiveScore; final HitsThresholdChecker hitsThresholdChecker; final BottomValueChecker bottomValueChecker; @@ -283,36 +277,44 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { return hitsThresholdChecker.scoreMode(); } - protected boolean shouldUpdateMinScore() { - return bottomValueChecker != null ? bottomValueChecker.getBottomValue() > minScore : false; + public boolean isQueueFull() { + return pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY; } - protected void updateMinCompetitiveScore(Scorable scorer) throws IOException { - if (hitsThresholdChecker.isThresholdReached() - && ((bottomValueChecker != null && bottomValueChecker.getBottomValue() > 0) - || (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY))) { // -Infinity is the score of sentinels - // since we tie-break on doc id and collect in doc id order, we can require - // the next float - float bottomScore = Float.NEGATIVE_INFINITY; - - if (pqTop != null && pqTop.score != Float.NEGATIVE_INFINITY) { - bottomScore = Math.nextUp(pqTop.score); - - if (bottomValueChecker != null) { - bottomValueChecker.updateThreadLocalBottomValue(pqTop.score); + protected void updateMinCompetitiveScore(Scorable scorer, boolean checkQueue) throws IOException { + if (hitsThresholdChecker.isThresholdReached()) { + boolean hasChanged = false; + if (checkQueue && isQueueFull()) { + // since we tie-break on doc id and collect in doc id order, we can require + // the next float + float localMinScore = Math.nextUp(pqTop.score); + if (localMinScore > minCompetitiveScore) { + hasChanged = true; + minCompetitiveScore = localMinScore; + if (bottomValueChecker != null) { + // we don't use the next float here since we register a minimum value + // for other segments that might have smaller doc ids + bottomValueChecker.updateThreadLocalBottomValue(pqTop.score); + } } } // Global bottom can only be greater than or equal to the local bottom score // The updating of global bottom score for this hit before getting here should // ensure that - if (bottomValueChecker != null && bottomValueChecker.getBottomValue() > bottomScore) { - bottomScore = bottomValueChecker.getBottomValue(); + if (bottomValueChecker != null) { + float globalMinScore = bottomValueChecker.getBottomValue(); + if (globalMinScore > minCompetitiveScore) { + assert isQueueFull() == false || bottomValueChecker.getBottomValue() > Math.nextUp(pqTop.score); + hasChanged = true; + minCompetitiveScore = bottomValueChecker.getBottomValue(); + } } - scorer.setMinCompetitiveScore(bottomScore); - minScore = bottomScore; - totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; + if (hasChanged) { + scorer.setMinCompetitiveScore(minCompetitiveScore); + totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; + } } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index 1476b189a70..dc215b14cb5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -476,7 +476,7 @@ public class TestTopDocsCollector extends LuceneTestCase { dir.close(); } - public void testConcurrentMinScoreTopScoreDocs() throws Exception { + public void testConcurrentMinScore() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE)); Document doc = new Document(); @@ -591,121 +591,4 @@ public class TestTopDocsCollector extends LuceneTestCase { reader.close(); dir.close(); } - - public void testConcurrentMinScoreTopFieldDocs() throws Exception { - Directory dir = newDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE)); - Document doc = new Document(); - w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc)); - w.flush(); - w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc)); - w.flush(); - w.addDocuments(Arrays.asList(doc, doc)); - w.flush(); - IndexReader reader = DirectoryReader.open(w); - assertEquals(3, reader.leaves().size()); - w.close(); - - Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC); - CollectorManager manager = - TopFieldCollector.createSharedManager(sort, 2, null, 0); - TopFieldCollector collector = manager.newCollector(); - TopFieldCollector collector2 = manager.newCollector(); - assertTrue(collector.bottomValueChecker == collector2.bottomValueChecker); - BottomValueChecker minValueChecker = collector.bottomValueChecker; - - ScoreAndDoc scorer = new ScoreAndDoc(); - ScoreAndDoc scorer2 = new ScoreAndDoc(); - - LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0)); - leafCollector.setScorer(scorer); - LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1)); - leafCollector2.setScorer(scorer2); - - scorer.doc = 0; - scorer.score = 3; - leafCollector.collect(0); - - scorer2.doc = 0; - scorer2.score = 6; - leafCollector2.collect(0); - - scorer.doc = 1; - scorer.score = 2; - leafCollector.collect(1); - assertEquals(minValueChecker.getBottomValue(), 2f, 0f); - assertEquals(scorer.minCompetitiveScore, 2f, 0f); - assertNull(scorer2.minCompetitiveScore); - - scorer2.doc = 1; - scorer2.score = 9; - leafCollector2.collect(1); - assertEquals(minValueChecker.getBottomValue(), 6f, 0f); - assertEquals(scorer.minCompetitiveScore, 2f, 0f); - assertEquals(scorer2.minCompetitiveScore, 6f, 0f); - - scorer2.doc = 2; - scorer2.score = 7; - leafCollector2.collect(2); - assertEquals(minValueChecker.getBottomValue(), 7f, 0f); - assertEquals(scorer.minCompetitiveScore, 2f, 0f); - assertEquals(scorer2.minCompetitiveScore, 7f, 0f); - - scorer2.doc = 3; - scorer2.score = 1; - leafCollector2.collect(3); - assertEquals(minValueChecker.getBottomValue(), 7f, 0f); - assertEquals(scorer.minCompetitiveScore, 2f, 0f); - assertEquals(scorer2.minCompetitiveScore, 7f, 0f); - - scorer.doc = 2; - scorer.score = 10; - leafCollector.collect(2); - assertEquals(minValueChecker.getBottomValue(), 7f, 0f); - assertEquals(scorer.minCompetitiveScore, 7f, 0f); - assertEquals(scorer2.minCompetitiveScore, 7f, 0f); - - scorer.doc = 3; - scorer.score = 11; - leafCollector.collect(3); - assertEquals(minValueChecker.getBottomValue(), 10, 0f); - assertEquals(scorer.minCompetitiveScore, 10f, 0f); - assertEquals(scorer2.minCompetitiveScore, 7f, 0f); - - TopFieldCollector collector3 = manager.newCollector(); - LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2)); - ScoreAndDoc scorer3 = new ScoreAndDoc(); - leafCollector3.setScorer(scorer3); - assertEquals(scorer3.minCompetitiveScore, 10f, 0f); - - scorer3.doc = 0; - scorer3.score = 1f; - leafCollector3.collect(0); - assertEquals(minValueChecker.getBottomValue(), 10f, 0f); - assertEquals(scorer3.minCompetitiveScore, 10f, 0f); - - scorer.doc = 4; - scorer.score = 11; - leafCollector.collect(4); - assertEquals(minValueChecker.getBottomValue(), 11f, 0f); - assertEquals(scorer.minCompetitiveScore, 11f, 0f); - assertEquals(scorer2.minCompetitiveScore, 7f, 0f); - assertEquals(scorer3.minCompetitiveScore, 10f, 0f); - - scorer3.doc = 1; - scorer3.score = 2f; - leafCollector3.collect(1); - assertEquals(minValueChecker.getBottomValue(), 11f, 0f); - assertEquals(scorer.minCompetitiveScore, 11f, 0f); - assertEquals(scorer2.minCompetitiveScore, 7f, 0f); - assertEquals(scorer3.minCompetitiveScore, 11f, 0f); - - - TopFieldDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3)); - assertEquals(11, topDocs.totalHits.value); - assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits); - - reader.close(); - dir.close(); - } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index c8741eecdf9..b224f39fadc 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -495,4 +495,121 @@ public class TestTopFieldCollector extends LuceneTestCase { dir.close(); } + public void testConcurrentMinScore() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE)); + Document doc = new Document(); + w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc)); + w.flush(); + w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc, doc)); + w.flush(); + w.addDocuments(Arrays.asList(doc, doc)); + w.flush(); + IndexReader reader = DirectoryReader.open(w); + assertEquals(3, reader.leaves().size()); + w.close(); + + Sort sort = new Sort(SortField.FIELD_SCORE, SortField.FIELD_DOC); + CollectorManager manager = + TopFieldCollector.createSharedManager(sort, 2, null, 0); + TopFieldCollector collector = manager.newCollector(); + TopFieldCollector collector2 = manager.newCollector(); + assertTrue(collector.bottomValueChecker == collector2.bottomValueChecker); + BottomValueChecker minValueChecker = collector.bottomValueChecker; + + ScoreAndDoc scorer = new ScoreAndDoc(); + ScoreAndDoc scorer2 = new ScoreAndDoc(); + + LeafCollector leafCollector = collector.getLeafCollector(reader.leaves().get(0)); + leafCollector.setScorer(scorer); + LeafCollector leafCollector2 = collector2.getLeafCollector(reader.leaves().get(1)); + leafCollector2.setScorer(scorer2); + + scorer.doc = 0; + scorer.score = 3; + leafCollector.collect(0); + + scorer2.doc = 0; + scorer2.score = 6; + leafCollector2.collect(0); + + scorer.doc = 1; + scorer.score = 2; + leafCollector.collect(1); + assertEquals(minValueChecker.getBottomValue(), 2f, 0f); + assertEquals(scorer.minCompetitiveScore, 2f, 0f); + assertNull(scorer2.minCompetitiveScore); + + scorer2.doc = 1; + scorer2.score = 9; + leafCollector2.collect(1); + assertEquals(minValueChecker.getBottomValue(), 6f, 0f); + assertEquals(scorer.minCompetitiveScore, 2f, 0f); + assertEquals(scorer2.minCompetitiveScore, 6f, 0f); + + scorer2.doc = 2; + scorer2.score = 7; + leafCollector2.collect(2); + assertEquals(minValueChecker.getBottomValue(), 7f, 0f); + assertEquals(scorer.minCompetitiveScore, 2f, 0f); + assertEquals(scorer2.minCompetitiveScore, 7f, 0f); + + scorer2.doc = 3; + scorer2.score = 1; + leafCollector2.collect(3); + assertEquals(minValueChecker.getBottomValue(), 7f, 0f); + assertEquals(scorer.minCompetitiveScore, 2f, 0f); + assertEquals(scorer2.minCompetitiveScore, 7f, 0f); + + scorer.doc = 2; + scorer.score = 10; + leafCollector.collect(2); + assertEquals(minValueChecker.getBottomValue(), 7f, 0f); + assertEquals(scorer.minCompetitiveScore, 7f, 0f); + assertEquals(scorer2.minCompetitiveScore, 7f, 0f); + + scorer.doc = 3; + scorer.score = 11; + leafCollector.collect(3); + assertEquals(minValueChecker.getBottomValue(), 10, 0f); + assertEquals(scorer.minCompetitiveScore, 10f, 0f); + assertEquals(scorer2.minCompetitiveScore, 7f, 0f); + + TopFieldCollector collector3 = manager.newCollector(); + LeafCollector leafCollector3 = collector3.getLeafCollector(reader.leaves().get(2)); + ScoreAndDoc scorer3 = new ScoreAndDoc(); + leafCollector3.setScorer(scorer3); + assertEquals(scorer3.minCompetitiveScore, 10f, 0f); + + scorer3.doc = 0; + scorer3.score = 1f; + leafCollector3.collect(0); + assertEquals(minValueChecker.getBottomValue(), 10f, 0f); + assertEquals(scorer3.minCompetitiveScore, 10f, 0f); + + scorer.doc = 4; + scorer.score = 11; + leafCollector.collect(4); + assertEquals(minValueChecker.getBottomValue(), 11f, 0f); + assertEquals(scorer.minCompetitiveScore, 11f, 0f); + assertEquals(scorer2.minCompetitiveScore, 7f, 0f); + assertEquals(scorer3.minCompetitiveScore, 10f, 0f); + + scorer3.doc = 1; + scorer3.score = 2f; + leafCollector3.collect(1); + assertEquals(minValueChecker.getBottomValue(), 11f, 0f); + assertEquals(scorer.minCompetitiveScore, 11f, 0f); + assertEquals(scorer2.minCompetitiveScore, 7f, 0f); + assertEquals(scorer3.minCompetitiveScore, 11f, 0f); + + + TopFieldDocs topDocs = manager.reduce(Arrays.asList(collector, collector2, collector3)); + assertEquals(11, topDocs.totalHits.value); + assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits); + + reader.close(); + dir.close(); + } + }