diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 20b8554be04..d08374d01af 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -548,6 +548,8 @@ Bug Fixes * LUCENE-10154: NumericLeafComparator to define getPointValues. (Mayya Sharipova, Adrien Grand) +* LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search. (Jim Ferenczi, Adrien Grand) + Build --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java index fa662389994..b0f60f8113f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java @@ -26,7 +26,7 @@ final class MaxScoreAccumulator { static final int DEFAULT_INTERVAL = 0x3ff; // scores are always positive - final LongAccumulator acc = new LongAccumulator(Long::max, Long.MIN_VALUE); + final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE); // non-final and visible for tests long modInterval; @@ -35,9 +35,26 @@ final class MaxScoreAccumulator { this.modInterval = DEFAULT_INTERVAL; } - void accumulate(int docID, float score) { - assert docID >= 0 && score >= 0; - long encode = (((long) Float.floatToIntBits(score)) << 32) | docID; + /** + * Return the max encoded DocAndScore in a way that is consistent with {@link + * DocAndScore#compareTo}. + */ + private static long maxEncode(long v1, long v2) { + float score1 = Float.intBitsToFloat((int) (v1 >> 32)); + float score2 = Float.intBitsToFloat((int) (v2 >> 32)); + int cmp = Float.compare(score1, score2); + if (cmp == 0) { + // tie-break on the minimum doc base + return (int) v1 < (int) v2 ? v1 : v2; + } else if (cmp > 0) { + return v1; + } + return v2; + } + + void accumulate(int docBase, float score) { + assert docBase >= 0 && score >= 0; + long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase; acc.accumulate(encode); } @@ -47,16 +64,16 @@ final class MaxScoreAccumulator { return null; } float score = Float.intBitsToFloat((int) (value >> 32)); - int docID = (int) value; - return new DocAndScore(docID, score); + int docBase = (int) value; + return new DocAndScore(docBase, score); } static class DocAndScore implements Comparable { - final int docID; + final int docBase; final float score; - DocAndScore(int docID, float score) { - this.docID = docID; + DocAndScore(int docBase, float score) { + this.docBase = docBase; this.score = score; } @@ -64,7 +81,14 @@ final class MaxScoreAccumulator { public int compareTo(DocAndScore o) { int cmp = Float.compare(score, o.score); if (cmp == 0) { - return Integer.compare(docID, o.docID); + // tie-break on the minimum doc base + // For a given minimum competitive score, we want to know the first segment + // where this score occurred, hence the reverse order here. + // On segments with a lower docBase, any document whose score is greater + // than or equal to this score would be competitive, while on segments with a + // higher docBase, documents need to have a strictly greater score to be + // competitive since we tie break on doc ID. + return Integer.compare(o.docBase, docBase); } return cmp; } @@ -74,17 +98,17 @@ final class MaxScoreAccumulator { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; DocAndScore result = (DocAndScore) o; - return docID == result.docID && Float.compare(result.score, score) == 0; + return docBase == result.docBase && Float.compare(result.score, score) == 0; } @Override public int hashCode() { - return Objects.hash(docID, score); + return Objects.hash(docBase, score); } @Override public String toString() { - return "DocAndScore{" + "docID=" + docID + ", score=" + score + '}'; + return "DocAndScore{" + "docBase=" + docBase + ", score=" + score + '}'; } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java index db977041660..0023d718424 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java @@ -134,9 +134,9 @@ public abstract class TopFieldCollector extends TopDocsCollector { public void setScorer(Scorable scorer) throws IOException { this.scorer = scorer; comparator.setScorer(scorer); - minCompetitiveScore = 0f; - updateMinCompetitiveScore(scorer); - if (minScoreAcc != null) { + if (minScoreAcc == null) { + updateMinCompetitiveScore(scorer); + } else { updateGlobalMinCompetitiveScore(scorer); } } @@ -191,6 +191,8 @@ public abstract class TopFieldCollector extends TopDocsCollector { @Override public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + // reset the minimum competitive score + minCompetitiveScore = 0f; docBase = context.docBase; return new TopFieldLeafCollector(queue, sort, context) { @@ -244,6 +246,8 @@ public abstract class TopFieldCollector extends TopDocsCollector { @Override public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + // reset the minimum competitive score + minCompetitiveScore = 0f; docBase = context.docBase; final int afterDoc = after.doc - docBase; @@ -363,7 +367,7 @@ public abstract class TopFieldCollector extends TopDocsCollector { minCompetitiveScore = minScore; totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO; if (minScoreAcc != null) { - minScoreAcc.accumulate(bottom.doc, minScore); + minScoreAcc.accumulate(docBase, minScore); } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java index 87d2a2710f3..d57dbec458d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java +++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java @@ -55,14 +55,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { // reset the minimum competitive score docBase = context.docBase; - return new ScorerLeafCollector() { + minCompetitiveScore = 0f; + return new ScorerLeafCollector() { @Override public void setScorer(Scorable scorer) throws IOException { super.setScorer(scorer); - minCompetitiveScore = 0f; - updateMinCompetitiveScore(scorer); - if (minScoreAcc != null) { + if (minScoreAcc == null) { + updateMinCompetitiveScore(scorer); + } else { updateGlobalMinCompetitiveScore(scorer); } } @@ -132,8 +133,19 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { docBase = context.docBase; final int afterDoc = after.doc - context.docBase; + minCompetitiveScore = 0f; return new ScorerLeafCollector() { + @Override + public void setScorer(Scorable scorer) throws IOException { + super.setScorer(scorer); + if (minScoreAcc == null) { + updateMinCompetitiveScore(scorer); + } else { + updateGlobalMinCompetitiveScore(scorer); + } + } + @Override public void collect(int doc) throws IOException { float score = scorer.score(); @@ -307,7 +319,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { // the next float if the global minimum score is set on a document id that is // smaller than the ids in the current leaf float score = - docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score; + docBase >= maxMinScore.docBase ? Math.nextUp(maxMinScore.score) : maxMinScore.score; if (score > minCompetitiveScore) { assert hitsThresholdChecker.isThresholdReached(); scorer.setMinCompetitiveScore(score); @@ -332,7 +344,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector { // we don't use the next float but we register the document // id so that other leaves can require it if they are after // the current maximum - minScoreAcc.accumulate(pqTop.doc, pqTop.score); + minScoreAcc.accumulate(docBase, pqTop.score); } } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java index d53a2a2fe5e..28bec9cc15d 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java @@ -23,21 +23,29 @@ public class TestMaxScoreAccumulator extends LuceneTestCase { public void testSimple() { MaxScoreAccumulator acc = new MaxScoreAccumulator(); acc.accumulate(0, 0f); + assertEquals(0f, acc.get().score, 0); + assertEquals(0, acc.get().docBase, 0); acc.accumulate(10, 0f); assertEquals(0f, acc.get().score, 0); - assertEquals(10, acc.get().docID, 0); + assertEquals(0, acc.get().docBase, 0); acc.accumulate(100, 1000f); assertEquals(1000f, acc.get().score, 0); - assertEquals(100, acc.get().docID, 0); + assertEquals(100, acc.get().docBase, 0); acc.accumulate(1000, 5f); assertEquals(1000f, acc.get().score, 0); - assertEquals(100, acc.get().docID, 0); + assertEquals(100, acc.get().docBase, 0); acc.accumulate(99, 1000f); assertEquals(1000f, acc.get().score, 0); - assertEquals(100, acc.get().docID, 0); - acc.accumulate(0, 1001f); + assertEquals(99, acc.get().docBase, 0); + acc.accumulate(1000, 1001f); assertEquals(1001f, acc.get().score, 0); - assertEquals(0, acc.get().docID, 0); + assertEquals(1000, acc.get().docBase, 0); + acc.accumulate(10, 1001f); + assertEquals(1001f, acc.get().score, 0); + assertEquals(10, acc.get().docBase, 0); + acc.accumulate(100, 1001f); + assertEquals(1001f, acc.get().score, 0); + assertEquals(10, acc.get().docBase, 0); } public void testRandom() { @@ -48,7 +56,7 @@ public class TestMaxScoreAccumulator extends LuceneTestCase { for (int i = 0; i < numDocs; i++) { MaxScoreAccumulator.DocAndScore res = new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat()); - acc.accumulate(res.docID, res.score); + acc.accumulate(res.docBase, res.score); if (res.compareTo(max) > 0) { max = res; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java index 35b5dbd6fa2..ceecb82613e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java @@ -18,10 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Arrays; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; @@ -41,7 +37,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NamedThreadFactory; public class TestTopDocsCollector extends LuceneTestCase { @@ -141,7 +136,7 @@ public class TestTopDocsCollector extends LuceneTestCase { private TopDocsCollector doSearchWithThreshold( int numResults, int thresHold, Query q, IndexReader indexReader) throws IOException { - IndexSearcher searcher = new IndexSearcher(indexReader); + IndexSearcher searcher = newSearcher(indexReader, true, true, false); TopDocsCollector tdc = TopScoreDocCollector.create(numResults, thresHold); searcher.search(q, tdc); return tdc; @@ -149,24 +144,10 @@ public class TestTopDocsCollector extends LuceneTestCase { private TopDocs doConcurrentSearchWithThreshold( int numResults, int threshold, Query q, IndexReader indexReader) throws IOException { - ExecutorService service = - new ThreadPoolExecutor( - 4, - 4, - 0L, - TimeUnit.MILLISECONDS, - new LinkedBlockingQueue(), - new NamedThreadFactory("TestTopDocsCollector")); - try { - IndexSearcher searcher = new IndexSearcher(indexReader, service); - - CollectorManager collectorManager = - TopScoreDocCollector.createSharedManager(numResults, null, threshold); - - return searcher.search(q, collectorManager); - } finally { - service.shutdown(); - } + IndexSearcher searcher = newSearcher(indexReader, true, true, true); + CollectorManager collectorManager = + TopScoreDocCollector.createSharedManager(numResults, null, threshold); + return searcher.search(q, collectorManager); } @Override @@ -303,8 +284,9 @@ public class TestTopDocsCollector extends LuceneTestCase { Float minCompetitiveScore = null; @Override - public void setMinCompetitiveScore(float minCompetitiveScore) { - this.minCompetitiveScore = minCompetitiveScore; + public void setMinCompetitiveScore(float score) { + assert minCompetitiveScore == null || score >= minCompetitiveScore; + this.minCompetitiveScore = score; } @Override @@ -356,9 +338,9 @@ public class TestTopDocsCollector extends LuceneTestCase { scorer.doc = 3; scorer.score = 0.5f; // Make sure we do not call setMinCompetitiveScore for non-competitive hits - scorer.minCompetitiveScore = Float.NaN; + scorer.minCompetitiveScore = null; leafCollector.collect(3); - assertTrue(Float.isNaN(scorer.minCompetitiveScore)); + assertNull(scorer.minCompetitiveScore); scorer.doc = 4; scorer.score = 4; @@ -613,6 +595,10 @@ public class TestTopDocsCollector extends LuceneTestCase { assertEquals(11, topDocs.totalHits.value); assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits); + leafCollector.setScorer(scorer); + leafCollector2.setScorer(scorer2); + leafCollector3.setScorer(scorer3); + reader.close(); dir.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java index 8419caa7a4c..9d99f87ed90 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java @@ -21,10 +21,6 @@ import static org.apache.lucene.search.SortField.FIELD_SCORE; import java.io.IOException; import java.util.Arrays; import java.util.Comparator; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; @@ -42,7 +38,6 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.FieldValueHitQueue.Entry; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.NamedThreadFactory; import org.apache.lucene.util.TestUtil; public class TestTopFieldCollector extends LuceneTestCase { @@ -75,7 +70,7 @@ public class TestTopFieldCollector extends LuceneTestCase { private TopFieldCollector doSearchWithThreshold( int numResults, int thresHold, Query q, Sort sort, IndexReader indexReader) throws IOException { - IndexSearcher searcher = new IndexSearcher(indexReader); + IndexSearcher searcher = newSearcher(indexReader); TopFieldCollector tdc = TopFieldCollector.create(sort, numResults, thresHold); searcher.search(q, tdc); return tdc; @@ -84,26 +79,14 @@ public class TestTopFieldCollector extends LuceneTestCase { private TopDocs doConcurrentSearchWithThreshold( int numResults, int threshold, Query q, Sort sort, IndexReader indexReader) throws IOException { - ExecutorService service = - new ThreadPoolExecutor( - 4, - 4, - 0L, - TimeUnit.MILLISECONDS, - new LinkedBlockingQueue(), - new NamedThreadFactory("TestTopDocsCollector")); - try { - IndexSearcher searcher = new IndexSearcher(indexReader, service); + IndexSearcher searcher = newSearcher(indexReader, true, true, true); - CollectorManager collectorManager = - TopFieldCollector.createSharedManager(sort, numResults, null, threshold); + CollectorManager collectorManager = + TopFieldCollector.createSharedManager(sort, numResults, null, threshold); - TopDocs tdc = searcher.search(q, collectorManager); + TopDocs tdc = searcher.search(q, collectorManager); - return tdc; - } finally { - service.shutdown(); - } + return tdc; } public void testSortWithoutFillFields() throws Exception { @@ -146,17 +129,7 @@ public class TestTopFieldCollector extends LuceneTestCase { } public void testSharedHitcountCollector() throws Exception { - - ExecutorService service = - new ThreadPoolExecutor( - 4, - 4, - 0L, - TimeUnit.MILLISECONDS, - new LinkedBlockingQueue(), - new NamedThreadFactory("TestTopFieldCollector")); - - IndexSearcher concurrentSearcher = new IndexSearcher(ir, service); + IndexSearcher concurrentSearcher = newSearcher(ir, true, true, true); // Two Sort criteria to instantiate the multi/single comparators. Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort()}; @@ -178,8 +151,6 @@ public class TestTopFieldCollector extends LuceneTestCase { CheckHits.checkEqual(q, td.scoreDocs, td2.scoreDocs); } - - service.shutdown(); } public void testSortWithoutTotalHitTracking() throws Exception { @@ -678,6 +649,10 @@ public class TestTopFieldCollector extends LuceneTestCase { assertEquals(11, topDocs.totalHits.value); assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits); + leafCollector.setScorer(scorer); + leafCollector2.setScorer(scorer2); + leafCollector3.setScorer(scorer3); + reader.close(); dir.close(); }