From 26301898b20e30f484d653ab6415d460d011b099 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 5 May 2022 10:24:28 +0100 Subject: [PATCH] LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860) The computation of the scaling factor has special cases for these two values, but the current logic is backwards. --- .../org/apache/lucene/search/WANDScorer.java | 14 +-- .../apache/lucene/search/TestWANDScorer.java | 106 +++++++++++++++--- 2 files changed, 99 insertions(+), 21 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index a6ba7b0d29f..90694e6dc99 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -60,17 +60,17 @@ final class WANDScorer extends Scorer { * {@code [2^23, 2^24[}. Special cases: * *
-   *    scalingFactor(0) = scalingFactor(MIN_VALUE) - 1
-   *    scalingFactor(+Infty) = scalingFactor(MAX_VALUE) + 1
-   *  
+ * scalingFactor(0) = scalingFactor(MIN_VALUE) + 1 + * scalingFactor(+Infty) = scalingFactor(MAX_VALUE) - 1 + * */ static int scalingFactor(float f) { if (f < 0) { throw new IllegalArgumentException("Scores must be positive or null"); } else if (f == 0) { - return scalingFactor(Float.MIN_VALUE) - 1; + return scalingFactor(Float.MIN_VALUE) + 1; } else if (Float.isInfinite(f)) { - return scalingFactor(Float.MAX_VALUE) + 1; + return scalingFactor(Float.MAX_VALUE) - 1; } else { double d = f; // Since doubles have more amplitude than floats for the @@ -86,7 +86,6 @@ final class WANDScorer extends Scorer { * sure we do not miss any matches. */ static long scaleMaxScore(float maxScore, int scalingFactor) { - assert scalingFactor(maxScore) >= scalingFactor; assert Float.isNaN(maxScore) == false; assert maxScore >= 0; @@ -95,7 +94,8 @@ final class WANDScorer extends Scorer { final double scaled = Math.scalb((double) maxScore, scalingFactor); if (scaled > MAX_SCALED_SCORE) { - // This happens if one scorer returns +Infty as a max score + // This happens if one scorer returns +Infty as a max score, or if the scorer returns greater + // max scores locally than globally - which shouldn't happen with well-behaved scorers return MAX_SCALED_SCORE; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java index 5ab0291dec5..5f5d214070c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestWANDScorer.java @@ -45,10 +45,17 @@ public class TestWANDScorer extends LuceneTestCase { doTestScalingFactor(Math.nextUp(Float.MIN_VALUE)); doTestScalingFactor(Float.MAX_VALUE); doTestScalingFactor(Math.nextDown(Float.MAX_VALUE)); - assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) - 1, WANDScorer.scalingFactor(0)); + assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) + 1, WANDScorer.scalingFactor(0)); assertEquals( - WANDScorer.scalingFactor(Float.MAX_VALUE) + 1, + WANDScorer.scalingFactor(Float.MAX_VALUE) - 1, WANDScorer.scalingFactor(Float.POSITIVE_INFINITY)); + + // Greater scores produce lower scaling factors + assertTrue(WANDScorer.scalingFactor(1f) > WANDScorer.scalingFactor(10f)); + assertTrue( + WANDScorer.scalingFactor(Float.MAX_VALUE) + > WANDScorer.scalingFactor(Float.POSITIVE_INFINITY)); + assertTrue(WANDScorer.scalingFactor(0f) > WANDScorer.scalingFactor(Float.MIN_VALUE)); } private void doTestScalingFactor(float f) { @@ -720,7 +727,65 @@ public class TestWANDScorer extends LuceneTestCase { dir.close(); } + /** Degenerate case: all clauses produce a score of 0. */ + public void testRandomWithZeroScores() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + int numDocs = atLeast(1000); + for (int i = 0; i < numDocs; ++i) { + Document doc = new Document(); + int numValues = random().nextInt(1 << random().nextInt(5)); + int start = random().nextInt(10); + for (int j = 0; j < numValues; ++j) { + doc.add(new StringField("foo", Integer.toString(start + j), Store.NO)); + } + w.addDocument(doc); + } + IndexReader reader = DirectoryReader.open(w); + w.close(); + IndexSearcher searcher = newSearcher(reader); + + for (int iter = 0; iter < 100; ++iter) { + int start = random().nextInt(10); + int numClauses = random().nextInt(1 << random().nextInt(5)); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (int i = 0; i < numClauses; ++i) { + builder.add( + maybeWrap( + new BoostQuery( + new ConstantScoreQuery( + new TermQuery(new Term("foo", Integer.toString(start + i)))), + 0f)), + Occur.SHOULD); + } + Query query = builder.build(); + + CheckHits.checkTopScores(random(), query, searcher); + + int filterTerm = random().nextInt(30); + Query filteredQuery = + new BooleanQuery.Builder() + .add(query, Occur.MUST) + .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER) + .build(); + + CheckHits.checkTopScores(random(), filteredQuery, searcher); + } + reader.close(); + dir.close(); + } + + /** Test the case when some clauses produce infinite max scores. */ public void testRandomWithInfiniteMaxScore() throws IOException { + doTestRandomSpecialMaxScore(Float.POSITIVE_INFINITY); + } + + /** Test the case when some clauses produce finite max scores, but their sum overflows. */ + public void testRandomWithMaxScoreOverflow() throws IOException { + doTestRandomSpecialMaxScore(Float.MAX_VALUE); + } + + private void doTestRandomSpecialMaxScore(float maxScore) throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); int numDocs = atLeast(1000); @@ -745,7 +810,8 @@ public class TestWANDScorer extends LuceneTestCase { Query query = new TermQuery(new Term("foo", Integer.toString(start + i))); if (random().nextBoolean()) { query = - new InfiniteMaxScoreWrapperQuery(query, numDocs / TestUtil.nextInt(random(), 1, 100)); + new MaxScoreWrapperQuery( + query, numDocs / TestUtil.nextInt(random(), 1, 100), maxScore); } builder.add(query, Occur.SHOULD); } @@ -766,14 +832,16 @@ public class TestWANDScorer extends LuceneTestCase { dir.close(); } - private static class InfiniteMaxScoreWrapperScorer extends FilterScorer { + private static class MaxScoreWrapperScorer extends FilterScorer { private final int maxRange; + private final float maxScore; private int lastShallowTarget = -1; - InfiniteMaxScoreWrapperScorer(Scorer scorer, int maxRange) { + MaxScoreWrapperScorer(Scorer scorer, int maxRange, float maxScore) { super(scorer); this.maxRange = maxRange; + this.maxScore = maxScore; } @Override @@ -785,24 +853,26 @@ public class TestWANDScorer extends LuceneTestCase { @Override public float getMaxScore(int upTo) throws IOException { if (upTo - Math.max(docID(), lastShallowTarget) >= maxRange) { - return Float.POSITIVE_INFINITY; + return maxScore; } return in.getMaxScore(upTo); } } - private static class InfiniteMaxScoreWrapperQuery extends Query { + private static class MaxScoreWrapperQuery extends Query { private final Query query; private final int maxRange; + private final float maxScore; /** * If asked for the maximum score over a range of doc IDs that is greater than or equal to - * maxRange, this query will return a maximum score of +Infty + * maxRange, this query will return the provided maxScore. */ - InfiniteMaxScoreWrapperQuery(Query query, int maxRange) { + MaxScoreWrapperQuery(Query query, int maxRange, float maxScore) { this.query = query; this.maxRange = maxRange; + this.maxScore = maxScore; } @Override @@ -812,19 +882,27 @@ public class TestWANDScorer extends LuceneTestCase { @Override public boolean equals(Object obj) { - return sameClassAs(obj) && query.equals(((InfiniteMaxScoreWrapperQuery) obj).query); + if (sameClassAs(obj) == false) { + return false; + } + MaxScoreWrapperQuery that = (MaxScoreWrapperQuery) obj; + return query.equals(that.query) && maxRange == that.maxRange && maxScore == that.maxScore; } @Override public int hashCode() { - return 31 * classHash() + query.hashCode(); + int hash = classHash(); + hash = 31 * hash + query.hashCode(); + hash = 31 * hash + Integer.hashCode(maxRange); + hash = 31 * hash + Float.hashCode(maxScore); + return hash; } @Override public Query rewrite(IndexReader reader) throws IOException { Query rewritten = query.rewrite(reader); if (rewritten != query) { - return new InfiniteMaxScoreWrapperQuery(rewritten, maxRange); + return new MaxScoreWrapperQuery(rewritten, maxRange, maxScore); } return super.rewrite(reader); } @@ -842,7 +920,7 @@ public class TestWANDScorer extends LuceneTestCase { if (scorer == null) { return null; } else { - return new InfiniteMaxScoreWrapperScorer(scorer, maxRange); + return new MaxScoreWrapperScorer(scorer, maxRange, maxScore); } } @@ -856,7 +934,7 @@ public class TestWANDScorer extends LuceneTestCase { @Override public Scorer get(long leadCost) throws IOException { - return new InfiniteMaxScoreWrapperScorer(supplier.get(leadCost), maxRange); + return new MaxScoreWrapperScorer(supplier.get(leadCost), maxRange, maxScore); } @Override