mirror of https://github.com/apache/lucene.git
LUCENE-10553: Fix WANDScorer's handling of 0 and +Infty. (#860)
The computation of the scaling factor has special cases for these two values, but the current logic is backwards.
This commit is contained in:
parent
a89c57f35f
commit
26301898b2
|
@ -60,17 +60,17 @@ final class WANDScorer extends Scorer {
|
|||
* {@code [2^23, 2^24[}. Special cases:
|
||||
*
|
||||
* <pre>
|
||||
* scalingFactor(0) = scalingFactor(MIN_VALUE) - 1
|
||||
* scalingFactor(+Infty) = scalingFactor(MAX_VALUE) + 1
|
||||
* </pre>
|
||||
* scalingFactor(0) = scalingFactor(MIN_VALUE) + 1
|
||||
* scalingFactor(+Infty) = scalingFactor(MAX_VALUE) - 1
|
||||
* </pre>
|
||||
*/
|
||||
static int scalingFactor(float f) {
|
||||
if (f < 0) {
|
||||
throw new IllegalArgumentException("Scores must be positive or null");
|
||||
} else if (f == 0) {
|
||||
return scalingFactor(Float.MIN_VALUE) - 1;
|
||||
return scalingFactor(Float.MIN_VALUE) + 1;
|
||||
} else if (Float.isInfinite(f)) {
|
||||
return scalingFactor(Float.MAX_VALUE) + 1;
|
||||
return scalingFactor(Float.MAX_VALUE) - 1;
|
||||
} else {
|
||||
double d = f;
|
||||
// Since doubles have more amplitude than floats for the
|
||||
|
@ -86,7 +86,6 @@ final class WANDScorer extends Scorer {
|
|||
* sure we do not miss any matches.
|
||||
*/
|
||||
static long scaleMaxScore(float maxScore, int scalingFactor) {
|
||||
assert scalingFactor(maxScore) >= scalingFactor;
|
||||
assert Float.isNaN(maxScore) == false;
|
||||
assert maxScore >= 0;
|
||||
|
||||
|
@ -95,7 +94,8 @@ final class WANDScorer extends Scorer {
|
|||
final double scaled = Math.scalb((double) maxScore, scalingFactor);
|
||||
|
||||
if (scaled > MAX_SCALED_SCORE) {
|
||||
// This happens if one scorer returns +Infty as a max score
|
||||
// This happens if one scorer returns +Infty as a max score, or if the scorer returns greater
|
||||
// max scores locally than globally - which shouldn't happen with well-behaved scorers
|
||||
return MAX_SCALED_SCORE;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,10 +45,17 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
doTestScalingFactor(Math.nextUp(Float.MIN_VALUE));
|
||||
doTestScalingFactor(Float.MAX_VALUE);
|
||||
doTestScalingFactor(Math.nextDown(Float.MAX_VALUE));
|
||||
assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) - 1, WANDScorer.scalingFactor(0));
|
||||
assertEquals(WANDScorer.scalingFactor(Float.MIN_VALUE) + 1, WANDScorer.scalingFactor(0));
|
||||
assertEquals(
|
||||
WANDScorer.scalingFactor(Float.MAX_VALUE) + 1,
|
||||
WANDScorer.scalingFactor(Float.MAX_VALUE) - 1,
|
||||
WANDScorer.scalingFactor(Float.POSITIVE_INFINITY));
|
||||
|
||||
// Greater scores produce lower scaling factors
|
||||
assertTrue(WANDScorer.scalingFactor(1f) > WANDScorer.scalingFactor(10f));
|
||||
assertTrue(
|
||||
WANDScorer.scalingFactor(Float.MAX_VALUE)
|
||||
> WANDScorer.scalingFactor(Float.POSITIVE_INFINITY));
|
||||
assertTrue(WANDScorer.scalingFactor(0f) > WANDScorer.scalingFactor(Float.MIN_VALUE));
|
||||
}
|
||||
|
||||
private void doTestScalingFactor(float f) {
|
||||
|
@ -720,7 +727,65 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
/** Degenerate case: all clauses produce a score of 0. */
|
||||
public void testRandomWithZeroScores() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
int numDocs = atLeast(1000);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
int numValues = random().nextInt(1 << random().nextInt(5));
|
||||
int start = random().nextInt(10);
|
||||
for (int j = 0; j < numValues; ++j) {
|
||||
doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
|
||||
}
|
||||
w.addDocument(doc);
|
||||
}
|
||||
IndexReader reader = DirectoryReader.open(w);
|
||||
w.close();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
for (int iter = 0; iter < 100; ++iter) {
|
||||
int start = random().nextInt(10);
|
||||
int numClauses = random().nextInt(1 << random().nextInt(5));
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
for (int i = 0; i < numClauses; ++i) {
|
||||
builder.add(
|
||||
maybeWrap(
|
||||
new BoostQuery(
|
||||
new ConstantScoreQuery(
|
||||
new TermQuery(new Term("foo", Integer.toString(start + i)))),
|
||||
0f)),
|
||||
Occur.SHOULD);
|
||||
}
|
||||
Query query = builder.build();
|
||||
|
||||
CheckHits.checkTopScores(random(), query, searcher);
|
||||
|
||||
int filterTerm = random().nextInt(30);
|
||||
Query filteredQuery =
|
||||
new BooleanQuery.Builder()
|
||||
.add(query, Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
|
||||
.build();
|
||||
|
||||
CheckHits.checkTopScores(random(), filteredQuery, searcher);
|
||||
}
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Test the case when some clauses produce infinite max scores. */
|
||||
public void testRandomWithInfiniteMaxScore() throws IOException {
|
||||
doTestRandomSpecialMaxScore(Float.POSITIVE_INFINITY);
|
||||
}
|
||||
|
||||
/** Test the case when some clauses produce finite max scores, but their sum overflows. */
|
||||
public void testRandomWithMaxScoreOverflow() throws IOException {
|
||||
doTestRandomSpecialMaxScore(Float.MAX_VALUE);
|
||||
}
|
||||
|
||||
private void doTestRandomSpecialMaxScore(float maxScore) throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
|
||||
int numDocs = atLeast(1000);
|
||||
|
@ -745,7 +810,8 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
Query query = new TermQuery(new Term("foo", Integer.toString(start + i)));
|
||||
if (random().nextBoolean()) {
|
||||
query =
|
||||
new InfiniteMaxScoreWrapperQuery(query, numDocs / TestUtil.nextInt(random(), 1, 100));
|
||||
new MaxScoreWrapperQuery(
|
||||
query, numDocs / TestUtil.nextInt(random(), 1, 100), maxScore);
|
||||
}
|
||||
builder.add(query, Occur.SHOULD);
|
||||
}
|
||||
|
@ -766,14 +832,16 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
private static class InfiniteMaxScoreWrapperScorer extends FilterScorer {
|
||||
private static class MaxScoreWrapperScorer extends FilterScorer {
|
||||
|
||||
private final int maxRange;
|
||||
private final float maxScore;
|
||||
private int lastShallowTarget = -1;
|
||||
|
||||
InfiniteMaxScoreWrapperScorer(Scorer scorer, int maxRange) {
|
||||
MaxScoreWrapperScorer(Scorer scorer, int maxRange, float maxScore) {
|
||||
super(scorer);
|
||||
this.maxRange = maxRange;
|
||||
this.maxScore = maxScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -785,24 +853,26 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
@Override
|
||||
public float getMaxScore(int upTo) throws IOException {
|
||||
if (upTo - Math.max(docID(), lastShallowTarget) >= maxRange) {
|
||||
return Float.POSITIVE_INFINITY;
|
||||
return maxScore;
|
||||
}
|
||||
return in.getMaxScore(upTo);
|
||||
}
|
||||
}
|
||||
|
||||
private static class InfiniteMaxScoreWrapperQuery extends Query {
|
||||
private static class MaxScoreWrapperQuery extends Query {
|
||||
|
||||
private final Query query;
|
||||
private final int maxRange;
|
||||
private final float maxScore;
|
||||
|
||||
/**
|
||||
* If asked for the maximum score over a range of doc IDs that is greater than or equal to
|
||||
* maxRange, this query will return a maximum score of +Infty
|
||||
* maxRange, this query will return the provided maxScore.
|
||||
*/
|
||||
InfiniteMaxScoreWrapperQuery(Query query, int maxRange) {
|
||||
MaxScoreWrapperQuery(Query query, int maxRange, float maxScore) {
|
||||
this.query = query;
|
||||
this.maxRange = maxRange;
|
||||
this.maxScore = maxScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -812,19 +882,27 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return sameClassAs(obj) && query.equals(((InfiniteMaxScoreWrapperQuery) obj).query);
|
||||
if (sameClassAs(obj) == false) {
|
||||
return false;
|
||||
}
|
||||
MaxScoreWrapperQuery that = (MaxScoreWrapperQuery) obj;
|
||||
return query.equals(that.query) && maxRange == that.maxRange && maxScore == that.maxScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31 * classHash() + query.hashCode();
|
||||
int hash = classHash();
|
||||
hash = 31 * hash + query.hashCode();
|
||||
hash = 31 * hash + Integer.hashCode(maxRange);
|
||||
hash = 31 * hash + Float.hashCode(maxScore);
|
||||
return hash;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
Query rewritten = query.rewrite(reader);
|
||||
if (rewritten != query) {
|
||||
return new InfiniteMaxScoreWrapperQuery(rewritten, maxRange);
|
||||
return new MaxScoreWrapperQuery(rewritten, maxRange, maxScore);
|
||||
}
|
||||
return super.rewrite(reader);
|
||||
}
|
||||
|
@ -842,7 +920,7 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
if (scorer == null) {
|
||||
return null;
|
||||
} else {
|
||||
return new InfiniteMaxScoreWrapperScorer(scorer, maxRange);
|
||||
return new MaxScoreWrapperScorer(scorer, maxRange, maxScore);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -856,7 +934,7 @@ public class TestWANDScorer extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public Scorer get(long leadCost) throws IOException {
|
||||
return new InfiniteMaxScoreWrapperScorer(supplier.get(leadCost), maxRange);
|
||||
return new MaxScoreWrapperScorer(supplier.get(leadCost), maxRange, maxScore);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue