LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always return a ConstantScoreQuery to make scoring consistent. Previously, if no terms were available, it returned an empty, unwrapped BooleanQuery, which has a different query norm.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1526399 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2013-09-26 06:46:19 +00:00
parent 818b41198b
commit 40e71b8f36
4 changed files with 66 additions and 12 deletions

View File

@ -89,6 +89,11 @@ Bug Fixes
its state, which could result in exceptions being thrown, as well as
incorrect ordinals returned from getParent. (Shai Erera)
* LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always
return a ConstantScoreQuery to make scoring consistent. Previously, if no
terms were available, it returned an empty, unwrapped BooleanQuery, which
has a different query norm. (Nik Everett, Uwe Schindler)
API Changes:
* LUCENE-5222: Add SortField.needsScores(). Previously it was not possible

View File

@ -96,17 +96,17 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
final int size = col.pendingTerms.size();
if (col.hasCutOff) {
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
} else if (size == 0) {
return getTopLevelQuery();
} else {
final BooleanQuery bq = getTopLevelQuery();
final BytesRefHash pendingTerms = col.pendingTerms;
final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
for(int i = 0; i < size; i++) {
final int pos = sort[i];
// docFreq is not used for constant score here, we pass 1
// to explicitly set a fake value, so it's not calculated
addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
if (size > 0) {
final BytesRefHash pendingTerms = col.pendingTerms;
final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
for(int i = 0; i < size; i++) {
final int pos = sort[i];
// docFreq is not used for constant score here, we pass 1
// to explicitly set a fake value, so it's not calculated
addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
}
}
// Strip scores
final Query result = new ConstantScoreQuery(bq);

View File

@ -87,9 +87,6 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
@Override
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query);
// TODO: if empty boolean query return NullQuery?
if (bq.clauses().isEmpty())
return bq;
// strip the scores off
final Query result = new ConstantScoreQuery(bq);
result.setBoost(query.getBoost());

View File

@ -160,6 +160,58 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
result[i].score, SCORE_COMP_THRESH);
}
result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, 1000).scoreDocs;
numHits = result.length;
assertEquals("wrong number of results", 6, numHits);
for (int i = 0; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score, SCORE_COMP_THRESH);
}
}
@Test // Regression test for LUCENE-5245: an MTQ rewrite that matches no terms must still produce a CSQ so the query norm stays consistent.
public void testEqualScoresWhenNoHits() throws IOException {
  // NOTE: relies on the index built in *this* setUp
  final IndexSearcher searcher = newSearcher(reader);
  // Clause that matches exactly one document; the same instance is reused in every combined query below.
  final TermQuery matchingTerm = new TermQuery(new Term("data", "1"));

  // Baseline: the range query is built without an explicit rewrite method and matches no documents.
  BooleanQuery combined = new BooleanQuery();
  combined.add(matchingTerm, BooleanClause.Occur.SHOULD);
  combined.add(csrq("data", "#", "#", T, T), BooleanClause.Occur.SHOULD);
  ScoreDoc[] hits = searcher.search(combined, null, 1000).scoreDocs;
  int hitCount = hits.length;
  assertEquals("wrong number of results", 1, hitCount);
  // The score of the single hit is the reference value for the other rewrite methods.
  final float expectedScore = hits[0].score;
  for (int doc = 1; doc < hitCount; doc++) {
    assertEquals("score for " + doc + " was not the same", expectedScore,
        hits[doc].score, SCORE_COMP_THRESH);
  }

  // Same combination, but the empty range query uses the constant-score boolean rewrite.
  combined = new BooleanQuery();
  combined.add(matchingTerm, BooleanClause.Occur.SHOULD);
  combined.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE),
      BooleanClause.Occur.SHOULD);
  hits = searcher.search(combined, null, 1000).scoreDocs;
  hitCount = hits.length;
  assertEquals("wrong number of results", 1, hitCount);
  for (int doc = 0; doc < hitCount; doc++) {
    assertEquals("score for " + doc + " was not the same", expectedScore,
        hits[doc].score, SCORE_COMP_THRESH);
  }

  // Same combination, but the empty range query uses the constant-score auto rewrite.
  combined = new BooleanQuery();
  combined.add(matchingTerm, BooleanClause.Occur.SHOULD);
  combined.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT),
      BooleanClause.Occur.SHOULD);
  hits = searcher.search(combined, null, 1000).scoreDocs;
  hitCount = hits.length;
  assertEquals("wrong number of results", 1, hitCount);
  for (int doc = 0; doc < hitCount; doc++) {
    assertEquals("score for " + doc + " was not the same", expectedScore,
        hits[doc].score, SCORE_COMP_THRESH);
  }
}
@Test