LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always return a ConstantScoreQuery to make scoring consistent. Previously it returned an empty unwrapped BooleanQuery, if no terms were available, which has a different query norm

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1526399 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2013-09-26 06:46:19 +00:00
parent 818b41198b
commit 40e71b8f36
4 changed files with 66 additions and 12 deletions

View File

@ -89,6 +89,11 @@ Bug Fixes
its state, which could result in exceptions being thrown, as well as its state, which could result in exceptions being thrown, as well as
incorrect ordinals returned from getParent. (Shai Erera) incorrect ordinals returned from getParent. (Shai Erera)
* LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always
return a ConstantScoreQuery to make scoring consistent. Previously it
returned an empty unwrapped BooleanQuery, if no terms were available,
which has a different query norm. (Nik Everett, Uwe Schindler)
API Changes: API Changes:
* LUCENE-5222: Add SortField.needsScores(). Previously it was not possible * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible

View File

@ -96,10 +96,9 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
final int size = col.pendingTerms.size(); final int size = col.pendingTerms.size();
if (col.hasCutOff) { if (col.hasCutOff) {
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
} else if (size == 0) {
return getTopLevelQuery();
} else { } else {
final BooleanQuery bq = getTopLevelQuery(); final BooleanQuery bq = getTopLevelQuery();
if (size > 0) {
final BytesRefHash pendingTerms = col.pendingTerms; final BytesRefHash pendingTerms = col.pendingTerms;
final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
for(int i = 0; i < size; i++) { for(int i = 0; i < size; i++) {
@ -108,6 +107,7 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
// to explicitely set a fake value, so it's not calculated // to explicitely set a fake value, so it's not calculated
addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
} }
}
// Strip scores // Strip scores
final Query result = new ConstantScoreQuery(bq); final Query result = new ConstantScoreQuery(bq);
result.setBoost(query.getBoost()); result.setBoost(query.getBoost());

View File

@ -87,9 +87,6 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
@Override @Override
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query); final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query);
// TODO: if empty boolean query return NullQuery?
if (bq.clauses().isEmpty())
return bq;
// strip the scores off // strip the scores off
final Query result = new ConstantScoreQuery(bq); final Query result = new ConstantScoreQuery(bq);
result.setBoost(query.getBoost()); result.setBoost(query.getBoost());

View File

@ -160,6 +160,58 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
result[i].score, SCORE_COMP_THRESH); result[i].score, SCORE_COMP_THRESH);
} }
result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, 1000).scoreDocs;
numHits = result.length;
assertEquals("wrong number of results", 6, numHits);
for (int i = 0; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score, SCORE_COMP_THRESH);
}
}
@Test // Test for LUCENE-5245: Empty MTQ rewrites should have a consistent norm, so always need to return a CSQ!
public void testEqualScoresWhenNoHits() throws IOException {
// NOTE: uses index build in *this* setUp
IndexSearcher search = newSearcher(reader);
ScoreDoc[] result;
TermQuery dummyTerm = new TermQuery(new Term("data", "1"));
BooleanQuery bq = new BooleanQuery();
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
bq.add(csrq("data", "#", "#", T, T), BooleanClause.Occur.SHOULD); // hits no docs
result = search.search(bq, null, 1000).scoreDocs;
int numHits = result.length;
assertEquals("wrong number of results", 1, numHits);
float score = result[0].score;
for (int i = 1; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score, SCORE_COMP_THRESH);
}
bq = new BooleanQuery();
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), BooleanClause.Occur.SHOULD); // hits no docs
result = search.search(bq, null, 1000).scoreDocs;
numHits = result.length;
assertEquals("wrong number of results", 1, numHits);
for (int i = 0; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score, SCORE_COMP_THRESH);
}
bq = new BooleanQuery();
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), BooleanClause.Occur.SHOULD); // hits no docs
result = search.search(bq, null, 1000).scoreDocs;
numHits = result.length;
assertEquals("wrong number of results", 1, numHits);
for (int i = 0; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score, SCORE_COMP_THRESH);
}
} }
@Test @Test