mirror of https://github.com/apache/lucene.git
LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always return a ConstantScoreQuery to make scoring consistent. Previously it returned an empty unwrapped BooleanQuery, if no terms were available, which has a different query norm
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1526399 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
818b41198b
commit
40e71b8f36
|
@ -89,6 +89,11 @@ Bug Fixes
|
|||
its state, which could result in exceptions being thrown, as well as
|
||||
incorrect ordinals returned from getParent. (Shai Erera)
|
||||
|
||||
* LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always
|
||||
return a ConstantScoreQuery to make scoring consistent. Previously it
|
||||
returned an empty unwrapped BooleanQuery, if no terms were available,
|
||||
which has a different query norm. (Nik Everett, Uwe Schindler)
|
||||
|
||||
API Changes:
|
||||
|
||||
* LUCENE-5222: Add SortField.needsScores(). Previously it was not possible
|
||||
|
|
|
@ -96,17 +96,17 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
|
|||
final int size = col.pendingTerms.size();
|
||||
if (col.hasCutOff) {
|
||||
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
|
||||
} else if (size == 0) {
|
||||
return getTopLevelQuery();
|
||||
} else {
|
||||
final BooleanQuery bq = getTopLevelQuery();
|
||||
final BytesRefHash pendingTerms = col.pendingTerms;
|
||||
final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
for(int i = 0; i < size; i++) {
|
||||
final int pos = sort[i];
|
||||
// docFreq is not used for constant score here, we pass 1
|
||||
// to explicitly set a fake value, so it's not calculated
|
||||
addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
|
||||
if (size > 0) {
|
||||
final BytesRefHash pendingTerms = col.pendingTerms;
|
||||
final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
for(int i = 0; i < size; i++) {
|
||||
final int pos = sort[i];
|
||||
// docFreq is not used for constant score here, we pass 1
|
||||
// to explicitly set a fake value, so it's not calculated
|
||||
addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
|
||||
}
|
||||
}
|
||||
// Strip scores
|
||||
final Query result = new ConstantScoreQuery(bq);
|
||||
|
|
|
@ -87,9 +87,6 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
|
|||
@Override
|
||||
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
|
||||
final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query);
|
||||
// TODO: if empty boolean query return NullQuery?
|
||||
if (bq.clauses().isEmpty())
|
||||
return bq;
|
||||
// strip the scores off
|
||||
final Query result = new ConstantScoreQuery(bq);
|
||||
result.setBoost(query.getBoost());
|
||||
|
|
|
@ -160,6 +160,58 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
|
|||
result[i].score, SCORE_COMP_THRESH);
|
||||
}
|
||||
|
||||
result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, 1000).scoreDocs;
|
||||
numHits = result.length;
|
||||
assertEquals("wrong number of results", 6, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
assertEquals("score for " + i + " was not the same", score,
|
||||
result[i].score, SCORE_COMP_THRESH);
|
||||
}
|
||||
}
|
||||
|
||||
@Test // Test for LUCENE-5245: Empty MTQ rewrites should have a consistent norm, so always need to return a CSQ!
|
||||
public void testEqualScoresWhenNoHits() throws IOException {
|
||||
// NOTE: uses index built in *this* setUp
|
||||
|
||||
IndexSearcher search = newSearcher(reader);
|
||||
|
||||
ScoreDoc[] result;
|
||||
|
||||
TermQuery dummyTerm = new TermQuery(new Term("data", "1"));
|
||||
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
|
||||
bq.add(csrq("data", "#", "#", T, T), BooleanClause.Occur.SHOULD); // hits no docs
|
||||
result = search.search(bq, null, 1000).scoreDocs;
|
||||
int numHits = result.length;
|
||||
assertEquals("wrong number of results", 1, numHits);
|
||||
float score = result[0].score;
|
||||
for (int i = 1; i < numHits; i++) {
|
||||
assertEquals("score for " + i + " was not the same", score,
|
||||
result[i].score, SCORE_COMP_THRESH);
|
||||
}
|
||||
|
||||
bq = new BooleanQuery();
|
||||
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
|
||||
bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), BooleanClause.Occur.SHOULD); // hits no docs
|
||||
result = search.search(bq, null, 1000).scoreDocs;
|
||||
numHits = result.length;
|
||||
assertEquals("wrong number of results", 1, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
assertEquals("score for " + i + " was not the same", score,
|
||||
result[i].score, SCORE_COMP_THRESH);
|
||||
}
|
||||
|
||||
bq = new BooleanQuery();
|
||||
bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
|
||||
bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), BooleanClause.Occur.SHOULD); // hits no docs
|
||||
result = search.search(bq, null, 1000).scoreDocs;
|
||||
numHits = result.length;
|
||||
assertEquals("wrong number of results", 1, numHits);
|
||||
for (int i = 0; i < numHits; i++) {
|
||||
assertEquals("score for " + i + " was not the same", score,
|
||||
result[i].score, SCORE_COMP_THRESH);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue