From 40e71b8f36f308208941ec1a7a7237b41c41204a Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Thu, 26 Sep 2013 06:46:19 +0000 Subject: [PATCH] LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always return a ConstantScoreQuery to make scoring consistent. Previously it returned an empty unwrapped BooleanQuery, if no terms were available, which has a different query norm git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1526399 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 ++ .../search/ConstantScoreAutoRewrite.java | 18 +++---- .../apache/lucene/search/ScoringRewrite.java | 3 -- .../search/TestMultiTermConstantScore.java | 52 +++++++++++++++++++ 4 files changed, 66 insertions(+), 12 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 092ccb996bd..c62b9a6ff64 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -89,6 +89,11 @@ Bug Fixes its state, which could result in exceptions being thrown, as well as incorrect ordinals returned from getParent. (Shai Erera) +* LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always + return a ConstantScoreQuery to make scoring consistent. Previously it + returned an empty unwrapped BooleanQuery, if no terms were available, + which has a different query norm. (Nik Everett, Uwe Schindler) + API Changes: * LUCENE-5222: Add SortField.needsScores(). Previously it was not possible diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java index 3b8ebde94ff..d26abe16c21 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java @@ -96,17 +96,17 @@ class ConstantScoreAutoRewrite extends TermCollectingRewrite { final int size = col.pendingTerms.size(); if (col.hasCutOff) { return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); - } else if (size == 0) { - return getTopLevelQuery(); } else { final BooleanQuery bq = getTopLevelQuery(); - final BytesRefHash pendingTerms = col.pendingTerms; - final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); - for(int i = 0; i < size; i++) { - final int pos = sort[i]; - // docFreq is not used for constant score here, we pass 1 - // to explicitely set a fake value, so it's not calculated - addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); + if (size > 0) { + final BytesRefHash pendingTerms = col.pendingTerms; + final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator()); + for(int i = 0; i < size; i++) { + final int pos = sort[i]; + // docFreq is not used for constant score here, we pass 1 + // to explicitely set a fake value, so it's not calculated + addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]); + } } // Strip scores final Query result = new ConstantScoreQuery(bq); diff --git a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java index 152c1f8039b..954b2bbb596 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java @@ -87,9 +87,6 @@ public abstract class ScoringRewrite extends TermCollectingRewr @Override public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query); - // TODO: if empty boolean query return NullQuery? - if (bq.clauses().isEmpty()) - return bq; // strip the scores off final Query result = new ConstantScoreQuery(bq); result.setBoost(query.getBoost()); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java index 549e1c56ca2..d286d5472a6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java @@ -160,6 +160,58 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { result[i].score, SCORE_COMP_THRESH); } + result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 6, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } + } + + @Test // Test for LUCENE-5245: Empty MTQ rewrites should have a consistent norm, so always need to return a CSQ! + public void testEqualScoresWhenNoHits() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexSearcher search = newSearcher(reader); + + ScoreDoc[] result; + + TermQuery dummyTerm = new TermQuery(new Term("data", "1")); + + BooleanQuery bq = new BooleanQuery(); + bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc + bq.add(csrq("data", "#", "#", T, T), BooleanClause.Occur.SHOULD); // hits no docs + result = search.search(bq, null, 1000).scoreDocs; + int numHits = result.length; + assertEquals("wrong number of results", 1, numHits); + float score = result[0].score; + for (int i = 1; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } + + bq = new BooleanQuery(); + bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc + bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE), BooleanClause.Occur.SHOULD); // hits no docs + result = search.search(bq, null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 1, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } + + bq = new BooleanQuery(); + bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc + bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), BooleanClause.Occur.SHOULD); // hits no docs + result = search.search(bq, null, 1000).scoreDocs; + numHits = result.length; + assertEquals("wrong number of results", 1, numHits); + for (int i = 0; i < numHits; i++) { + assertEquals("score for " + i + " was not the same", score, + result[i].score, SCORE_COMP_THRESH); + } } @Test