diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4439484148a..d0f7859b3c1 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -54,6 +54,9 @@ New Features and queries, for fast "bbox/polygon contains lat/lon points" (Mike McCandless) +* LUCENE-6526: Asserting(Query|Weight|Scorer) now ensure scores are not computed + if they are not needed. (Adrien Grand) + API Changes * LUCENE-6508: Simplify Lock api, there is now just diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java index dbc2f212185..03ac2046b2d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java @@ -22,10 +22,8 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Objects; -import java.util.Set; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.ToStringUtils; @@ -204,10 +202,8 @@ public class BooleanQuery extends Query implements Iterable { } } else { // our single clause is a filter - if (query.getBoost() != 0f) { - query = query.clone(); - query.setBoost(0); - } + query = new ConstantScoreQuery(query); + query.setBoost(0); } return query; diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 3d8e449e12f..22ef4654898 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -362,12 +362,34 @@ public class BooleanWeight extends Weight { if (required.size() == 1) { Scorer req = required.get(0); - if (needsScores == false || - (requiredScoring.size() == 1 && (disableCoord || maxCoord == 1))) { + if (needsScores == false) { return req; - } else { - return new BooleanTopLevelScorers.BoostedScorer(req, coord(requiredScoring.size(), maxCoord)); } + + if (requiredScoring.isEmpty()) { + // Scores are needed but we only have a filter clause + // BooleanWeight expects that calling score() is ok so we need to wrap + // to prevent score() from being propagated + return new FilterScorer(req) { + @Override + public float score() throws IOException { + return 0f; + } + @Override + public int freq() throws IOException { + return 0; + } + }; + } + + float boost = 1f; + if (disableCoord == false) { + boost = coord(1, maxCoord); + } + if (boost == 1f) { + return req; + } + return new BooleanTopLevelScorers.BoostedScorer(req, boost); } else { return new ConjunctionScorer(this, required, requiredScoring, disableCoord ? 1.0F : coord(requiredScoring.size(), maxCoord)); diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index f5ed40069b2..4e534fc5d2c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -97,6 +97,10 @@ public class ConstantScoreQuery extends Query { public float score() throws IOException { return theScore; } + @Override + public int freq() throws IOException { + return 1; + } }); } }; @@ -136,6 +140,10 @@ public class ConstantScoreQuery extends Query { return score; } @Override + public int freq() throws IOException { + return 1; + } + @Override public Collection getChildren() { return Collections.singleton(new ChildScorer(innerScorer, "constant")); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index e6b740be8a5..6622b26141f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -33,6 +33,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.ToStringUtils; /** @@ -40,9 +41,33 @@ import org.apache.lucene.util.ToStringUtils; * other terms with a {@link BooleanQuery}. */ public class TermQuery extends Query { + + private static final Similarity.SimScorer NON_SCORING_SIM_SCORER = new Similarity.SimScorer() { + + @Override + public float score(int doc, float freq) { + return 0f; + } + + @Override + public float computeSlopFactor(int distance) { + return 1f; + } + + @Override + public float computePayloadFactor(int doc, int start, int end, BytesRef payload) { + return 1f; + } + + @Override + public Explanation explain(int doc, Explanation freq) { + return Explanation.match(0f, "Match on doc=" + doc); + } + }; + private final Term term; private final TermContext perReaderTermState; - + final class TermWeight extends Weight { private final Similarity similarity; private final Similarity.SimWeight stats; @@ -58,7 +83,7 @@ public class TermQuery extends Query { assert termStates.hasOnlyRealTerms(); this.termStates = termStates; this.similarity = searcher.getSimilarity(); - + final CollectionStatistics collectionStats; final TermStatistics termStats; if (needsScores) { @@ -72,6 +97,7 @@ public class TermQuery extends Query { collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1); termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq); } + this.stats = similarity.computeWeight(getBoost(), collectionStats, termStats); } @@ -84,17 +110,17 @@ public class TermQuery extends Query { public String toString() { return "weight(" + TermQuery.this + ")"; } - + @Override public float getValueForNormalization() { return stats.getValueForNormalization(); } - + @Override public void normalize(float queryNorm, float topLevelBoost) { stats.normalize(queryNorm, topLevelBoost); } - + @Override public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); @@ -104,9 +130,16 @@ public class TermQuery extends Query { } PostingsEnum docs = termsEnum.postings(acceptDocs, null, needsScores ? PostingsEnum.FREQS : PostingsEnum.NONE); assert docs != null; - return new TermScorer(this, docs, similarity.simScorer(stats, context)); + final Similarity.SimScorer simScorer; + if (needsScores) { + simScorer = similarity.simScorer(stats, context); + } else { + // avoids loading other scoring factors such as norms + simScorer = NON_SCORING_SIM_SCORER; + } + return new TermScorer(this, docs, simScorer); } - + /** * Returns a {@link TermsEnum} positioned at this weights Term or null if * the term does not exist in the given context @@ -124,14 +157,14 @@ public class TermQuery extends Query { termsEnum.seekExact(term.bytes(), state); return termsEnum; } - + private boolean termNotInReader(LeafReader reader, Term term) throws IOException { // only called from assert // System.out.println("TQ.termNotInReader reader=" + reader + " term=" + // field + ":" + bytes.utf8ToString()); return reader.docFreq(term) == 0; } - + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { Scorer scorer = scorer(context, context.reader().getLiveDocs()); @@ -152,13 +185,13 @@ public class TermQuery extends Query { return Explanation.noMatch("no matching term"); } } - + /** Constructs a query for the term t. */ public TermQuery(Term t) { term = Objects.requireNonNull(t); perReaderTermState = null; } - + /** * Expert: constructs a TermQuery that will use the provided docFreq instead * of looking up the docFreq against the searcher. @@ -174,12 +207,12 @@ public class TermQuery extends Query { } perReaderTermState = Objects.requireNonNull(states); } - + /** Returns the term of this query. */ public Term getTerm() { return term; } - + @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); @@ -193,10 +226,10 @@ public class TermQuery extends Query { // PRTS was pre-build for this IS termState = this.perReaderTermState; } - + return new TermWeight(searcher, needsScores, termState); } - + /** Prints a user-readable version of this query. */ @Override public String toString(String field) { @@ -209,7 +242,7 @@ public class TermQuery extends Query { buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } - + /** Returns true iff o is equal to this. */ @Override public boolean equals(Object o) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java index 23bff9b50c0..bc362c5381c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -575,7 +575,7 @@ public class TestBooleanQuery extends LuceneTestCase { // Single clauses rewrite to a term query final Query rewritten1 = query1.rewrite(reader); - assertTrue(rewritten1 instanceof TermQuery); + assertTrue(rewritten1 instanceof ConstantScoreQuery); assertEquals(0f, rewritten1.getBoost(), 0f); // When there are two clauses, we cannot rewrite, but if one of them creates @@ -586,8 +586,9 @@ public class TestBooleanQuery extends LuceneTestCase { query2.add(new TermQuery(new Term("field", "b")), Occur.SHOULD); final Weight weight = searcher.createNormalizedWeight(query2, true); final Scorer scorer = weight.scorer(reader.leaves().get(0), null); - assertTrue(scorer.getClass().getName(), scorer instanceof BooleanTopLevelScorers.BoostedScorer); - assertEquals(0, ((BooleanTopLevelScorers.BoostedScorer) scorer).boost, 0f); + assertEquals(0, scorer.nextDoc()); + assertTrue(scorer.getClass().getName(), scorer instanceof FilterScorer); + assertEquals(0f, scorer.score(), 0f); reader.close(); w.close(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java index 751b3dd0771..1359c4f0f1e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java @@ -24,8 +24,11 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.Term; @@ -40,7 +43,7 @@ public class TestTermScorer extends LuceneTestCase { protected String[] values = new String[] {"all", "dogs dogs", "like", "playing", "fetch", "all"}; protected IndexSearcher indexSearcher; - protected IndexReader indexReader; + protected LeafReader indexReader; @Override public void setUp() throws Exception { @@ -179,5 +182,32 @@ public class TestTermScorer extends LuceneTestCase { return "TestHit{" + "doc=" + doc + ", score=" + score + "}"; } } - + + public void testDoesNotLoadNorms() throws IOException { + Term allTerm = new Term(FIELD, "all"); + TermQuery termQuery = new TermQuery(allTerm); + + LeafReader forbiddenNorms = new FilterLeafReader(indexReader) { + @Override + public NumericDocValues getNormValues(String field) throws IOException { + fail("Norms should not be loaded"); + // unreachable + return null; + } + }; + IndexSearcher indexSearcher = newSearcher(forbiddenNorms); + + Weight weight = indexSearcher.createNormalizedWeight(termQuery, true); + try { + weight.scorer(forbiddenNorms.getContext(), null).nextDoc(); + fail("Should load norms"); + } catch (AssertionError e) { + // ok + } + + weight = indexSearcher.createNormalizedWeight(termQuery, false); + // should not fail this time since norms are not necessary + weight.scorer(forbiddenNorms.getContext(), null).nextDoc(); + } + } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java index 3013d2f4087..c58870a226b 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java @@ -119,7 +119,7 @@ public class ToParentBlockJoinQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores), parentsFilter, scoreMode); + return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores), parentsFilter, needsScores ? scoreMode : ScoreMode.None); } /** Return our child query. */ diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java index 63f79e53a28..38909f7ba02 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java @@ -25,7 +25,6 @@ import java.util.concurrent.ExecutorService; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.util.Bits; /** * Helper class that adds some extra checks to ensure correct @@ -57,7 +56,7 @@ public class AssertingIndexSearcher extends IndexSearcher { @Override public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException { final Weight w = super.createNormalizedWeight(query, needsScores); - return new AssertingWeight(random, w) { + return new AssertingWeight(random, w, needsScores) { @Override public void normalize(float norm, float topLevelBoost) { @@ -75,7 +74,7 @@ public class AssertingIndexSearcher extends IndexSearcher { @Override public Weight createWeight(Query query, boolean needsScores) throws IOException { // this adds assertions to the inner weights/scorers too - return new AssertingWeight(random, super.createWeight(query, needsScores)); + return new AssertingWeight(random, super.createWeight(query, needsScores), needsScores); } @Override @@ -89,8 +88,8 @@ public class AssertingIndexSearcher extends IndexSearcher { @Override protected void search(List leaves, Weight weight, Collector collector) throws IOException { - // TODO: shouldn't we AssertingCollector.wrap(collector) here? - super.search(leaves, AssertingWeight.wrap(random, weight), AssertingCollector.wrap(random, collector)); + assert weight instanceof AssertingWeight; + super.search(leaves, weight, AssertingCollector.wrap(random, collector)); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingLeafCollector.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingLeafCollector.java index 15aa35d31f5..621e81231df 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingLeafCollector.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingLeafCollector.java @@ -41,7 +41,7 @@ class AssertingLeafCollector extends FilterLeafCollector { @Override public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; - super.setScorer(AssertingScorer.wrap(random, scorer)); + super.setScorer(AssertingScorer.wrap(random, scorer, true)); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java index 3db594f7bc3..76567e670c5 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingQuery.java @@ -19,10 +19,8 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Random; -import java.util.Set; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; /** Assertion-enabled query. */ public class AssertingQuery extends Query { @@ -43,7 +41,7 @@ public class AssertingQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { - return AssertingWeight.wrap(new Random(random.nextLong()), in.createWeight(searcher, needsScores)); + return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, needsScores), needsScores); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java index 9d7a405a7d6..96d2065ca22 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingScorer.java @@ -27,23 +27,25 @@ public class AssertingScorer extends Scorer { static enum IteratorState { START, APPROXIMATING, ITERATING, FINISHED }; - public static Scorer wrap(Random random, Scorer other) { - if (other == null || other instanceof AssertingScorer) { - return other; + public static Scorer wrap(Random random, Scorer other, boolean canScore) { + if (other == null) { + return null; } - return new AssertingScorer(random, other); + return new AssertingScorer(random, other, canScore); } final Random random; final Scorer in; + final boolean needsScores; IteratorState state = IteratorState.START; int doc = -1; - private AssertingScorer(Random random, Scorer in) { + private AssertingScorer(Random random, Scorer in, boolean needsScores) { super(in.weight); this.random = random; this.in = in; + this.needsScores = needsScores; } public Scorer getIn() { @@ -63,6 +65,7 @@ public class AssertingScorer extends Scorer { @Override public float score() throws IOException { + assert needsScores; assert iterating(); final float score = in.score(); assert !Float.isNaN(score) : "NaN score for in="+in; @@ -80,6 +83,7 @@ public class AssertingScorer extends Scorer { @Override public int freq() throws IOException { + assert needsScores; assert iterating(); return in.freq(); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java index 25fa2be70d7..71036d88106 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java @@ -27,17 +27,15 @@ import org.apache.lucene.util.Bits; class AssertingWeight extends Weight { - static Weight wrap(Random random, Weight other) { - return other instanceof AssertingWeight ? other : new AssertingWeight(random, other); - } - final Random random; final Weight in; + final boolean needsScores; - AssertingWeight(Random random, Weight in) { + AssertingWeight(Random random, Weight in, boolean needsScores) { super(in.getQuery()); this.random = random; this.in = in; + this.needsScores = needsScores; } @Override @@ -64,7 +62,7 @@ class AssertingWeight extends Weight { public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException { final Scorer inScorer = in.scorer(context, acceptDocs); assert inScorer == null || inScorer.docID() == -1; - return AssertingScorer.wrap(new Random(random.nextLong()), inScorer); + return AssertingScorer.wrap(new Random(random.nextLong()), inScorer, needsScores); } @Override