LUCENE-6526: Asserting(Query|Weight|Scorer) now ensure scores are not computed if they are not needed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1683734 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-06-05 12:18:12 +00:00
parent fe6c3dc939
commit c129a9080b
13 changed files with 144 additions and 52 deletions

View File

@ -54,6 +54,9 @@ New Features
and queries, for fast "bbox/polygon contains lat/lon points" (Mike
McCandless)
* LUCENE-6526: Asserting(Query|Weight|Scorer) now ensure scores are not computed
if they are not needed. (Adrien Grand)
API Changes
* LUCENE-6508: Simplify Lock api, there is now just

View File

@ -22,10 +22,8 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ToStringUtils;
@ -204,10 +202,8 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
}
} else {
// our single clause is a filter
if (query.getBoost() != 0f) {
query = query.clone();
query.setBoost(0);
}
query = new ConstantScoreQuery(query);
query.setBoost(0);
}
return query;

View File

@ -362,12 +362,34 @@ public class BooleanWeight extends Weight {
if (required.size() == 1) {
Scorer req = required.get(0);
if (needsScores == false ||
(requiredScoring.size() == 1 && (disableCoord || maxCoord == 1))) {
if (needsScores == false) {
return req;
} else {
return new BooleanTopLevelScorers.BoostedScorer(req, coord(requiredScoring.size(), maxCoord));
}
if (requiredScoring.isEmpty()) {
// Scores are needed but we only have a filter clause
// BooleanWeight expects that calling score() is ok so we need to wrap
// to prevent score() from being propagated
return new FilterScorer(req) {
@Override
public float score() throws IOException {
return 0f;
}
@Override
public int freq() throws IOException {
return 0;
}
};
}
float boost = 1f;
if (disableCoord == false) {
boost = coord(1, maxCoord);
}
if (boost == 1f) {
return req;
}
return new BooleanTopLevelScorers.BoostedScorer(req, boost);
} else {
return new ConjunctionScorer(this, required, requiredScoring,
disableCoord ? 1.0F : coord(requiredScoring.size(), maxCoord));

View File

@ -97,6 +97,10 @@ public class ConstantScoreQuery extends Query {
public float score() throws IOException {
return theScore;
}
@Override
public int freq() throws IOException {
return 1;
}
});
}
};
@ -136,6 +140,10 @@ public class ConstantScoreQuery extends Query {
return score;
}
@Override
public int freq() throws IOException {
return 1;
}
@Override
public Collection<ChildScorer> getChildren() {
return Collections.singleton(new ChildScorer(innerScorer, "constant"));
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
/**
@ -40,9 +41,33 @@ import org.apache.lucene.util.ToStringUtils;
* other terms with a {@link BooleanQuery}.
*/
public class TermQuery extends Query {
private static final Similarity.SimScorer NON_SCORING_SIM_SCORER = new Similarity.SimScorer() {
@Override
public float score(int doc, float freq) {
return 0f;
}
@Override
public float computeSlopFactor(int distance) {
return 1f;
}
@Override
public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
return 1f;
}
@Override
public Explanation explain(int doc, Explanation freq) {
return Explanation.match(0f, "Match on doc=" + doc);
}
};
private final Term term;
private final TermContext perReaderTermState;
final class TermWeight extends Weight {
private final Similarity similarity;
private final Similarity.SimWeight stats;
@ -58,7 +83,7 @@ public class TermQuery extends Query {
assert termStates.hasOnlyRealTerms();
this.termStates = termStates;
this.similarity = searcher.getSimilarity();
final CollectionStatistics collectionStats;
final TermStatistics termStats;
if (needsScores) {
@ -72,6 +97,7 @@ public class TermQuery extends Query {
collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq);
}
this.stats = similarity.computeWeight(getBoost(), collectionStats, termStats);
}
@ -84,17 +110,17 @@ public class TermQuery extends Query {
public String toString() {
return "weight(" + TermQuery.this + ")";
}
@Override
public float getValueForNormalization() {
return stats.getValueForNormalization();
}
@Override
public void normalize(float queryNorm, float topLevelBoost) {
stats.normalize(queryNorm, topLevelBoost);
}
@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
@ -104,9 +130,16 @@ public class TermQuery extends Query {
}
PostingsEnum docs = termsEnum.postings(acceptDocs, null, needsScores ? PostingsEnum.FREQS : PostingsEnum.NONE);
assert docs != null;
return new TermScorer(this, docs, similarity.simScorer(stats, context));
final Similarity.SimScorer simScorer;
if (needsScores) {
simScorer = similarity.simScorer(stats, context);
} else {
// avoids loading other scoring factors such as norms
simScorer = NON_SCORING_SIM_SCORER;
}
return new TermScorer(this, docs, simScorer);
}
/**
* Returns a {@link TermsEnum} positioned at this weights Term or null if
* the term does not exist in the given context
@ -124,14 +157,14 @@ public class TermQuery extends Query {
termsEnum.seekExact(term.bytes(), state);
return termsEnum;
}
private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
// only called from assert
// System.out.println("TQ.termNotInReader reader=" + reader + " term=" +
// field + ":" + bytes.utf8ToString());
return reader.docFreq(term) == 0;
}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, context.reader().getLiveDocs());
@ -152,13 +185,13 @@ public class TermQuery extends Query {
return Explanation.noMatch("no matching term");
}
}
/** Constructs a query for the term <code>t</code>. */
public TermQuery(Term t) {
term = Objects.requireNonNull(t);
perReaderTermState = null;
}
/**
* Expert: constructs a TermQuery that will use the provided docFreq instead
* of looking up the docFreq against the searcher.
@ -174,12 +207,12 @@ public class TermQuery extends Query {
}
perReaderTermState = Objects.requireNonNull(states);
}
/** Returns the term of this query. */
public Term getTerm() {
return term;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
final IndexReaderContext context = searcher.getTopReaderContext();
@ -193,10 +226,10 @@ public class TermQuery extends Query {
// PRTS was pre-build for this IS
termState = this.perReaderTermState;
}
return new TermWeight(searcher, needsScores, termState);
}
/** Prints a user-readable version of this query. */
@Override
public String toString(String field) {
@ -209,7 +242,7 @@ public class TermQuery extends Query {
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object o) {

View File

@ -575,7 +575,7 @@ public class TestBooleanQuery extends LuceneTestCase {
// Single clauses rewrite to a term query
final Query rewritten1 = query1.rewrite(reader);
assertTrue(rewritten1 instanceof TermQuery);
assertTrue(rewritten1 instanceof ConstantScoreQuery);
assertEquals(0f, rewritten1.getBoost(), 0f);
// When there are two clauses, we cannot rewrite, but if one of them creates
@ -586,8 +586,9 @@ public class TestBooleanQuery extends LuceneTestCase {
query2.add(new TermQuery(new Term("field", "b")), Occur.SHOULD);
final Weight weight = searcher.createNormalizedWeight(query2, true);
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertTrue(scorer.getClass().getName(), scorer instanceof BooleanTopLevelScorers.BoostedScorer);
assertEquals(0, ((BooleanTopLevelScorers.BoostedScorer) scorer).boost, 0f);
assertEquals(0, scorer.nextDoc());
assertTrue(scorer.getClass().getName(), scorer instanceof FilterScorer);
assertEquals(0f, scorer.score(), 0f);
reader.close();
w.close();

View File

@ -24,8 +24,11 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
@ -40,7 +43,7 @@ public class TestTermScorer extends LuceneTestCase {
protected String[] values = new String[] {"all", "dogs dogs", "like",
"playing", "fetch", "all"};
protected IndexSearcher indexSearcher;
protected IndexReader indexReader;
protected LeafReader indexReader;
@Override
public void setUp() throws Exception {
@ -179,5 +182,32 @@ public class TestTermScorer extends LuceneTestCase {
return "TestHit{" + "doc=" + doc + ", score=" + score + "}";
}
}
public void testDoesNotLoadNorms() throws IOException {
Term allTerm = new Term(FIELD, "all");
TermQuery termQuery = new TermQuery(allTerm);
LeafReader forbiddenNorms = new FilterLeafReader(indexReader) {
@Override
public NumericDocValues getNormValues(String field) throws IOException {
fail("Norms should not be loaded");
// unreachable
return null;
}
};
IndexSearcher indexSearcher = newSearcher(forbiddenNorms);
Weight weight = indexSearcher.createNormalizedWeight(termQuery, true);
try {
weight.scorer(forbiddenNorms.getContext(), null).nextDoc();
fail("Should load norms");
} catch (AssertionError e) {
// ok
}
weight = indexSearcher.createNormalizedWeight(termQuery, false);
// should not fail this time since norms are not necessary
weight.scorer(forbiddenNorms.getContext(), null).nextDoc();
}
}

View File

@ -119,7 +119,7 @@ public class ToParentBlockJoinQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores), parentsFilter, scoreMode);
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
}
/** Return our child query. */

View File

@ -25,7 +25,6 @@ import java.util.concurrent.ExecutorService;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;
/**
* Helper class that adds some extra checks to ensure correct
@ -57,7 +56,7 @@ public class AssertingIndexSearcher extends IndexSearcher {
@Override
public Weight createNormalizedWeight(Query query, boolean needsScores) throws IOException {
final Weight w = super.createNormalizedWeight(query, needsScores);
return new AssertingWeight(random, w) {
return new AssertingWeight(random, w, needsScores) {
@Override
public void normalize(float norm, float topLevelBoost) {
@ -75,7 +74,7 @@ public class AssertingIndexSearcher extends IndexSearcher {
@Override
public Weight createWeight(Query query, boolean needsScores) throws IOException {
// this adds assertions to the inner weights/scorers too
return new AssertingWeight(random, super.createWeight(query, needsScores));
return new AssertingWeight(random, super.createWeight(query, needsScores), needsScores);
}
@Override
@ -89,8 +88,8 @@ public class AssertingIndexSearcher extends IndexSearcher {
@Override
protected void search(List<LeafReaderContext> leaves, Weight weight, Collector collector) throws IOException {
// TODO: shouldn't we AssertingCollector.wrap(collector) here?
super.search(leaves, AssertingWeight.wrap(random, weight), AssertingCollector.wrap(random, collector));
assert weight instanceof AssertingWeight;
super.search(leaves, weight, AssertingCollector.wrap(random, collector));
}
@Override

View File

@ -41,7 +41,7 @@ class AssertingLeafCollector extends FilterLeafCollector {
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
super.setScorer(AssertingScorer.wrap(random, scorer));
super.setScorer(AssertingScorer.wrap(random, scorer, true));
}
@Override

View File

@ -19,10 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/** Assertion-enabled query. */
public class AssertingQuery extends Query {
@ -43,7 +41,7 @@ public class AssertingQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return AssertingWeight.wrap(new Random(random.nextLong()), in.createWeight(searcher, needsScores));
return new AssertingWeight(new Random(random.nextLong()), in.createWeight(searcher, needsScores), needsScores);
}
@Override

View File

@ -27,23 +27,25 @@ public class AssertingScorer extends Scorer {
static enum IteratorState { START, APPROXIMATING, ITERATING, FINISHED };
public static Scorer wrap(Random random, Scorer other) {
if (other == null || other instanceof AssertingScorer) {
return other;
public static Scorer wrap(Random random, Scorer other, boolean canScore) {
if (other == null) {
return null;
}
return new AssertingScorer(random, other);
return new AssertingScorer(random, other, canScore);
}
final Random random;
final Scorer in;
final boolean needsScores;
IteratorState state = IteratorState.START;
int doc = -1;
private AssertingScorer(Random random, Scorer in) {
private AssertingScorer(Random random, Scorer in, boolean needsScores) {
super(in.weight);
this.random = random;
this.in = in;
this.needsScores = needsScores;
}
public Scorer getIn() {
@ -63,6 +65,7 @@ public class AssertingScorer extends Scorer {
@Override
public float score() throws IOException {
assert needsScores;
assert iterating();
final float score = in.score();
assert !Float.isNaN(score) : "NaN score for in="+in;
@ -80,6 +83,7 @@ public class AssertingScorer extends Scorer {
@Override
public int freq() throws IOException {
assert needsScores;
assert iterating();
return in.freq();
}

View File

@ -27,17 +27,15 @@ import org.apache.lucene.util.Bits;
class AssertingWeight extends Weight {
static Weight wrap(Random random, Weight other) {
return other instanceof AssertingWeight ? other : new AssertingWeight(random, other);
}
final Random random;
final Weight in;
final boolean needsScores;
AssertingWeight(Random random, Weight in) {
AssertingWeight(Random random, Weight in, boolean needsScores) {
super(in.getQuery());
this.random = random;
this.in = in;
this.needsScores = needsScores;
}
@Override
@ -64,7 +62,7 @@ class AssertingWeight extends Weight {
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
final Scorer inScorer = in.scorer(context, acceptDocs);
assert inScorer == null || inScorer.docID() == -1;
return AssertingScorer.wrap(new Random(random.nextLong()), inScorer);
return AssertingScorer.wrap(new Random(random.nextLong()), inScorer, needsScores);
}
@Override