mirror of https://github.com/apache/lucene.git
LUCENE-6223: Move BooleanWeight to its own file
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1657895 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e1786f313f
commit
8eed769e18
|
@ -83,6 +83,9 @@ API Changes
|
||||||
instead. This will be as efficient now that queries can opt out from
|
instead. This will be as efficient now that queries can opt out from
|
||||||
scoring. (Adrien Grand)
|
scoring. (Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-6223: Move BooleanQuery.BooleanWeight to BooleanWeight.
|
||||||
|
(Robert Muir)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-6193: Collapse identical catch branches in try-catch statements.
|
* LUCENE-6193: Collapse identical catch branches in try-catch statements.
|
||||||
|
|
|
@ -162,379 +162,9 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
||||||
@Override
|
@Override
|
||||||
public final Iterator<BooleanClause> iterator() { return clauses().iterator(); }
|
public final Iterator<BooleanClause> iterator() { return clauses().iterator(); }
|
||||||
|
|
||||||
/**
|
|
||||||
* Expert: the Weight for BooleanQuery, used to
|
|
||||||
* normalize, score and explain these queries.
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*/
|
|
||||||
protected class BooleanWeight extends Weight {
|
|
||||||
/** The Similarity implementation. */
|
|
||||||
protected Similarity similarity;
|
|
||||||
protected ArrayList<Weight> weights;
|
|
||||||
protected int maxCoord; // num optional + num required
|
|
||||||
private final boolean disableCoord;
|
|
||||||
private final boolean needsScores;
|
|
||||||
|
|
||||||
public BooleanWeight(IndexSearcher searcher, boolean needsScores, boolean disableCoord)
|
|
||||||
throws IOException {
|
|
||||||
super(BooleanQuery.this);
|
|
||||||
this.needsScores = needsScores;
|
|
||||||
this.similarity = searcher.getSimilarity();
|
|
||||||
this.disableCoord = disableCoord;
|
|
||||||
weights = new ArrayList<>(clauses.size());
|
|
||||||
for (int i = 0 ; i < clauses.size(); i++) {
|
|
||||||
BooleanClause c = clauses.get(i);
|
|
||||||
final boolean queryNeedsScores = needsScores && c.getOccur() != Occur.MUST_NOT;
|
|
||||||
Weight w = c.getQuery().createWeight(searcher, queryNeedsScores);
|
|
||||||
weights.add(w);
|
|
||||||
if (!c.isProhibited()) {
|
|
||||||
maxCoord++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public float getValueForNormalization() throws IOException {
|
|
||||||
float sum = 0.0f;
|
|
||||||
for (int i = 0 ; i < weights.size(); i++) {
|
|
||||||
// call sumOfSquaredWeights for all clauses in case of side effects
|
|
||||||
float s = weights.get(i).getValueForNormalization(); // sum sub weights
|
|
||||||
if (!clauses.get(i).isProhibited()) {
|
|
||||||
// only add to sum for non-prohibited clauses
|
|
||||||
sum += s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
sum *= getBoost() * getBoost(); // boost each sub-weight
|
|
||||||
|
|
||||||
return sum ;
|
|
||||||
}
|
|
||||||
|
|
||||||
public float coord(int overlap, int maxOverlap) {
|
|
||||||
// LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
|
|
||||||
// so coord() is not applied. But when BQ cannot optimize itself away
|
|
||||||
// for a single clause (minNrShouldMatch, prohibited clauses, etc), it's
|
|
||||||
// important not to apply coord(1,1) for consistency, it might not be 1.0F
|
|
||||||
return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void normalize(float norm, float topLevelBoost) {
|
|
||||||
topLevelBoost *= getBoost(); // incorporate boost
|
|
||||||
for (Weight w : weights) {
|
|
||||||
// normalize all clauses, (even if prohibited in case of side affects)
|
|
||||||
w.normalize(norm, topLevelBoost);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Explanation explain(LeafReaderContext context, int doc)
|
|
||||||
throws IOException {
|
|
||||||
final int minShouldMatch =
|
|
||||||
BooleanQuery.this.getMinimumNumberShouldMatch();
|
|
||||||
ComplexExplanation sumExpl = new ComplexExplanation();
|
|
||||||
sumExpl.setDescription("sum of:");
|
|
||||||
int coord = 0;
|
|
||||||
float sum = 0.0f;
|
|
||||||
boolean fail = false;
|
|
||||||
int shouldMatchCount = 0;
|
|
||||||
Iterator<BooleanClause> cIter = clauses.iterator();
|
|
||||||
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
|
|
||||||
Weight w = wIter.next();
|
|
||||||
BooleanClause c = cIter.next();
|
|
||||||
if (w.scorer(context, context.reader().getLiveDocs()) == null) {
|
|
||||||
if (c.isRequired()) {
|
|
||||||
fail = true;
|
|
||||||
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
|
|
||||||
sumExpl.addDetail(r);
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Explanation e = w.explain(context, doc);
|
|
||||||
if (e.isMatch()) {
|
|
||||||
if (!c.isProhibited()) {
|
|
||||||
sumExpl.addDetail(e);
|
|
||||||
sum += e.getValue();
|
|
||||||
coord++;
|
|
||||||
} else {
|
|
||||||
Explanation r =
|
|
||||||
new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
|
|
||||||
r.addDetail(e);
|
|
||||||
sumExpl.addDetail(r);
|
|
||||||
fail = true;
|
|
||||||
}
|
|
||||||
if (c.getOccur() == Occur.SHOULD) {
|
|
||||||
shouldMatchCount++;
|
|
||||||
}
|
|
||||||
} else if (c.isRequired()) {
|
|
||||||
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
|
|
||||||
r.addDetail(e);
|
|
||||||
sumExpl.addDetail(r);
|
|
||||||
fail = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (fail) {
|
|
||||||
sumExpl.setMatch(Boolean.FALSE);
|
|
||||||
sumExpl.setValue(0.0f);
|
|
||||||
sumExpl.setDescription
|
|
||||||
("Failure to meet condition(s) of required/prohibited clause(s)");
|
|
||||||
return sumExpl;
|
|
||||||
} else if (shouldMatchCount < minShouldMatch) {
|
|
||||||
sumExpl.setMatch(Boolean.FALSE);
|
|
||||||
sumExpl.setValue(0.0f);
|
|
||||||
sumExpl.setDescription("Failure to match minimum number "+
|
|
||||||
"of optional clauses: " + minShouldMatch);
|
|
||||||
return sumExpl;
|
|
||||||
}
|
|
||||||
|
|
||||||
sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
|
|
||||||
sumExpl.setValue(sum);
|
|
||||||
|
|
||||||
final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
|
|
||||||
if (coordFactor == 1.0f) {
|
|
||||||
return sumExpl; // eliminate wrapper
|
|
||||||
} else {
|
|
||||||
ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
|
|
||||||
sum*coordFactor,
|
|
||||||
"product of:");
|
|
||||||
result.addDetail(sumExpl);
|
|
||||||
result.addDetail(new Explanation(coordFactor,
|
|
||||||
"coord("+coord+"/"+maxCoord+")"));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer}
|
|
||||||
* cannot be used. */
|
|
||||||
// pkg-private for forcing use of BooleanScorer in tests
|
|
||||||
BooleanScorer booleanScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
List<BulkScorer> optional = new ArrayList<BulkScorer>();
|
|
||||||
Iterator<BooleanClause> cIter = clauses.iterator();
|
|
||||||
for (Weight w : weights) {
|
|
||||||
BooleanClause c = cIter.next();
|
|
||||||
BulkScorer subScorer = w.bulkScorer(context, acceptDocs);
|
|
||||||
if (subScorer == null) {
|
|
||||||
if (c.isRequired()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else if (c.isRequired()) {
|
|
||||||
// TODO: there are some cases where BooleanScorer
|
|
||||||
// would handle conjunctions faster than
|
|
||||||
// BooleanScorer2...
|
|
||||||
return null;
|
|
||||||
} else if (c.isProhibited()) {
|
|
||||||
// TODO: there are some cases where BooleanScorer could do this faster
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
optional.add(subScorer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (optional.size() == 0) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (minNrShouldMatch > optional.size()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, minNrShouldMatch));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
final BooleanScorer bulkScorer = booleanScorer(context, acceptDocs);
|
|
||||||
if (bulkScorer != null) { // BooleanScorer is applicable
|
|
||||||
// TODO: what is the right heuristic here?
|
|
||||||
final long costThreshold;
|
|
||||||
if (minNrShouldMatch <= 1) {
|
|
||||||
// when all clauses are optional, use BooleanScorer aggressively
|
|
||||||
// TODO: is there actually a threshold under which we should rather
|
|
||||||
// use the regular scorer?
|
|
||||||
costThreshold = -1;
|
|
||||||
} else {
|
|
||||||
// when a minimum number of clauses should match, BooleanScorer is
|
|
||||||
// going to score all windows that have at least minNrShouldMatch
|
|
||||||
// matches in the window. But there is no way to know if there is
|
|
||||||
// an intersection (all clauses might match a different doc ID and
|
|
||||||
// there will be no matches in the end) so we should only use
|
|
||||||
// BooleanScorer if matches are very dense
|
|
||||||
costThreshold = context.reader().maxDoc() / 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bulkScorer.cost() > costThreshold) {
|
|
||||||
return bulkScorer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return super.bulkScorer(context, acceptDocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
// initially the user provided value,
|
|
||||||
// but if minNrShouldMatch == optional.size(),
|
|
||||||
// we will optimize and move these to required, making this 0
|
|
||||||
int minShouldMatch = minNrShouldMatch;
|
|
||||||
|
|
||||||
List<Scorer> required = new ArrayList<>();
|
|
||||||
List<Scorer> prohibited = new ArrayList<>();
|
|
||||||
List<Scorer> optional = new ArrayList<>();
|
|
||||||
Iterator<BooleanClause> cIter = clauses.iterator();
|
|
||||||
for (Weight w : weights) {
|
|
||||||
BooleanClause c = cIter.next();
|
|
||||||
Scorer subScorer = w.scorer(context, acceptDocs);
|
|
||||||
if (subScorer == null) {
|
|
||||||
if (c.isRequired()) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
} else if (c.isRequired()) {
|
|
||||||
required.add(subScorer);
|
|
||||||
} else if (c.isProhibited()) {
|
|
||||||
prohibited.add(subScorer);
|
|
||||||
} else {
|
|
||||||
optional.add(subScorer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// scorer simplifications:
|
|
||||||
|
|
||||||
if (optional.size() == minShouldMatch) {
|
|
||||||
// any optional clauses are in fact required
|
|
||||||
required.addAll(optional);
|
|
||||||
optional.clear();
|
|
||||||
minShouldMatch = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (required.isEmpty() && optional.isEmpty()) {
|
|
||||||
// no required and optional clauses.
|
|
||||||
return null;
|
|
||||||
} else if (optional.size() < minShouldMatch) {
|
|
||||||
// either >1 req scorer, or there are 0 req scorers and at least 1
|
|
||||||
// optional scorer. Therefore if there are not enough optional scorers
|
|
||||||
// no documents will be matched by the query
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// we don't need scores, so if we have required clauses, drop optional clauses completely
|
|
||||||
if (!needsScores && minShouldMatch == 0 && required.size() > 0) {
|
|
||||||
optional.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// three cases: conjunction, disjunction, or mix
|
|
||||||
|
|
||||||
// pure conjunction
|
|
||||||
if (optional.isEmpty()) {
|
|
||||||
return excl(req(required, disableCoord), prohibited);
|
|
||||||
}
|
|
||||||
|
|
||||||
// pure disjunction
|
|
||||||
if (required.isEmpty()) {
|
|
||||||
return excl(opt(optional, minShouldMatch, disableCoord), prohibited);
|
|
||||||
}
|
|
||||||
|
|
||||||
// conjunction-disjunction mix:
|
|
||||||
// we create the required and optional pieces with coord disabled, and then
|
|
||||||
// combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the
|
|
||||||
// optional side must match. otherwise it's required + optional, factoring the
|
|
||||||
// number of optional terms into the coord calculation
|
|
||||||
|
|
||||||
Scorer req = excl(req(required, true), prohibited);
|
|
||||||
Scorer opt = opt(optional, minShouldMatch, true);
|
|
||||||
|
|
||||||
// TODO: clean this up: it's horrible
|
|
||||||
if (disableCoord) {
|
|
||||||
if (minShouldMatch > 0) {
|
|
||||||
return new ConjunctionScorer(this, new Scorer[] { req, opt }, 1F);
|
|
||||||
} else {
|
|
||||||
return new ReqOptSumScorer(req, opt);
|
|
||||||
}
|
|
||||||
} else if (optional.size() == 1) {
|
|
||||||
if (minShouldMatch > 0) {
|
|
||||||
return new ConjunctionScorer(this, new Scorer[] { req, opt }, coord(required.size()+1, maxCoord));
|
|
||||||
} else {
|
|
||||||
float coordReq = coord(required.size(), maxCoord);
|
|
||||||
float coordBoth = coord(required.size() + 1, maxCoord);
|
|
||||||
return new BooleanTopLevelScorers.ReqSingleOptScorer(req, opt, coordReq, coordBoth);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (minShouldMatch > 0) {
|
|
||||||
return new BooleanTopLevelScorers.CoordinatingConjunctionScorer(this, coords(), req, required.size(), opt);
|
|
||||||
} else {
|
|
||||||
return new BooleanTopLevelScorers.ReqMultiOptScorer(req, opt, required.size(), coords());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Scorer req(List<Scorer> required, boolean disableCoord) {
|
|
||||||
if (required.size() == 1) {
|
|
||||||
Scorer req = required.get(0);
|
|
||||||
if (!disableCoord && maxCoord > 1) {
|
|
||||||
return new BooleanTopLevelScorers.BoostedScorer(req, coord(1, maxCoord));
|
|
||||||
} else {
|
|
||||||
return req;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return new ConjunctionScorer(this,
|
|
||||||
required.toArray(new Scorer[required.size()]),
|
|
||||||
disableCoord ? 1.0F : coord(required.size(), maxCoord));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Scorer excl(Scorer main, List<Scorer> prohibited) throws IOException {
|
|
||||||
if (prohibited.isEmpty()) {
|
|
||||||
return main;
|
|
||||||
} else if (prohibited.size() == 1) {
|
|
||||||
return new ReqExclScorer(main, prohibited.get(0));
|
|
||||||
} else {
|
|
||||||
float coords[] = new float[prohibited.size()+1];
|
|
||||||
Arrays.fill(coords, 1F);
|
|
||||||
return new ReqExclScorer(main,
|
|
||||||
new DisjunctionSumScorer(this,
|
|
||||||
prohibited.toArray(new Scorer[prohibited.size()]),
|
|
||||||
coords));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private Scorer opt(List<Scorer> optional, int minShouldMatch, boolean disableCoord) throws IOException {
|
|
||||||
if (optional.size() == 1) {
|
|
||||||
Scorer opt = optional.get(0);
|
|
||||||
if (!disableCoord && maxCoord > 1) {
|
|
||||||
return new BooleanTopLevelScorers.BoostedScorer(opt, coord(1, maxCoord));
|
|
||||||
} else {
|
|
||||||
return opt;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
float coords[];
|
|
||||||
if (disableCoord) {
|
|
||||||
coords = new float[optional.size()+1];
|
|
||||||
Arrays.fill(coords, 1F);
|
|
||||||
} else {
|
|
||||||
coords = coords();
|
|
||||||
}
|
|
||||||
if (minShouldMatch > 1) {
|
|
||||||
return new MinShouldMatchSumScorer(this, optional, minShouldMatch, coords);
|
|
||||||
} else {
|
|
||||||
return new DisjunctionSumScorer(this,
|
|
||||||
optional.toArray(new Scorer[optional.size()]),
|
|
||||||
coords);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private float[] coords() {
|
|
||||||
float[] coords = new float[maxCoord+1];
|
|
||||||
coords[0] = 0F;
|
|
||||||
for (int i = 1; i < coords.length; i++) {
|
|
||||||
coords[i] = coord(i, maxCoord);
|
|
||||||
}
|
|
||||||
return coords;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
return new BooleanWeight(searcher, needsScores, disableCoord);
|
return new BooleanWeight(this, searcher, needsScores, disableCoord);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
|
import org.apache.lucene.search.BooleanWeight;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -0,0 +1,398 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Expert: the Weight for BooleanQuery, used to
|
||||||
|
* normalize, score and explain these queries.
|
||||||
|
*
|
||||||
|
* @lucene.experimental
|
||||||
|
*/
|
||||||
|
public class BooleanWeight extends Weight {
|
||||||
|
/** The Similarity implementation. */
|
||||||
|
protected Similarity similarity;
|
||||||
|
protected final BooleanQuery query;
|
||||||
|
protected ArrayList<Weight> weights;
|
||||||
|
protected int maxCoord; // num optional + num required
|
||||||
|
private final boolean disableCoord;
|
||||||
|
private final boolean needsScores;
|
||||||
|
|
||||||
|
public BooleanWeight(BooleanQuery query, IndexSearcher searcher, boolean needsScores, boolean disableCoord) throws IOException {
|
||||||
|
super(query);
|
||||||
|
this.query = query;
|
||||||
|
this.needsScores = needsScores;
|
||||||
|
this.similarity = searcher.getSimilarity();
|
||||||
|
this.disableCoord = disableCoord;
|
||||||
|
weights = new ArrayList<>(query.clauses().size());
|
||||||
|
for (int i = 0 ; i < query.clauses().size(); i++) {
|
||||||
|
BooleanClause c = query.clauses().get(i);
|
||||||
|
final boolean queryNeedsScores = needsScores && c.getOccur() != Occur.MUST_NOT;
|
||||||
|
Weight w = c.getQuery().createWeight(searcher, queryNeedsScores);
|
||||||
|
weights.add(w);
|
||||||
|
if (!c.isProhibited()) {
|
||||||
|
maxCoord++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float getValueForNormalization() throws IOException {
|
||||||
|
float sum = 0.0f;
|
||||||
|
for (int i = 0 ; i < weights.size(); i++) {
|
||||||
|
// call sumOfSquaredWeights for all clauses in case of side effects
|
||||||
|
float s = weights.get(i).getValueForNormalization(); // sum sub weights
|
||||||
|
if (!query.clauses().get(i).isProhibited()) {
|
||||||
|
// only add to sum for non-prohibited clauses
|
||||||
|
sum += s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sum *= query.getBoost() * query.getBoost(); // boost each sub-weight
|
||||||
|
|
||||||
|
return sum ;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float coord(int overlap, int maxOverlap) {
|
||||||
|
// LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
|
||||||
|
// so coord() is not applied. But when BQ cannot optimize itself away
|
||||||
|
// for a single clause (minNrShouldMatch, prohibited clauses, etc), it's
|
||||||
|
// important not to apply coord(1,1) for consistency, it might not be 1.0F
|
||||||
|
return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void normalize(float norm, float topLevelBoost) {
|
||||||
|
topLevelBoost *= query.getBoost(); // incorporate boost
|
||||||
|
for (Weight w : weights) {
|
||||||
|
// normalize all clauses, (even if prohibited in case of side affects)
|
||||||
|
w.normalize(norm, topLevelBoost);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||||
|
final int minShouldMatch = query.getMinimumNumberShouldMatch();
|
||||||
|
ComplexExplanation sumExpl = new ComplexExplanation();
|
||||||
|
sumExpl.setDescription("sum of:");
|
||||||
|
int coord = 0;
|
||||||
|
float sum = 0.0f;
|
||||||
|
boolean fail = false;
|
||||||
|
int shouldMatchCount = 0;
|
||||||
|
Iterator<BooleanClause> cIter = query.clauses().iterator();
|
||||||
|
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
|
||||||
|
Weight w = wIter.next();
|
||||||
|
BooleanClause c = cIter.next();
|
||||||
|
if (w.scorer(context, context.reader().getLiveDocs()) == null) {
|
||||||
|
if (c.isRequired()) {
|
||||||
|
fail = true;
|
||||||
|
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
|
||||||
|
sumExpl.addDetail(r);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Explanation e = w.explain(context, doc);
|
||||||
|
if (e.isMatch()) {
|
||||||
|
if (!c.isProhibited()) {
|
||||||
|
sumExpl.addDetail(e);
|
||||||
|
sum += e.getValue();
|
||||||
|
coord++;
|
||||||
|
} else {
|
||||||
|
Explanation r =
|
||||||
|
new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
|
||||||
|
r.addDetail(e);
|
||||||
|
sumExpl.addDetail(r);
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
if (c.getOccur() == Occur.SHOULD) {
|
||||||
|
shouldMatchCount++;
|
||||||
|
}
|
||||||
|
} else if (c.isRequired()) {
|
||||||
|
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
|
||||||
|
r.addDetail(e);
|
||||||
|
sumExpl.addDetail(r);
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (fail) {
|
||||||
|
sumExpl.setMatch(Boolean.FALSE);
|
||||||
|
sumExpl.setValue(0.0f);
|
||||||
|
sumExpl.setDescription
|
||||||
|
("Failure to meet condition(s) of required/prohibited clause(s)");
|
||||||
|
return sumExpl;
|
||||||
|
} else if (shouldMatchCount < minShouldMatch) {
|
||||||
|
sumExpl.setMatch(Boolean.FALSE);
|
||||||
|
sumExpl.setValue(0.0f);
|
||||||
|
sumExpl.setDescription("Failure to match minimum number "+
|
||||||
|
"of optional clauses: " + minShouldMatch);
|
||||||
|
return sumExpl;
|
||||||
|
}
|
||||||
|
|
||||||
|
sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
|
||||||
|
sumExpl.setValue(sum);
|
||||||
|
|
||||||
|
final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
|
||||||
|
if (coordFactor == 1.0f) {
|
||||||
|
return sumExpl; // eliminate wrapper
|
||||||
|
} else {
|
||||||
|
ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
|
||||||
|
sum*coordFactor,
|
||||||
|
"product of:");
|
||||||
|
result.addDetail(sumExpl);
|
||||||
|
result.addDetail(new Explanation(coordFactor,
|
||||||
|
"coord("+coord+"/"+maxCoord+")"));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer}
|
||||||
|
* cannot be used. */
|
||||||
|
// pkg-private for forcing use of BooleanScorer in tests
|
||||||
|
BooleanScorer booleanScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
|
List<BulkScorer> optional = new ArrayList<BulkScorer>();
|
||||||
|
Iterator<BooleanClause> cIter = query.clauses().iterator();
|
||||||
|
for (Weight w : weights) {
|
||||||
|
BooleanClause c = cIter.next();
|
||||||
|
BulkScorer subScorer = w.bulkScorer(context, acceptDocs);
|
||||||
|
if (subScorer == null) {
|
||||||
|
if (c.isRequired()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} else if (c.isRequired()) {
|
||||||
|
// TODO: there are some cases where BooleanScorer
|
||||||
|
// would handle conjunctions faster than
|
||||||
|
// BooleanScorer2...
|
||||||
|
return null;
|
||||||
|
} else if (c.isProhibited()) {
|
||||||
|
// TODO: there are some cases where BooleanScorer could do this faster
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
optional.add(subScorer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (optional.size() == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query.minNrShouldMatch > optional.size()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, query.minNrShouldMatch));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
|
final BooleanScorer bulkScorer = booleanScorer(context, acceptDocs);
|
||||||
|
if (bulkScorer != null) { // BooleanScorer is applicable
|
||||||
|
// TODO: what is the right heuristic here?
|
||||||
|
final long costThreshold;
|
||||||
|
if (query.minNrShouldMatch <= 1) {
|
||||||
|
// when all clauses are optional, use BooleanScorer aggressively
|
||||||
|
// TODO: is there actually a threshold under which we should rather
|
||||||
|
// use the regular scorer?
|
||||||
|
costThreshold = -1;
|
||||||
|
} else {
|
||||||
|
// when a minimum number of clauses should match, BooleanScorer is
|
||||||
|
// going to score all windows that have at least minNrShouldMatch
|
||||||
|
// matches in the window. But there is no way to know if there is
|
||||||
|
// an intersection (all clauses might match a different doc ID and
|
||||||
|
// there will be no matches in the end) so we should only use
|
||||||
|
// BooleanScorer if matches are very dense
|
||||||
|
costThreshold = context.reader().maxDoc() / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bulkScorer.cost() > costThreshold) {
|
||||||
|
return bulkScorer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.bulkScorer(context, acceptDocs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
|
// initially the user provided value,
|
||||||
|
// but if minNrShouldMatch == optional.size(),
|
||||||
|
// we will optimize and move these to required, making this 0
|
||||||
|
int minShouldMatch = query.minNrShouldMatch;
|
||||||
|
|
||||||
|
List<Scorer> required = new ArrayList<>();
|
||||||
|
List<Scorer> prohibited = new ArrayList<>();
|
||||||
|
List<Scorer> optional = new ArrayList<>();
|
||||||
|
Iterator<BooleanClause> cIter = query.clauses().iterator();
|
||||||
|
for (Weight w : weights) {
|
||||||
|
BooleanClause c = cIter.next();
|
||||||
|
Scorer subScorer = w.scorer(context, acceptDocs);
|
||||||
|
if (subScorer == null) {
|
||||||
|
if (c.isRequired()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} else if (c.isRequired()) {
|
||||||
|
required.add(subScorer);
|
||||||
|
} else if (c.isProhibited()) {
|
||||||
|
prohibited.add(subScorer);
|
||||||
|
} else {
|
||||||
|
optional.add(subScorer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// scorer simplifications:
|
||||||
|
|
||||||
|
if (optional.size() == minShouldMatch) {
|
||||||
|
// any optional clauses are in fact required
|
||||||
|
required.addAll(optional);
|
||||||
|
optional.clear();
|
||||||
|
minShouldMatch = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (required.isEmpty() && optional.isEmpty()) {
|
||||||
|
// no required and optional clauses.
|
||||||
|
return null;
|
||||||
|
} else if (optional.size() < minShouldMatch) {
|
||||||
|
// either >1 req scorer, or there are 0 req scorers and at least 1
|
||||||
|
// optional scorer. Therefore if there are not enough optional scorers
|
||||||
|
// no documents will be matched by the query
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we don't need scores, so if we have required clauses, drop optional clauses completely
|
||||||
|
if (!needsScores && minShouldMatch == 0 && required.size() > 0) {
|
||||||
|
optional.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// three cases: conjunction, disjunction, or mix
|
||||||
|
|
||||||
|
// pure conjunction
|
||||||
|
if (optional.isEmpty()) {
|
||||||
|
return excl(req(required, disableCoord), prohibited);
|
||||||
|
}
|
||||||
|
|
||||||
|
// pure disjunction
|
||||||
|
if (required.isEmpty()) {
|
||||||
|
return excl(opt(optional, minShouldMatch, disableCoord), prohibited);
|
||||||
|
}
|
||||||
|
|
||||||
|
// conjunction-disjunction mix:
|
||||||
|
// we create the required and optional pieces with coord disabled, and then
|
||||||
|
// combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the
|
||||||
|
// optional side must match. otherwise it's required + optional, factoring the
|
||||||
|
// number of optional terms into the coord calculation
|
||||||
|
|
||||||
|
Scorer req = excl(req(required, true), prohibited);
|
||||||
|
Scorer opt = opt(optional, minShouldMatch, true);
|
||||||
|
|
||||||
|
// TODO: clean this up: it's horrible
|
||||||
|
if (disableCoord) {
|
||||||
|
if (minShouldMatch > 0) {
|
||||||
|
return new ConjunctionScorer(this, new Scorer[] { req, opt }, 1F);
|
||||||
|
} else {
|
||||||
|
return new ReqOptSumScorer(req, opt);
|
||||||
|
}
|
||||||
|
} else if (optional.size() == 1) {
|
||||||
|
if (minShouldMatch > 0) {
|
||||||
|
return new ConjunctionScorer(this, new Scorer[] { req, opt }, coord(required.size()+1, maxCoord));
|
||||||
|
} else {
|
||||||
|
float coordReq = coord(required.size(), maxCoord);
|
||||||
|
float coordBoth = coord(required.size() + 1, maxCoord);
|
||||||
|
return new BooleanTopLevelScorers.ReqSingleOptScorer(req, opt, coordReq, coordBoth);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (minShouldMatch > 0) {
|
||||||
|
return new BooleanTopLevelScorers.CoordinatingConjunctionScorer(this, coords(), req, required.size(), opt);
|
||||||
|
} else {
|
||||||
|
return new BooleanTopLevelScorers.ReqMultiOptScorer(req, opt, required.size(), coords());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Scorer req(List<Scorer> required, boolean disableCoord) {
|
||||||
|
if (required.size() == 1) {
|
||||||
|
Scorer req = required.get(0);
|
||||||
|
if (!disableCoord && maxCoord > 1) {
|
||||||
|
return new BooleanTopLevelScorers.BoostedScorer(req, coord(1, maxCoord));
|
||||||
|
} else {
|
||||||
|
return req;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return new ConjunctionScorer(this,
|
||||||
|
required.toArray(new Scorer[required.size()]),
|
||||||
|
disableCoord ? 1.0F : coord(required.size(), maxCoord));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Scorer excl(Scorer main, List<Scorer> prohibited) throws IOException {
|
||||||
|
if (prohibited.isEmpty()) {
|
||||||
|
return main;
|
||||||
|
} else if (prohibited.size() == 1) {
|
||||||
|
return new ReqExclScorer(main, prohibited.get(0));
|
||||||
|
} else {
|
||||||
|
float coords[] = new float[prohibited.size()+1];
|
||||||
|
Arrays.fill(coords, 1F);
|
||||||
|
return new ReqExclScorer(main,
|
||||||
|
new DisjunctionSumScorer(this,
|
||||||
|
prohibited.toArray(new Scorer[prohibited.size()]),
|
||||||
|
coords));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Scorer opt(List<Scorer> optional, int minShouldMatch, boolean disableCoord) throws IOException {
|
||||||
|
if (optional.size() == 1) {
|
||||||
|
Scorer opt = optional.get(0);
|
||||||
|
if (!disableCoord && maxCoord > 1) {
|
||||||
|
return new BooleanTopLevelScorers.BoostedScorer(opt, coord(1, maxCoord));
|
||||||
|
} else {
|
||||||
|
return opt;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
float coords[];
|
||||||
|
if (disableCoord) {
|
||||||
|
coords = new float[optional.size()+1];
|
||||||
|
Arrays.fill(coords, 1F);
|
||||||
|
} else {
|
||||||
|
coords = coords();
|
||||||
|
}
|
||||||
|
if (minShouldMatch > 1) {
|
||||||
|
return new MinShouldMatchSumScorer(this, optional, minShouldMatch, coords);
|
||||||
|
} else {
|
||||||
|
return new DisjunctionSumScorer(this,
|
||||||
|
optional.toArray(new Scorer[optional.size()]),
|
||||||
|
coords);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private float[] coords() {
|
||||||
|
float[] coords = new float[maxCoord+1];
|
||||||
|
coords[0] = 0F;
|
||||||
|
for (int i = 1; i < coords.length; i++) {
|
||||||
|
coords[i] = coord(i, maxCoord);
|
||||||
|
}
|
||||||
|
return coords;
|
||||||
|
}
|
||||||
|
}
|
|
@ -283,7 +283,7 @@ public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
return new BooleanWeight(searcher, needsScores, false) {
|
return new BooleanWeight(this, searcher, needsScores, false) {
|
||||||
@Override
|
@Override
|
||||||
public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||||
Scorer scorer = scorer(context, acceptDocs);
|
Scorer scorer = scorer(context, acceptDocs);
|
||||||
|
|
|
@ -35,7 +35,6 @@ import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermContext;
|
import org.apache.lucene.index.TermContext;
|
||||||
import org.apache.lucene.search.BooleanQuery.BooleanWeight;
|
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
import org.apache.lucene.search.similarities.Similarity.SimScorer;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SimWeight;
|
import org.apache.lucene.search.similarities.Similarity.SimWeight;
|
||||||
|
|
|
@ -55,7 +55,7 @@ public class BoostingQuery extends Query {
|
||||||
BooleanQuery result = new BooleanQuery() {
|
BooleanQuery result = new BooleanQuery() {
|
||||||
@Override
|
@Override
|
||||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||||
return new BooleanWeight(searcher, needsScores, false) {
|
return new BooleanWeight(this, searcher, needsScores, false) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public float coord(int overlap, int max) {
|
public float coord(int overlap, int max) {
|
||||||
|
|
Loading…
Reference in New Issue