mirror of https://github.com/apache/lucene.git
LUCENE-6940: Speed up MUST_NOT clauses.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1722443 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fd765b6252
commit
e773508648
|
@ -156,6 +156,9 @@ Optimizations
|
|||
* LUCENE-6944: BooleanWeight no longer creates sub-scorers if BS1 is not
|
||||
applicable. (Adrien Grand)
|
||||
|
||||
* LUCENE-6940: MUST_NOT clauses execute faster, especially when they are sparse.
|
||||
(Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-6918: LRUQueryCache.onDocIdSetEviction is only called when at least
|
||||
|
|
|
@ -37,35 +37,6 @@ final class BooleanScorer extends BulkScorer {
|
|||
static final int SET_SIZE = 1 << (SHIFT - 6);
|
||||
static final int SET_MASK = SET_SIZE - 1;
|
||||
|
||||
private static BulkScorer disableScoring(final BulkScorer scorer) {
|
||||
return new BulkScorer() {
|
||||
|
||||
@Override
|
||||
public int score(final LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
|
||||
final LeafCollector noScoreCollector = new LeafCollector() {
|
||||
FakeScorer fake = new FakeScorer();
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
collector.setScorer(fake);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
fake.doc = doc;
|
||||
collector.collect(doc);
|
||||
}
|
||||
};
|
||||
return scorer.score(noScoreCollector, acceptDocs, min, max);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return scorer.cost();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static class Bucket {
|
||||
double score;
|
||||
int freq;
|
||||
|
@ -193,7 +164,7 @@ final class BooleanScorer extends BulkScorer {
|
|||
if (needsScores == false) {
|
||||
// OrCollector calls score() all the time so we have to explicitly
|
||||
// disable scoring in order to avoid decoding useless norms
|
||||
scorer = disableScoring(scorer);
|
||||
scorer = BooleanWeight.disableScoring(scorer);
|
||||
}
|
||||
final BulkScorerAndDoc evicted = tail.insertWithOverflow(new BulkScorerAndDoc(scorer));
|
||||
if (evicted != null) {
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
* Expert: the Weight for BooleanQuery, used to
|
||||
|
@ -187,27 +188,45 @@ final class BooleanWeight extends Weight {
|
|||
}
|
||||
}
|
||||
|
||||
/** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer}
|
||||
* cannot be used. */
|
||||
// pkg-private for forcing use of BooleanScorer in tests
|
||||
BulkScorer booleanScorer(LeafReaderContext context) throws IOException {
|
||||
if (query.getClauses(Occur.MUST).isEmpty() == false
|
||||
|| query.getClauses(Occur.FILTER).isEmpty() == false) {
|
||||
// TODO: there are some cases where BooleanScorer
|
||||
// would handle conjunctions faster than
|
||||
// BooleanScorer2...
|
||||
return null;
|
||||
} else if (query.getClauses(Occur.MUST_NOT).isEmpty() == false) {
|
||||
// TODO: there are some cases where BooleanScorer could do this faster
|
||||
return null;
|
||||
}
|
||||
static BulkScorer disableScoring(final BulkScorer scorer) {
|
||||
return new BulkScorer() {
|
||||
|
||||
@Override
|
||||
public int score(final LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
|
||||
final LeafCollector noScoreCollector = new LeafCollector() {
|
||||
FakeScorer fake = new FakeScorer();
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
collector.setScorer(fake);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
fake.doc = doc;
|
||||
collector.collect(doc);
|
||||
}
|
||||
};
|
||||
return scorer.score(noScoreCollector, acceptDocs, min, max);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return scorer.cost();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Return a BulkScorer for the optional clauses only,
|
||||
// or null if it is not applicable
|
||||
// pkg-private for forcing use of BooleanScorer in tests
|
||||
BulkScorer optionalBulkScorer(LeafReaderContext context) throws IOException {
|
||||
List<BulkScorer> optional = new ArrayList<BulkScorer>();
|
||||
Iterator<BooleanClause> cIter = query.iterator();
|
||||
for (Weight w : weights) {
|
||||
BooleanClause c = cIter.next();
|
||||
if (c.getOccur() != Occur.SHOULD) {
|
||||
throw new AssertionError();
|
||||
continue;
|
||||
}
|
||||
BulkScorer subScorer = w.bulkScorer(context);
|
||||
|
||||
|
@ -236,10 +255,50 @@ final class BooleanWeight extends Weight {
|
|||
return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, query.getMinimumNumberShouldMatch()), needsScores);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
final BulkScorer bulkScorer = booleanScorer(context);
|
||||
if (bulkScorer != null) { // BooleanScorer is applicable
|
||||
// Return a BulkScorer for the required clauses only,
|
||||
// or null if it is not applicable
|
||||
private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOException {
|
||||
BulkScorer scorer = null;
|
||||
|
||||
Iterator<BooleanClause> cIter = query.iterator();
|
||||
for (Weight w : weights) {
|
||||
BooleanClause c = cIter.next();
|
||||
if (c.isRequired() == false) {
|
||||
continue;
|
||||
}
|
||||
if (scorer != null) {
|
||||
// we don't have a BulkScorer for conjunctions
|
||||
return null;
|
||||
}
|
||||
scorer = w.bulkScorer(context);
|
||||
if (scorer == null) {
|
||||
// no matches
|
||||
return null;
|
||||
}
|
||||
if (c.isScoring() == false) {
|
||||
if (needsScores) {
|
||||
scorer = disableScoring(scorer);
|
||||
}
|
||||
} else {
|
||||
assert maxCoord == 1;
|
||||
}
|
||||
}
|
||||
return scorer;
|
||||
}
|
||||
|
||||
/** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer}
|
||||
* cannot be used. */
|
||||
BulkScorer booleanScorer(LeafReaderContext context) throws IOException {
|
||||
final int numOptionalClauses = query.getClauses(Occur.SHOULD).size();
|
||||
final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size();
|
||||
|
||||
BulkScorer positiveScorer;
|
||||
if (numRequiredClauses == 0) {
|
||||
positiveScorer = optionalBulkScorer(context);
|
||||
if (positiveScorer == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// TODO: what is the right heuristic here?
|
||||
final long costThreshold;
|
||||
if (query.getMinimumNumberShouldMatch() <= 1) {
|
||||
|
@ -257,11 +316,59 @@ final class BooleanWeight extends Weight {
|
|||
costThreshold = context.reader().maxDoc() / 3;
|
||||
}
|
||||
|
||||
if (bulkScorer.cost() > costThreshold) {
|
||||
return bulkScorer;
|
||||
if (positiveScorer.cost() < costThreshold) {
|
||||
return null;
|
||||
}
|
||||
|
||||
} else if (numRequiredClauses == 1
|
||||
&& numOptionalClauses == 0
|
||||
&& query.getMinimumNumberShouldMatch() == 0) {
|
||||
positiveScorer = requiredBulkScorer(context);
|
||||
} else {
|
||||
// TODO: there are some cases where BooleanScorer
|
||||
// would handle conjunctions faster than
|
||||
// BooleanScorer2...
|
||||
return null;
|
||||
}
|
||||
|
||||
if (positiveScorer == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<Scorer> prohibited = new ArrayList<>();
|
||||
Iterator<BooleanClause> cIter = query.iterator();
|
||||
for (Weight w : weights) {
|
||||
BooleanClause c = cIter.next();
|
||||
if (c.isProhibited()) {
|
||||
Scorer scorer = w.scorer(context);
|
||||
if (scorer != null) {
|
||||
prohibited.add(scorer);
|
||||
}
|
||||
}
|
||||
}
|
||||
return super.bulkScorer(context);
|
||||
|
||||
if (prohibited.isEmpty()) {
|
||||
return positiveScorer;
|
||||
} else {
|
||||
Scorer prohibitedScorer = opt(prohibited, 1, true);
|
||||
if (prohibitedScorer.twoPhaseIterator() != null) {
|
||||
// ReqExclBulkScorer can't deal efficiently with two-phased prohibited clauses
|
||||
return null;
|
||||
}
|
||||
return new ReqExclBulkScorer(positiveScorer, prohibitedScorer.iterator());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
|
||||
final BulkScorer bulkScorer = booleanScorer(context);
|
||||
if (bulkScorer != null) {
|
||||
// bulk scoring is applicable, use it
|
||||
return bulkScorer;
|
||||
} else {
|
||||
// use a Scorer-based impl (BS2)
|
||||
return super.bulkScorer(context);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
final class ReqExclBulkScorer extends BulkScorer {
|
||||
|
||||
private final BulkScorer req;
|
||||
private final DocIdSetIterator excl;
|
||||
|
||||
ReqExclBulkScorer(BulkScorer req, DocIdSetIterator excl) {
|
||||
this.req = req;
|
||||
this.excl = excl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
|
||||
int upTo = min;
|
||||
int exclDoc = excl.docID();
|
||||
|
||||
while (upTo < max) {
|
||||
if (exclDoc < upTo) {
|
||||
exclDoc = excl.advance(upTo);
|
||||
}
|
||||
if (exclDoc == upTo) {
|
||||
// upTo is excluded so we can consider that we scored up to upTo+1
|
||||
upTo += 1;
|
||||
exclDoc = excl.nextDoc();
|
||||
} else {
|
||||
upTo = req.score(collector, acceptDocs, upTo, Math.min(exclDoc, max));
|
||||
}
|
||||
}
|
||||
|
||||
if (upTo == max) {
|
||||
upTo = req.score(collector, acceptDocs, upTo, upTo);
|
||||
}
|
||||
|
||||
return upTo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return req.cost();
|
||||
}
|
||||
|
||||
}
|
|
@ -196,6 +196,60 @@ public class TestBooleanScorer extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
public void testOptimizeProhibitedClauses() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("foo", "bar", Store.NO));
|
||||
doc.add(new StringField("foo", "baz", Store.NO));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("foo", "baz", Store.NO));
|
||||
w.addDocument(doc);
|
||||
w.forceMerge(1);
|
||||
IndexReader reader = w.getReader();
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
searcher.setQueryCache(null); // so that weights are not wrapped
|
||||
final LeafReaderContext ctx = reader.leaves().get(0);
|
||||
|
||||
Query query = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
|
||||
.build();
|
||||
Weight weight = searcher.createNormalizedWeight(query, true);
|
||||
BulkScorer scorer = ((BooleanWeight) weight).booleanScorer(ctx);
|
||||
assertTrue(scorer instanceof ReqExclBulkScorer);
|
||||
|
||||
query = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.SHOULD)
|
||||
.add(new MatchAllDocsQuery(), Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
|
||||
.build();
|
||||
weight = searcher.createNormalizedWeight(query, true);
|
||||
scorer = ((BooleanWeight) weight).booleanScorer(ctx);
|
||||
assertTrue(scorer instanceof ReqExclBulkScorer);
|
||||
|
||||
query = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.MUST)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
|
||||
.build();
|
||||
weight = searcher.createNormalizedWeight(query, true);
|
||||
scorer = ((BooleanWeight) weight).booleanScorer(ctx);
|
||||
assertTrue(scorer instanceof ReqExclBulkScorer);
|
||||
|
||||
query = new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("foo", "baz")), Occur.FILTER)
|
||||
.add(new TermQuery(new Term("foo", "bar")), Occur.MUST_NOT)
|
||||
.build();
|
||||
weight = searcher.createNormalizedWeight(query, true);
|
||||
scorer = ((BooleanWeight) weight).booleanScorer(ctx);
|
||||
assertTrue(scorer instanceof ReqExclBulkScorer);
|
||||
|
||||
w.close();
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testSparseClauseOptimization() throws IOException {
|
||||
// When some windows have only one scorer that can match, the scorer will
|
||||
// directly call the collector in this window
|
||||
|
|
|
@ -132,7 +132,7 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
case SCORER:
|
||||
return weight.scorer(reader.getContext());
|
||||
case BULK_SCORER:
|
||||
final BulkScorer bulkScorer = weight.booleanScorer(reader.getContext());
|
||||
final BulkScorer bulkScorer = weight.optionalBulkScorer(reader.getContext());
|
||||
if (bulkScorer == null) {
|
||||
if (weight.scorer(reader.getContext()) != null) {
|
||||
throw new AssertionError("BooleanScorer should be applicable for this query");
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.DocIdSetBuilder;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestReqExclBulkScorer extends LuceneTestCase {
|
||||
|
||||
public void testRandom() throws IOException {
|
||||
final int iters = atLeast(10);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
doTestRandom();
|
||||
}
|
||||
}
|
||||
|
||||
public void doTestRandom() throws IOException {
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1000);
|
||||
DocIdSetBuilder reqBuilder = new DocIdSetBuilder(maxDoc);
|
||||
DocIdSetBuilder exclBuilder = new DocIdSetBuilder(maxDoc);
|
||||
final int numIncludedDocs = TestUtil.nextInt(random(), 1, maxDoc);
|
||||
final int numExcludedDocs = TestUtil.nextInt(random(), 1, maxDoc);
|
||||
for (int i = 0; i < numIncludedDocs; ++i) {
|
||||
reqBuilder.add(random().nextInt(maxDoc));
|
||||
}
|
||||
for (int i = 0; i < numExcludedDocs; ++i) {
|
||||
exclBuilder.add(random().nextInt(maxDoc));
|
||||
}
|
||||
|
||||
final DocIdSet req = reqBuilder.build();
|
||||
final DocIdSet excl = exclBuilder.build();
|
||||
|
||||
final BulkScorer reqBulkScorer = new BulkScorer() {
|
||||
final DocIdSetIterator iterator = req.iterator();
|
||||
|
||||
@Override
|
||||
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
|
||||
int doc = iterator.docID();
|
||||
if (iterator.docID() < min) {
|
||||
doc = iterator.advance(min);
|
||||
}
|
||||
while (doc < max) {
|
||||
if (acceptDocs == null || acceptDocs.get(doc)) {
|
||||
collector.collect(doc);
|
||||
}
|
||||
doc = iterator.nextDoc();
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return iterator.cost();
|
||||
}
|
||||
};
|
||||
|
||||
ReqExclBulkScorer reqExcl = new ReqExclBulkScorer(reqBulkScorer, excl.iterator());
|
||||
final FixedBitSet actualMatches = new FixedBitSet(maxDoc);
|
||||
if (random().nextBoolean()) {
|
||||
reqExcl.score(new LeafCollector() {
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
actualMatches.set(doc);
|
||||
}
|
||||
}, null);
|
||||
} else {
|
||||
int next = 0;
|
||||
while (next < maxDoc) {
|
||||
final int min = next;
|
||||
final int max = min + random().nextInt(10);
|
||||
next = reqExcl.score(new LeafCollector() {
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {}
|
||||
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
actualMatches.set(doc);
|
||||
}
|
||||
}, null, min, max);
|
||||
assertTrue(next >= max);
|
||||
}
|
||||
}
|
||||
|
||||
final FixedBitSet expectedMatches = new FixedBitSet(maxDoc);
|
||||
expectedMatches.or(req.iterator());
|
||||
FixedBitSet excludedSet = new FixedBitSet(maxDoc);
|
||||
excludedSet.or(excl.iterator());
|
||||
expectedMatches.andNot(excludedSet);
|
||||
|
||||
assertArrayEquals(expectedMatches.getBits(), actualMatches.getBits());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue