LUCENE-6251: Add two-phase support to ConstantScorer and most boolean scorers.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1660318 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-02-17 08:32:48 +00:00
parent 274e24afed
commit a8b94ea3c2
8 changed files with 340 additions and 65 deletions

View File

@ -89,8 +89,9 @@ Optimizations
positions lazily if the phrase query is in a conjunction with other queries.
(Robert Muir, Adrien Grand)
* LUCENE-6244: Pure disjunctions now propagate two-phase iterators of the
wrapped scorers (see LUCENE-6198). (Adrien Grand, Robert Muir)
* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
minShouldMatch > 1 now either propagate or take advantage of the two-phase
iteration capabilities added in LUCENE-6198. (Adrien Grand, Robert Muir)
* LUCENE-6241: FSDirectory.listAll() doesnt filter out subdirectories anymore,
for faster performance. Subdirectories don't matter to Lucene. If you need to

View File

@ -38,6 +38,11 @@ class BooleanTopLevelScorers {
this.boost = boost;
}
@Override
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
return in.asTwoPhaseIterator();
}
@Override
public float score() throws IOException {
return in.score() * boost;

View File

@ -183,6 +183,11 @@ public class ConstantScoreQuery extends Query {
this.score = score;
}
@Override
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
return in.asTwoPhaseIterator();
}
@Override
public int freq() throws IOException {
return 1;

View File

@ -22,77 +22,88 @@ import java.util.Collection;
import java.util.Collections;
/** A Scorer for queries with a required subscorer
* and an excluding (prohibited) sub DocIdSetIterator.
* and an excluding (prohibited) sub {@link Scorer}.
* <br>
* This <code>Scorer</code> implements {@link Scorer#advance(int)},
* and it uses the skipTo() on the given scorers.
* and it uses the advance() on the given scorers.
*/
class ReqExclScorer extends FilterScorer {
private Scorer reqScorer;
private DocIdSetIterator exclDisi;
private int doc = -1;
private final Scorer reqScorer;
// approximations of the scorers, or the scorers themselves if they don't support approximations
private final DocIdSetIterator reqApproximation;
private final DocIdSetIterator exclApproximation;
// two-phase views of the scorers, or null if they do not support approximations
private final TwoPhaseDocIdSetIterator reqTwoPhaseIterator;
private final TwoPhaseDocIdSetIterator exclTwoPhaseIterator;
/** Construct a <code>ReqExclScorer</code>.
* @param reqScorer The scorer that must match, except where
* @param exclDisi indicates exclusion.
* @param exclScorer indicates exclusion.
*/
public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) {
public ReqExclScorer(Scorer reqScorer, Scorer exclScorer) {
super(reqScorer);
this.reqScorer = reqScorer;
this.exclDisi = exclDisi;
reqTwoPhaseIterator = reqScorer.asTwoPhaseIterator();
if (reqTwoPhaseIterator == null) {
reqApproximation = reqScorer;
} else {
reqApproximation = reqTwoPhaseIterator.approximation();
}
exclTwoPhaseIterator = exclScorer.asTwoPhaseIterator();
if (exclTwoPhaseIterator == null) {
exclApproximation = exclScorer;
} else {
exclApproximation = exclTwoPhaseIterator.approximation();
}
}
@Override
public int nextDoc() throws IOException {
if (reqScorer == null) {
return doc;
}
doc = reqScorer.nextDoc();
if (doc == NO_MORE_DOCS) {
reqScorer = null; // exhausted, nothing left
return doc;
}
if (exclDisi == null) {
return doc;
}
return doc = toNonExcluded();
return toNonExcluded(reqApproximation.nextDoc());
}
/** Advance to non excluded doc.
* <br>On entry:
* <ul>
* <li>reqScorer != null,
* <li>exclScorer != null,
* <li>reqScorer was advanced once via next() or skipTo()
* and reqScorer.doc() may still be excluded.
* </ul>
* Advances reqScorer a non excluded required doc, if any.
* @return true iff there is a non excluded required doc.
*/
private int toNonExcluded() throws IOException {
int exclDoc = exclDisi.docID();
int reqDoc = reqScorer.docID(); // may be excluded
do {
if (reqDoc < exclDoc) {
return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded
} else if (reqDoc > exclDoc) {
exclDoc = exclDisi.advance(reqDoc);
if (exclDoc == NO_MORE_DOCS) {
exclDisi = null; // exhausted, no more exclusions
return reqDoc;
}
if (exclDoc > reqDoc) {
return reqDoc; // not excluded
}
/** Confirms whether or not the given {@link TwoPhaseDocIdSetIterator}
* matches on the current document. */
private static boolean matches(TwoPhaseDocIdSetIterator it) throws IOException {
return it == null || it.matches();
}
/** Confirm whether there is a match given the current positions of the
* req and excl approximations. This method has 2 important properties:
* - it only calls matches() on excl if the excl approximation is on
* the same doc ID as the req approximation
* - it does NOT call matches() on req if the excl approximation is exact
* and is on the same doc ID as the req approximation */
private static boolean matches(int doc, int exclDoc,
TwoPhaseDocIdSetIterator reqTwoPhaseIterator,
TwoPhaseDocIdSetIterator exclTwoPhaseIterator) throws IOException {
assert exclDoc >= doc;
if (doc == exclDoc && matches(exclTwoPhaseIterator)) {
return false;
}
return matches(reqTwoPhaseIterator);
}
/** Advance to the next non-excluded doc. */
private int toNonExcluded(int doc) throws IOException {
int exclDoc = exclApproximation.docID();
for (;; doc = reqApproximation.nextDoc()) {
if (doc == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
} while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS);
reqScorer = null; // exhausted, nothing left
return NO_MORE_DOCS;
if (exclDoc < doc) {
exclDoc = exclApproximation.advance(doc);
}
if (matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator)) {
return doc;
}
}
}
@Override
public int docID() {
return doc;
return reqScorer.docID();
}
/** Returns the score of the current document matching the query.
@ -111,17 +122,32 @@ class ReqExclScorer extends FilterScorer {
@Override
public int advance(int target) throws IOException {
if (reqScorer == null) {
return doc = NO_MORE_DOCS;
}
if (exclDisi == null) {
return doc = reqScorer.advance(target);
}
if (reqScorer.advance(target) == NO_MORE_DOCS) {
reqScorer = null;
return doc = NO_MORE_DOCS;
}
return doc = toNonExcluded();
return toNonExcluded(reqApproximation.advance(target));
}
@Override
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
if (reqTwoPhaseIterator == null) {
return null;
}
return new TwoPhaseDocIdSetIterator() {
@Override
public DocIdSetIterator approximation() {
return reqApproximation;
}
@Override
public boolean matches() throws IOException {
final int doc = reqApproximation.docID();
// check if the doc is not excluded
int exclDoc = exclApproximation.docID();
if (exclDoc < doc) {
exclDoc = exclApproximation.advance(doc);
}
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
}
};
}
}

View File

@ -31,7 +31,7 @@ class ReqOptSumScorer extends Scorer {
/** The scorers passed from the constructor.
* These are set to null as soon as their next() or skipTo() returns false.
*/
protected Scorer reqScorer;
protected final Scorer reqScorer;
protected Scorer optScorer;
/** Construct a <code>ReqOptScorer</code>.
@ -49,6 +49,11 @@ class ReqOptSumScorer extends Scorer {
this.optScorer = optScorer;
}
@Override
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
return reqScorer.asTwoPhaseIterator();
}
@Override
public int nextDoc() throws IOException {
return reqScorer.nextDoc();

View File

@ -166,4 +166,116 @@ public class TestApproximationSearchEquivalence extends SearchEquivalenceTestBas
assertSameScores(bq2, bq4);
}
public void testConstantScore() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
TermQuery q1 = new TermQuery(t1);
TermQuery q2 = new TermQuery(t2);
BooleanQuery bq1 = new BooleanQuery();
bq1.add(new ConstantScoreQuery(q1), Occur.MUST);
bq1.add(new ConstantScoreQuery(q2), Occur.MUST);
BooleanQuery bq2 = new BooleanQuery();
bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q1, random())), Occur.MUST);
bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q2, random())), Occur.MUST);
assertSameScores(bq1, bq2);
}
public void testExclusion() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
TermQuery q1 = new TermQuery(t1);
TermQuery q2 = new TermQuery(t2);
BooleanQuery bq1 = new BooleanQuery();
bq1.add(q1, Occur.MUST);
bq1.add(q2, Occur.MUST_NOT);
BooleanQuery bq2 = new BooleanQuery();
bq2.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
bq2.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
assertSameScores(bq1, bq2);
}
public void testNestedExclusion() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
Term t3 = randomTerm();
TermQuery q1 = new TermQuery(t1);
TermQuery q2 = new TermQuery(t2);
TermQuery q3 = new TermQuery(t3);
BooleanQuery bq1 = new BooleanQuery();
bq1.add(q1, Occur.MUST);
bq1.add(q2, Occur.MUST_NOT);
BooleanQuery bq2 = new BooleanQuery();
bq2.add(bq1, Occur.MUST);
bq2.add(q3, Occur.MUST);
// Both req and excl have approximations
BooleanQuery bq3 = new BooleanQuery();
bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
BooleanQuery bq4 = new BooleanQuery();
bq4.add(bq3, Occur.MUST);
bq4.add(q3, Occur.MUST);
assertSameScores(bq2, bq4);
// Only req has an approximation
bq3 = new BooleanQuery();
bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
bq3.add(q2, Occur.MUST_NOT);
bq4 = new BooleanQuery();
bq4.add(bq3, Occur.MUST);
bq4.add(q3, Occur.MUST);
assertSameScores(bq2, bq4);
// Only excl has an approximation
bq3 = new BooleanQuery();
bq3.add(q1, Occur.MUST);
bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
bq4 = new BooleanQuery();
bq4.add(bq3, Occur.MUST);
bq4.add(q3, Occur.MUST);
assertSameScores(bq2, bq4);
}
public void testReqOpt() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
Term t3 = randomTerm();
TermQuery q1 = new TermQuery(t1);
TermQuery q2 = new TermQuery(t2);
TermQuery q3 = new TermQuery(t3);
BooleanQuery bq1 = new BooleanQuery();
bq1.add(q1, Occur.MUST);
bq1.add(q2, Occur.SHOULD);
BooleanQuery bq2 = new BooleanQuery();
bq2.add(bq1, Occur.MUST);
bq2.add(q3, Occur.MUST);
BooleanQuery bq3 = new BooleanQuery();
bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
bq3.add(new RandomApproximationQuery(q2, random()), Occur.SHOULD);
BooleanQuery bq4 = new BooleanQuery();
bq4.add(bq3, Occur.MUST);
bq4.add(q3, Occur.MUST);
assertSameScores(bq2, bq4);
}
}

View File

@ -41,6 +41,7 @@ import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanTopLevelScorers.BoostedScorer;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -613,6 +614,7 @@ public class TestBooleanQuery extends LuceneTestCase {
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertTrue(scorer instanceof ConjunctionScorer);
assertNotNull(scorer.asTwoPhaseIterator());
reader.close();
@ -642,6 +644,97 @@ public class TestBooleanQuery extends LuceneTestCase {
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertTrue(scorer instanceof DisjunctionScorer);
assertNotNull(scorer.asTwoPhaseIterator());
reader.close();
w.close();
dir.close();
}
public void testBoostedScorerPropagatesApproximations() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field f = newTextField("field", "a b c", Field.Store.NO);
doc.add(f);
w.addDocument(doc);
w.commit();
DirectoryReader reader = w.getReader();
final IndexSearcher searcher = new IndexSearcher(reader);
PhraseQuery pq = new PhraseQuery();
pq.add(new Term("field", "a"));
pq.add(new Term("field", "b"));
BooleanQuery q = new BooleanQuery();
q.add(pq, Occur.SHOULD);
q.add(new TermQuery(new Term("field", "d")), Occur.SHOULD);
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertTrue(scorer instanceof BoostedScorer);
assertNotNull(scorer.asTwoPhaseIterator());
reader.close();
w.close();
dir.close();
}
public void testExclusionPropagatesApproximations() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field f = newTextField("field", "a b c", Field.Store.NO);
doc.add(f);
w.addDocument(doc);
w.commit();
DirectoryReader reader = w.getReader();
final IndexSearcher searcher = new IndexSearcher(reader);
PhraseQuery pq = new PhraseQuery();
pq.add(new Term("field", "a"));
pq.add(new Term("field", "b"));
BooleanQuery q = new BooleanQuery();
q.add(pq, Occur.SHOULD);
q.add(new TermQuery(new Term("field", "c")), Occur.MUST_NOT);
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertTrue(scorer instanceof ReqExclScorer);
assertNotNull(scorer.asTwoPhaseIterator());
reader.close();
w.close();
dir.close();
}
public void testReqOptPropagatesApproximations() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field f = newTextField("field", "a b c", Field.Store.NO);
doc.add(f);
w.addDocument(doc);
w.commit();
DirectoryReader reader = w.getReader();
final IndexSearcher searcher = new IndexSearcher(reader);
PhraseQuery pq = new PhraseQuery();
pq.add(new Term("field", "a"));
pq.add(new Term("field", "b"));
BooleanQuery q = new BooleanQuery();
q.add(pq, Occur.MUST);
q.add(new TermQuery(new Term("field", "c")), Occur.SHOULD);
final Weight weight = searcher.createNormalizedWeight(q, true);
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertTrue(scorer instanceof ReqOptSumScorer);
assertNotNull(scorer.asTwoPhaseIterator());
reader.close();

View File

@ -21,9 +21,11 @@ import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -179,4 +181,30 @@ public class TestConstantScoreQuery extends LuceneTestCase {
d.close();
}
public void testPropagatesApproximations() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
Field f = newTextField("field", "a b", Field.Store.NO);
doc.add(f);
w.addDocument(doc);
w.commit();
DirectoryReader reader = w.getReader();
final IndexSearcher searcher = new IndexSearcher(reader);
PhraseQuery pq = new PhraseQuery();
pq.add(new Term("field", "a"));
pq.add(new Term("field", "b"));
ConstantScoreQuery q = new ConstantScoreQuery(pq);
final Weight weight = searcher.createNormalizedWeight(q, true);
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
assertNotNull(scorer.asTwoPhaseIterator());
reader.close();
w.close();
dir.close();
}
}