mirror of https://github.com/apache/lucene.git
LUCENE-6251: Add two-phase support to ConstantScorer and most boolean scorers.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1660318 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
274e24afed
commit
a8b94ea3c2
|
@ -89,8 +89,9 @@ Optimizations
|
|||
positions lazily if the phrase query is in a conjunction with other queries.
|
||||
(Robert Muir, Adrien Grand)
|
||||
|
||||
* LUCENE-6244: Pure disjunctions now propagate two-phase iterators of the
|
||||
wrapped scorers (see LUCENE-6198). (Adrien Grand, Robert Muir)
|
||||
* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
|
||||
minShouldMatch > 1 now either propagate or take advantage of the two-phase
|
||||
iteration capabilities added in LUCENE-6198. (Adrien Grand, Robert Muir)
|
||||
|
||||
* LUCENE-6241: FSDirectory.listAll() doesnt filter out subdirectories anymore,
|
||||
for faster performance. Subdirectories don't matter to Lucene. If you need to
|
||||
|
|
|
@ -38,6 +38,11 @@ class BooleanTopLevelScorers {
|
|||
this.boost = boost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
|
||||
return in.asTwoPhaseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return in.score() * boost;
|
||||
|
|
|
@ -183,6 +183,11 @@ public class ConstantScoreQuery extends Query {
|
|||
this.score = score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
|
||||
return in.asTwoPhaseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return 1;
|
||||
|
|
|
@ -22,77 +22,88 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
|
||||
/** A Scorer for queries with a required subscorer
|
||||
* and an excluding (prohibited) sub DocIdSetIterator.
|
||||
* and an excluding (prohibited) sub {@link Scorer}.
|
||||
* <br>
|
||||
* This <code>Scorer</code> implements {@link Scorer#advance(int)},
|
||||
* and it uses the skipTo() on the given scorers.
|
||||
* and it uses the advance() on the given scorers.
|
||||
*/
|
||||
class ReqExclScorer extends FilterScorer {
|
||||
private Scorer reqScorer;
|
||||
private DocIdSetIterator exclDisi;
|
||||
private int doc = -1;
|
||||
|
||||
private final Scorer reqScorer;
|
||||
// approximations of the scorers, or the scorers themselves if they don't support approximations
|
||||
private final DocIdSetIterator reqApproximation;
|
||||
private final DocIdSetIterator exclApproximation;
|
||||
// two-phase views of the scorers, or null if they do not support approximations
|
||||
private final TwoPhaseDocIdSetIterator reqTwoPhaseIterator;
|
||||
private final TwoPhaseDocIdSetIterator exclTwoPhaseIterator;
|
||||
|
||||
/** Construct a <code>ReqExclScorer</code>.
|
||||
* @param reqScorer The scorer that must match, except where
|
||||
* @param exclDisi indicates exclusion.
|
||||
* @param exclScorer indicates exclusion.
|
||||
*/
|
||||
public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) {
|
||||
public ReqExclScorer(Scorer reqScorer, Scorer exclScorer) {
|
||||
super(reqScorer);
|
||||
this.reqScorer = reqScorer;
|
||||
this.exclDisi = exclDisi;
|
||||
reqTwoPhaseIterator = reqScorer.asTwoPhaseIterator();
|
||||
if (reqTwoPhaseIterator == null) {
|
||||
reqApproximation = reqScorer;
|
||||
} else {
|
||||
reqApproximation = reqTwoPhaseIterator.approximation();
|
||||
}
|
||||
exclTwoPhaseIterator = exclScorer.asTwoPhaseIterator();
|
||||
if (exclTwoPhaseIterator == null) {
|
||||
exclApproximation = exclScorer;
|
||||
} else {
|
||||
exclApproximation = exclTwoPhaseIterator.approximation();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (reqScorer == null) {
|
||||
return doc;
|
||||
}
|
||||
doc = reqScorer.nextDoc();
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
reqScorer = null; // exhausted, nothing left
|
||||
return doc;
|
||||
}
|
||||
if (exclDisi == null) {
|
||||
return doc;
|
||||
}
|
||||
return doc = toNonExcluded();
|
||||
return toNonExcluded(reqApproximation.nextDoc());
|
||||
}
|
||||
|
||||
/** Advance to non excluded doc.
|
||||
* <br>On entry:
|
||||
* <ul>
|
||||
* <li>reqScorer != null,
|
||||
* <li>exclScorer != null,
|
||||
* <li>reqScorer was advanced once via next() or skipTo()
|
||||
* and reqScorer.doc() may still be excluded.
|
||||
* </ul>
|
||||
* Advances reqScorer a non excluded required doc, if any.
|
||||
* @return true iff there is a non excluded required doc.
|
||||
*/
|
||||
private int toNonExcluded() throws IOException {
|
||||
int exclDoc = exclDisi.docID();
|
||||
int reqDoc = reqScorer.docID(); // may be excluded
|
||||
do {
|
||||
if (reqDoc < exclDoc) {
|
||||
return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded
|
||||
} else if (reqDoc > exclDoc) {
|
||||
exclDoc = exclDisi.advance(reqDoc);
|
||||
if (exclDoc == NO_MORE_DOCS) {
|
||||
exclDisi = null; // exhausted, no more exclusions
|
||||
return reqDoc;
|
||||
/** Confirms whether or not the given {@link TwoPhaseDocIdSetIterator}
|
||||
* matches on the current document. */
|
||||
private static boolean matches(TwoPhaseDocIdSetIterator it) throws IOException {
|
||||
return it == null || it.matches();
|
||||
}
|
||||
if (exclDoc > reqDoc) {
|
||||
return reqDoc; // not excluded
|
||||
|
||||
/** Confirm whether there is a match given the current positions of the
|
||||
* req and excl approximations. This method has 2 important properties:
|
||||
* - it only calls matches() on excl if the excl approximation is on
|
||||
* the same doc ID as the req approximation
|
||||
* - it does NOT call matches() on req if the excl approximation is exact
|
||||
* and is on the same doc ID as the req approximation */
|
||||
private static boolean matches(int doc, int exclDoc,
|
||||
TwoPhaseDocIdSetIterator reqTwoPhaseIterator,
|
||||
TwoPhaseDocIdSetIterator exclTwoPhaseIterator) throws IOException {
|
||||
assert exclDoc >= doc;
|
||||
if (doc == exclDoc && matches(exclTwoPhaseIterator)) {
|
||||
return false;
|
||||
}
|
||||
return matches(reqTwoPhaseIterator);
|
||||
}
|
||||
} while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS);
|
||||
reqScorer = null; // exhausted, nothing left
|
||||
|
||||
/** Advance to the next non-excluded doc. */
|
||||
private int toNonExcluded(int doc) throws IOException {
|
||||
int exclDoc = exclApproximation.docID();
|
||||
for (;; doc = reqApproximation.nextDoc()) {
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
if (exclDoc < doc) {
|
||||
exclDoc = exclApproximation.advance(doc);
|
||||
}
|
||||
if (matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator)) {
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
return reqScorer.docID();
|
||||
}
|
||||
|
||||
/** Returns the score of the current document matching the query.
|
||||
|
@ -111,17 +122,32 @@ class ReqExclScorer extends FilterScorer {
|
|||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (reqScorer == null) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
if (exclDisi == null) {
|
||||
return doc = reqScorer.advance(target);
|
||||
}
|
||||
if (reqScorer.advance(target) == NO_MORE_DOCS) {
|
||||
reqScorer = null;
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = toNonExcluded();
|
||||
return toNonExcluded(reqApproximation.advance(target));
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
|
||||
if (reqTwoPhaseIterator == null) {
|
||||
return null;
|
||||
}
|
||||
return new TwoPhaseDocIdSetIterator() {
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator approximation() {
|
||||
return reqApproximation;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
final int doc = reqApproximation.docID();
|
||||
// check if the doc is not excluded
|
||||
int exclDoc = exclApproximation.docID();
|
||||
if (exclDoc < doc) {
|
||||
exclDoc = exclApproximation.advance(doc);
|
||||
}
|
||||
return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ class ReqOptSumScorer extends Scorer {
|
|||
/** The scorers passed from the constructor.
|
||||
* These are set to null as soon as their next() or skipTo() returns false.
|
||||
*/
|
||||
protected Scorer reqScorer;
|
||||
protected final Scorer reqScorer;
|
||||
protected Scorer optScorer;
|
||||
|
||||
/** Construct a <code>ReqOptScorer</code>.
|
||||
|
@ -49,6 +49,11 @@ class ReqOptSumScorer extends Scorer {
|
|||
this.optScorer = optScorer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
|
||||
return reqScorer.asTwoPhaseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return reqScorer.nextDoc();
|
||||
|
|
|
@ -166,4 +166,116 @@ public class TestApproximationSearchEquivalence extends SearchEquivalenceTestBas
|
|||
|
||||
assertSameScores(bq2, bq4);
|
||||
}
|
||||
|
||||
public void testConstantScore() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
TermQuery q1 = new TermQuery(t1);
|
||||
TermQuery q2 = new TermQuery(t2);
|
||||
|
||||
BooleanQuery bq1 = new BooleanQuery();
|
||||
bq1.add(new ConstantScoreQuery(q1), Occur.MUST);
|
||||
bq1.add(new ConstantScoreQuery(q2), Occur.MUST);
|
||||
|
||||
BooleanQuery bq2 = new BooleanQuery();
|
||||
bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q1, random())), Occur.MUST);
|
||||
bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q2, random())), Occur.MUST);
|
||||
|
||||
assertSameScores(bq1, bq2);
|
||||
}
|
||||
|
||||
public void testExclusion() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
TermQuery q1 = new TermQuery(t1);
|
||||
TermQuery q2 = new TermQuery(t2);
|
||||
|
||||
BooleanQuery bq1 = new BooleanQuery();
|
||||
bq1.add(q1, Occur.MUST);
|
||||
bq1.add(q2, Occur.MUST_NOT);
|
||||
|
||||
BooleanQuery bq2 = new BooleanQuery();
|
||||
bq2.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
|
||||
bq2.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
|
||||
|
||||
assertSameScores(bq1, bq2);
|
||||
}
|
||||
|
||||
public void testNestedExclusion() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
Term t3 = randomTerm();
|
||||
TermQuery q1 = new TermQuery(t1);
|
||||
TermQuery q2 = new TermQuery(t2);
|
||||
TermQuery q3 = new TermQuery(t3);
|
||||
|
||||
BooleanQuery bq1 = new BooleanQuery();
|
||||
bq1.add(q1, Occur.MUST);
|
||||
bq1.add(q2, Occur.MUST_NOT);
|
||||
|
||||
BooleanQuery bq2 = new BooleanQuery();
|
||||
bq2.add(bq1, Occur.MUST);
|
||||
bq2.add(q3, Occur.MUST);
|
||||
|
||||
// Both req and excl have approximations
|
||||
BooleanQuery bq3 = new BooleanQuery();
|
||||
bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
|
||||
bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
|
||||
|
||||
BooleanQuery bq4 = new BooleanQuery();
|
||||
bq4.add(bq3, Occur.MUST);
|
||||
bq4.add(q3, Occur.MUST);
|
||||
|
||||
assertSameScores(bq2, bq4);
|
||||
|
||||
// Only req has an approximation
|
||||
bq3 = new BooleanQuery();
|
||||
bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
|
||||
bq3.add(q2, Occur.MUST_NOT);
|
||||
|
||||
bq4 = new BooleanQuery();
|
||||
bq4.add(bq3, Occur.MUST);
|
||||
bq4.add(q3, Occur.MUST);
|
||||
|
||||
assertSameScores(bq2, bq4);
|
||||
|
||||
// Only excl has an approximation
|
||||
bq3 = new BooleanQuery();
|
||||
bq3.add(q1, Occur.MUST);
|
||||
bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
|
||||
|
||||
bq4 = new BooleanQuery();
|
||||
bq4.add(bq3, Occur.MUST);
|
||||
bq4.add(q3, Occur.MUST);
|
||||
|
||||
assertSameScores(bq2, bq4);
|
||||
}
|
||||
|
||||
public void testReqOpt() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
Term t3 = randomTerm();
|
||||
TermQuery q1 = new TermQuery(t1);
|
||||
TermQuery q2 = new TermQuery(t2);
|
||||
TermQuery q3 = new TermQuery(t3);
|
||||
|
||||
BooleanQuery bq1 = new BooleanQuery();
|
||||
bq1.add(q1, Occur.MUST);
|
||||
bq1.add(q2, Occur.SHOULD);
|
||||
|
||||
BooleanQuery bq2 = new BooleanQuery();
|
||||
bq2.add(bq1, Occur.MUST);
|
||||
bq2.add(q3, Occur.MUST);
|
||||
|
||||
BooleanQuery bq3 = new BooleanQuery();
|
||||
bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
|
||||
bq3.add(new RandomApproximationQuery(q2, random()), Occur.SHOULD);
|
||||
|
||||
BooleanQuery bq4 = new BooleanQuery();
|
||||
bq4.add(bq3, Occur.MUST);
|
||||
bq4.add(q3, Occur.MUST);
|
||||
|
||||
assertSameScores(bq2, bq4);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.lucene.index.MultiReader;
|
|||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanTopLevelScorers.BoostedScorer;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
|
@ -613,6 +614,7 @@ public class TestBooleanQuery extends LuceneTestCase {
|
|||
|
||||
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
|
||||
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
|
||||
assertTrue(scorer instanceof ConjunctionScorer);
|
||||
assertNotNull(scorer.asTwoPhaseIterator());
|
||||
|
||||
reader.close();
|
||||
|
@ -642,6 +644,97 @@ public class TestBooleanQuery extends LuceneTestCase {
|
|||
|
||||
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
|
||||
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
|
||||
assertTrue(scorer instanceof DisjunctionScorer);
|
||||
assertNotNull(scorer.asTwoPhaseIterator());
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testBoostedScorerPropagatesApproximations() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
Field f = newTextField("field", "a b c", Field.Store.NO);
|
||||
doc.add(f);
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
final IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.add(new Term("field", "a"));
|
||||
pq.add(new Term("field", "b"));
|
||||
|
||||
BooleanQuery q = new BooleanQuery();
|
||||
q.add(pq, Occur.SHOULD);
|
||||
q.add(new TermQuery(new Term("field", "d")), Occur.SHOULD);
|
||||
|
||||
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
|
||||
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
|
||||
assertTrue(scorer instanceof BoostedScorer);
|
||||
assertNotNull(scorer.asTwoPhaseIterator());
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testExclusionPropagatesApproximations() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
Field f = newTextField("field", "a b c", Field.Store.NO);
|
||||
doc.add(f);
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
final IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.add(new Term("field", "a"));
|
||||
pq.add(new Term("field", "b"));
|
||||
|
||||
BooleanQuery q = new BooleanQuery();
|
||||
q.add(pq, Occur.SHOULD);
|
||||
q.add(new TermQuery(new Term("field", "c")), Occur.MUST_NOT);
|
||||
|
||||
final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
|
||||
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
|
||||
assertTrue(scorer instanceof ReqExclScorer);
|
||||
assertNotNull(scorer.asTwoPhaseIterator());
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testReqOptPropagatesApproximations() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
Field f = newTextField("field", "a b c", Field.Store.NO);
|
||||
doc.add(f);
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
final IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.add(new Term("field", "a"));
|
||||
pq.add(new Term("field", "b"));
|
||||
|
||||
BooleanQuery q = new BooleanQuery();
|
||||
q.add(pq, Occur.MUST);
|
||||
q.add(new TermQuery(new Term("field", "c")), Occur.SHOULD);
|
||||
|
||||
final Weight weight = searcher.createNormalizedWeight(q, true);
|
||||
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
|
||||
assertTrue(scorer instanceof ReqOptSumScorer);
|
||||
assertNotNull(scorer.asTwoPhaseIterator());
|
||||
|
||||
reader.close();
|
||||
|
|
|
@ -21,9 +21,11 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -179,4 +181,30 @@ public class TestConstantScoreQuery extends LuceneTestCase {
|
|||
d.close();
|
||||
}
|
||||
|
||||
public void testPropagatesApproximations() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
Document doc = new Document();
|
||||
Field f = newTextField("field", "a b", Field.Store.NO);
|
||||
doc.add(f);
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
|
||||
DirectoryReader reader = w.getReader();
|
||||
final IndexSearcher searcher = new IndexSearcher(reader);
|
||||
|
||||
PhraseQuery pq = new PhraseQuery();
|
||||
pq.add(new Term("field", "a"));
|
||||
pq.add(new Term("field", "b"));
|
||||
|
||||
ConstantScoreQuery q = new ConstantScoreQuery(pq);
|
||||
|
||||
final Weight weight = searcher.createNormalizedWeight(q, true);
|
||||
final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
|
||||
assertNotNull(scorer.asTwoPhaseIterator());
|
||||
|
||||
reader.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue