diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 33aa202b1be..982f9269599 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -89,8 +89,9 @@ Optimizations positions lazily if the phrase query is in a conjunction with other queries. (Robert Muir, Adrien Grand) -* LUCENE-6244: Pure disjunctions now propagate two-phase iterators of the - wrapped scorers (see LUCENE-6198). (Adrien Grand, Robert Muir) +* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a + minShouldMatch > 1 now either propagate or take advantage of the two-phase + iteration capabilities added in LUCENE-6198. (Adrien Grand, Robert Muir) * LUCENE-6241: FSDirectory.listAll() doesnt filter out subdirectories anymore, for faster performance. Subdirectories don't matter to Lucene. If you need to diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java index f44f254413e..b0d7e2ddf7c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java @@ -38,6 +38,11 @@ class BooleanTopLevelScorers { this.boost = boost; } + @Override + public TwoPhaseDocIdSetIterator asTwoPhaseIterator() { + return in.asTwoPhaseIterator(); + } + @Override public float score() throws IOException { return in.score() * boost; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 5950a2a4ec3..33760ed4347 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -183,6 +183,11 @@ public class ConstantScoreQuery extends Query { this.score = score; } + @Override + public TwoPhaseDocIdSetIterator asTwoPhaseIterator() { + return in.asTwoPhaseIterator(); + } + @Override public int freq() throws IOException { return 1; diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java index cf6aec1cc3b..e8b7832dc5b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java @@ -22,77 +22,88 @@ import java.util.Collection; import java.util.Collections; /** A Scorer for queries with a required subscorer - * and an excluding (prohibited) sub DocIdSetIterator. + * and an excluding (prohibited) sub {@link Scorer}. *
* This Scorer implements {@link Scorer#advance(int)}, - * and it uses the skipTo() on the given scorers. + * and it uses the advance() on the given scorers. */ class ReqExclScorer extends FilterScorer { - private Scorer reqScorer; - private DocIdSetIterator exclDisi; - private int doc = -1; + + private final Scorer reqScorer; + // approximations of the scorers, or the scorers themselves if they don't support approximations + private final DocIdSetIterator reqApproximation; + private final DocIdSetIterator exclApproximation; + // two-phase views of the scorers, or null if they do not support approximations + private final TwoPhaseDocIdSetIterator reqTwoPhaseIterator; + private final TwoPhaseDocIdSetIterator exclTwoPhaseIterator; /** Construct a ReqExclScorer. * @param reqScorer The scorer that must match, except where - * @param exclDisi indicates exclusion. + * @param exclScorer indicates exclusion. */ - public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) { + public ReqExclScorer(Scorer reqScorer, Scorer exclScorer) { super(reqScorer); this.reqScorer = reqScorer; - this.exclDisi = exclDisi; + reqTwoPhaseIterator = reqScorer.asTwoPhaseIterator(); + if (reqTwoPhaseIterator == null) { + reqApproximation = reqScorer; + } else { + reqApproximation = reqTwoPhaseIterator.approximation(); + } + exclTwoPhaseIterator = exclScorer.asTwoPhaseIterator(); + if (exclTwoPhaseIterator == null) { + exclApproximation = exclScorer; + } else { + exclApproximation = exclTwoPhaseIterator.approximation(); + } } @Override public int nextDoc() throws IOException { - if (reqScorer == null) { - return doc; - } - doc = reqScorer.nextDoc(); - if (doc == NO_MORE_DOCS) { - reqScorer = null; // exhausted, nothing left - return doc; - } - if (exclDisi == null) { - return doc; - } - return doc = toNonExcluded(); + return toNonExcluded(reqApproximation.nextDoc()); } - - /** Advance to non excluded doc. - *
On entry: - * - * Advances reqScorer a non excluded required doc, if any. - * @return true iff there is a non excluded required doc. - */ - private int toNonExcluded() throws IOException { - int exclDoc = exclDisi.docID(); - int reqDoc = reqScorer.docID(); // may be excluded - do { - if (reqDoc < exclDoc) { - return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded - } else if (reqDoc > exclDoc) { - exclDoc = exclDisi.advance(reqDoc); - if (exclDoc == NO_MORE_DOCS) { - exclDisi = null; // exhausted, no more exclusions - return reqDoc; - } - if (exclDoc > reqDoc) { - return reqDoc; // not excluded - } + + /** Confirms whether or not the given {@link TwoPhaseDocIdSetIterator} + * matches on the current document. */ + private static boolean matches(TwoPhaseDocIdSetIterator it) throws IOException { + return it == null || it.matches(); + } + + /** Confirm whether there is a match given the current positions of the + * req and excl approximations. This method has 2 important properties: + * - it only calls matches() on excl if the excl approximation is on + * the same doc ID as the req approximation + * - it does NOT call matches() on req if the excl approximation is exact + * and is on the same doc ID as the req approximation */ + private static boolean matches(int doc, int exclDoc, + TwoPhaseDocIdSetIterator reqTwoPhaseIterator, + TwoPhaseDocIdSetIterator exclTwoPhaseIterator) throws IOException { + assert exclDoc >= doc; + if (doc == exclDoc && matches(exclTwoPhaseIterator)) { + return false; + } + return matches(reqTwoPhaseIterator); + } + + /** Advance to the next non-excluded doc. */ + private int toNonExcluded(int doc) throws IOException { + int exclDoc = exclApproximation.docID(); + for (;; doc = reqApproximation.nextDoc()) { + if (doc == NO_MORE_DOCS) { + return NO_MORE_DOCS; } - } while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS); - reqScorer = null; // exhausted, nothing left - return NO_MORE_DOCS; + if (exclDoc < doc) { + exclDoc = exclApproximation.advance(doc); + } + if (matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator)) { + return doc; + } + } } @Override public int docID() { - return doc; + return reqScorer.docID(); } /** Returns the score of the current document matching the query. @@ -111,17 +122,32 @@ class ReqExclScorer extends FilterScorer { @Override public int advance(int target) throws IOException { - if (reqScorer == null) { - return doc = NO_MORE_DOCS; - } - if (exclDisi == null) { - return doc = reqScorer.advance(target); - } - if (reqScorer.advance(target) == NO_MORE_DOCS) { - reqScorer = null; - return doc = NO_MORE_DOCS; - } - return doc = toNonExcluded(); + return toNonExcluded(reqApproximation.advance(target)); } + @Override + public TwoPhaseDocIdSetIterator asTwoPhaseIterator() { + if (reqTwoPhaseIterator == null) { + return null; + } + return new TwoPhaseDocIdSetIterator() { + + @Override + public DocIdSetIterator approximation() { + return reqApproximation; + } + + @Override + public boolean matches() throws IOException { + final int doc = reqApproximation.docID(); + // check if the doc is not excluded + int exclDoc = exclApproximation.docID(); + if (exclDoc < doc) { + exclDoc = exclApproximation.advance(doc); + } + return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator); + } + + }; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java index 0d1b82f2464..5755976550b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java @@ -31,7 +31,7 @@ class ReqOptSumScorer extends Scorer { /** The scorers passed from the constructor. * These are set to null as soon as their next() or skipTo() returns false. */ - protected Scorer reqScorer; + protected final Scorer reqScorer; protected Scorer optScorer; /** Construct a ReqOptScorer. @@ -49,6 +49,11 @@ class ReqOptSumScorer extends Scorer { this.optScorer = optScorer; } + @Override + public TwoPhaseDocIdSetIterator asTwoPhaseIterator() { + return reqScorer.asTwoPhaseIterator(); + } + @Override public int nextDoc() throws IOException { return reqScorer.nextDoc(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestApproximationSearchEquivalence.java b/lucene/core/src/test/org/apache/lucene/search/TestApproximationSearchEquivalence.java index 7f9d22c7903..1e578cc99f0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestApproximationSearchEquivalence.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestApproximationSearchEquivalence.java @@ -166,4 +166,116 @@ public class TestApproximationSearchEquivalence extends SearchEquivalenceTestBas assertSameScores(bq2, bq4); } + + public void testConstantScore() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + TermQuery q1 = new TermQuery(t1); + TermQuery q2 = new TermQuery(t2); + + BooleanQuery bq1 = new BooleanQuery(); + bq1.add(new ConstantScoreQuery(q1), Occur.MUST); + bq1.add(new ConstantScoreQuery(q2), Occur.MUST); + + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q1, random())), Occur.MUST); + bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q2, random())), Occur.MUST); + + assertSameScores(bq1, bq2); + } + + public void testExclusion() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + TermQuery q1 = new TermQuery(t1); + TermQuery q2 = new TermQuery(t2); + + BooleanQuery bq1 = new BooleanQuery(); + bq1.add(q1, Occur.MUST); + bq1.add(q2, Occur.MUST_NOT); + + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(new RandomApproximationQuery(q1, random()), Occur.MUST); + bq2.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT); + + assertSameScores(bq1, bq2); + } + + public void testNestedExclusion() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + Term t3 = randomTerm(); + TermQuery q1 = new TermQuery(t1); + TermQuery q2 = new TermQuery(t2); + TermQuery q3 = new TermQuery(t3); + + BooleanQuery bq1 = new BooleanQuery(); + bq1.add(q1, Occur.MUST); + bq1.add(q2, Occur.MUST_NOT); + + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(bq1, Occur.MUST); + bq2.add(q3, Occur.MUST); + + // Both req and excl have approximations + BooleanQuery bq3 = new BooleanQuery(); + bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST); + bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT); + + BooleanQuery bq4 = new BooleanQuery(); + bq4.add(bq3, Occur.MUST); + bq4.add(q3, Occur.MUST); + + assertSameScores(bq2, bq4); + + // Only req has an approximation + bq3 = new BooleanQuery(); + bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST); + bq3.add(q2, Occur.MUST_NOT); + + bq4 = new BooleanQuery(); + bq4.add(bq3, Occur.MUST); + bq4.add(q3, Occur.MUST); + + assertSameScores(bq2, bq4); + + // Only excl has an approximation + bq3 = new BooleanQuery(); + bq3.add(q1, Occur.MUST); + bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT); + + bq4 = new BooleanQuery(); + bq4.add(bq3, Occur.MUST); + bq4.add(q3, Occur.MUST); + + assertSameScores(bq2, bq4); + } + + public void testReqOpt() throws Exception { + Term t1 = randomTerm(); + Term t2 = randomTerm(); + Term t3 = randomTerm(); + TermQuery q1 = new TermQuery(t1); + TermQuery q2 = new TermQuery(t2); + TermQuery q3 = new TermQuery(t3); + + BooleanQuery bq1 = new BooleanQuery(); + bq1.add(q1, Occur.MUST); + bq1.add(q2, Occur.SHOULD); + + BooleanQuery bq2 = new BooleanQuery(); + bq2.add(bq1, Occur.MUST); + bq2.add(q3, Occur.MUST); + + BooleanQuery bq3 = new BooleanQuery(); + bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST); + bq3.add(new RandomApproximationQuery(q2, random()), Occur.SHOULD); + + BooleanQuery bq4 = new BooleanQuery(); + bq4.add(bq3, Occur.MUST); + bq4.add(q3, Occur.MUST); + + assertSameScores(bq2, bq4); + } + } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java index ded5dd551fb..626e651a0d1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java @@ -41,6 +41,7 @@ import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanTopLevelScorers.BoostedScorer; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; @@ -613,6 +614,7 @@ public class TestBooleanQuery extends LuceneTestCase { final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean()); final Scorer scorer = weight.scorer(reader.leaves().get(0), null); + assertTrue(scorer instanceof ConjunctionScorer); assertNotNull(scorer.asTwoPhaseIterator()); reader.close(); @@ -642,6 +644,97 @@ public class TestBooleanQuery extends LuceneTestCase { final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean()); final Scorer scorer = weight.scorer(reader.leaves().get(0), null); + assertTrue(scorer instanceof DisjunctionScorer); + assertNotNull(scorer.asTwoPhaseIterator()); + + reader.close(); + w.close(); + dir.close(); + } + + public void testBoostedScorerPropagatesApproximations() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + Field f = newTextField("field", "a b c", Field.Store.NO); + doc.add(f); + w.addDocument(doc); + w.commit(); + + DirectoryReader reader = w.getReader(); + final IndexSearcher searcher = new IndexSearcher(reader); + + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("field", "a")); + pq.add(new Term("field", "b")); + + BooleanQuery q = new BooleanQuery(); + q.add(pq, Occur.SHOULD); + q.add(new TermQuery(new Term("field", "d")), Occur.SHOULD); + + final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean()); + final Scorer scorer = weight.scorer(reader.leaves().get(0), null); + assertTrue(scorer instanceof BoostedScorer); + assertNotNull(scorer.asTwoPhaseIterator()); + + reader.close(); + w.close(); + dir.close(); + } + + public void testExclusionPropagatesApproximations() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + Field f = newTextField("field", "a b c", Field.Store.NO); + doc.add(f); + w.addDocument(doc); + w.commit(); + + DirectoryReader reader = w.getReader(); + final IndexSearcher searcher = new IndexSearcher(reader); + + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("field", "a")); + pq.add(new Term("field", "b")); + + BooleanQuery q = new BooleanQuery(); + q.add(pq, Occur.SHOULD); + q.add(new TermQuery(new Term("field", "c")), Occur.MUST_NOT); + + final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean()); + final Scorer scorer = weight.scorer(reader.leaves().get(0), null); + assertTrue(scorer instanceof ReqExclScorer); + assertNotNull(scorer.asTwoPhaseIterator()); + + reader.close(); + w.close(); + dir.close(); + } + + public void testReqOptPropagatesApproximations() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + Field f = newTextField("field", "a b c", Field.Store.NO); + doc.add(f); + w.addDocument(doc); + w.commit(); + + DirectoryReader reader = w.getReader(); + final IndexSearcher searcher = new IndexSearcher(reader); + + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("field", "a")); + pq.add(new Term("field", "b")); + + BooleanQuery q = new BooleanQuery(); + q.add(pq, Occur.MUST); + q.add(new TermQuery(new Term("field", "c")), Occur.SHOULD); + + final Weight weight = searcher.createNormalizedWeight(q, true); + final Scorer scorer = weight.scorer(reader.leaves().get(0), null); + assertTrue(scorer instanceof ReqOptSumScorer); assertNotNull(scorer.asTwoPhaseIterator()); reader.close(); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java index 70ca0947791..d1830d70a71 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java @@ -21,9 +21,11 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -179,4 +181,30 @@ public class TestConstantScoreQuery extends LuceneTestCase { d.close(); } + public void testPropagatesApproximations() throws IOException { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + Field f = newTextField("field", "a b", Field.Store.NO); + doc.add(f); + w.addDocument(doc); + w.commit(); + + DirectoryReader reader = w.getReader(); + final IndexSearcher searcher = new IndexSearcher(reader); + + PhraseQuery pq = new PhraseQuery(); + pq.add(new Term("field", "a")); + pq.add(new Term("field", "b")); + + ConstantScoreQuery q = new ConstantScoreQuery(pq); + + final Weight weight = searcher.createNormalizedWeight(q, true); + final Scorer scorer = weight.scorer(reader.leaves().get(0), null); + assertNotNull(scorer.asTwoPhaseIterator()); + + reader.close(); + w.close(); + dir.close(); + } }