LUCENE-6251: Add two-phase support to ConstantScorer and most boolean scorers.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1660318 13f79535-47bb-0310-9956-ffa450edef68
2015-02-17 08:32:48 +00:00 · 2015-02-17 08:32:48 +00:00 · a8b94ea3c2
parent 274e24afed
commit a8b94ea3c2
8 changed files with 340 additions and 65 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -89,8 +89,9 @@ Optimizations
  positions lazily if the phrase query is in a conjunction with other queries.
  (Robert Muir, Adrien Grand)

-* LUCENE-6244: Pure disjunctions now propagate two-phase iterators of the
-  wrapped scorers (see LUCENE-6198). (Adrien Grand, Robert Muir)
+* LUCENE-6244, LUCENE-6251: All boolean queries but those that have a
+  minShouldMatch > 1 now either propagate or take advantage of the two-phase
+  iteration capabilities added in LUCENE-6198. (Adrien Grand, Robert Muir)

 * LUCENE-6241: FSDirectory.listAll() doesnt filter out subdirectories anymore,
  for faster performance. Subdirectories don't matter to Lucene. If you need to
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanTopLevelScorers.java
@ -38,6 +38,11 @@ class BooleanTopLevelScorers {
      this.boost = boost;
    }

+    @Override
+    public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+      return in.asTwoPhaseIterator();
+    }
+
    @Override
    public float score() throws IOException {
      return in.score() * boost;
--- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@ -183,6 +183,11 @@ public class ConstantScoreQuery extends Query {
      this.score = score;
    }

+    @Override
+    public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+      return in.asTwoPhaseIterator();
+    }
+
    @Override
    public int freq() throws IOException {
      return 1;
--- a/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
@ -22,77 +22,88 @@ import java.util.Collection;
 import java.util.Collections;

 /** A Scorer for queries with a required subscorer
- * and an excluding (prohibited) sub DocIdSetIterator.
+ * and an excluding (prohibited) sub {@link Scorer}.
 * <br>
 * This <code>Scorer</code> implements {@link Scorer#advance(int)},
- * and it uses the skipTo() on the given scorers.
+ * and it uses the advance() on the given scorers.
 */
 class ReqExclScorer extends FilterScorer {
-  private Scorer reqScorer;
-  private DocIdSetIterator exclDisi;
-  private int doc = -1;
+
+  private final Scorer reqScorer;
+  // approximations of the scorers, or the scorers themselves if they don't support approximations
+  private final DocIdSetIterator reqApproximation;
+  private final DocIdSetIterator exclApproximation;
+  // two-phase views of the scorers, or null if they do not support approximations
+  private final TwoPhaseDocIdSetIterator reqTwoPhaseIterator;
+  private final TwoPhaseDocIdSetIterator exclTwoPhaseIterator;

  /** Construct a <code>ReqExclScorer</code>.
   * @param reqScorer The scorer that must match, except where
-   * @param exclDisi indicates exclusion.
+   * @param exclScorer indicates exclusion.
   */
-  public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) {
+  public ReqExclScorer(Scorer reqScorer, Scorer exclScorer) {
    super(reqScorer);
    this.reqScorer = reqScorer;
-    this.exclDisi = exclDisi;
+    reqTwoPhaseIterator = reqScorer.asTwoPhaseIterator();
+    if (reqTwoPhaseIterator == null) {
+      reqApproximation = reqScorer;
+    } else {
+      reqApproximation = reqTwoPhaseIterator.approximation();
+    }
+    exclTwoPhaseIterator = exclScorer.asTwoPhaseIterator();
+    if (exclTwoPhaseIterator == null) {
+      exclApproximation = exclScorer;
+    } else {
+      exclApproximation = exclTwoPhaseIterator.approximation();
+    }
  }

  @Override
  public int nextDoc() throws IOException {
-    if (reqScorer == null) {
-      return doc;
-    }
-    doc = reqScorer.nextDoc();
-    if (doc == NO_MORE_DOCS) {
-      reqScorer = null; // exhausted, nothing left
-      return doc;
-    }
-    if (exclDisi == null) {
-      return doc;
-    }
-    return doc = toNonExcluded();
+    return toNonExcluded(reqApproximation.nextDoc());
  }

-  /** Advance to non excluded doc.
-   * <br>On entry:
-   * <ul>
-   * <li>reqScorer != null,
-   * <li>exclScorer != null,
-   * <li>reqScorer was advanced once via next() or skipTo()
-   *      and reqScorer.doc() may still be excluded.
-   * </ul>
-   * Advances reqScorer a non excluded required doc, if any.
-   * @return true iff there is a non excluded required doc.
-   */
-  private int toNonExcluded() throws IOException {
-    int exclDoc = exclDisi.docID();
-    int reqDoc = reqScorer.docID(); // may be excluded
-    do {  
-      if (reqDoc < exclDoc) {
-        return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded
-      } else if (reqDoc > exclDoc) {
-        exclDoc = exclDisi.advance(reqDoc);
-        if (exclDoc == NO_MORE_DOCS) {
-          exclDisi = null; // exhausted, no more exclusions
-          return reqDoc;
+  /** Confirms whether or not the given {@link TwoPhaseDocIdSetIterator}
+   *  matches on the current document. */
+  private static boolean matches(TwoPhaseDocIdSetIterator it) throws IOException {
+    return it == null || it.matches();
  }
-        if (exclDoc > reqDoc) {
-          return reqDoc; // not excluded
+
+  /** Confirm whether there is a match given the current positions of the
+   *  req and excl approximations. This method has 2 important properties:
+   *   - it only calls matches() on excl if the excl approximation is on
+   *     the same doc ID as the req approximation
+   *   - it does NOT call matches() on req if the excl approximation is exact
+   *     and is on the same doc ID as the req approximation */
+  private static boolean matches(int doc, int exclDoc,
+      TwoPhaseDocIdSetIterator reqTwoPhaseIterator,
+      TwoPhaseDocIdSetIterator exclTwoPhaseIterator) throws IOException {
+    assert exclDoc >= doc;
+    if (doc == exclDoc && matches(exclTwoPhaseIterator)) {
+      return false;
    }
+    return matches(reqTwoPhaseIterator);
  }
-    } while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS);
-    reqScorer = null; // exhausted, nothing left
+
+  /** Advance to the next non-excluded doc. */
+  private int toNonExcluded(int doc) throws IOException {
+    int exclDoc = exclApproximation.docID();
+    for (;; doc = reqApproximation.nextDoc()) {
+      if (doc == NO_MORE_DOCS) {
        return NO_MORE_DOCS;
      }
+      if (exclDoc < doc) {
+        exclDoc = exclApproximation.advance(doc);
+      }
+      if (matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator)) {
+        return doc;
+      }
+    }
+  }

  @Override
  public int docID() {
-    return doc;
+    return reqScorer.docID();
  }

  /** Returns the score of the current document matching the query.
@ -111,17 +122,32 @@ class ReqExclScorer extends FilterScorer {

  @Override
  public int advance(int target) throws IOException {
-    if (reqScorer == null) {
-      return doc = NO_MORE_DOCS;
-    }
-    if (exclDisi == null) {
-      return doc = reqScorer.advance(target);
-    }
-    if (reqScorer.advance(target) == NO_MORE_DOCS) {
-      reqScorer = null;
-      return doc = NO_MORE_DOCS;
-    }
-    return doc = toNonExcluded();
+    return toNonExcluded(reqApproximation.advance(target));
  }

+  @Override
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+    if (reqTwoPhaseIterator == null) {
+      return null;
+    }
+    return new TwoPhaseDocIdSetIterator() {
+
+      @Override
+      public DocIdSetIterator approximation() {
+        return reqApproximation;
+      }
+
+      @Override
+      public boolean matches() throws IOException {
+        final int doc = reqApproximation.docID();
+        // check if the doc is not excluded
+        int exclDoc = exclApproximation.docID();
+        if (exclDoc < doc) {
+          exclDoc = exclApproximation.advance(doc);
+        }
+        return ReqExclScorer.matches(doc, exclDoc, reqTwoPhaseIterator, exclTwoPhaseIterator);
+      }
+
+    };
+  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
@ -31,7 +31,7 @@ class ReqOptSumScorer extends Scorer {
  /** The scorers passed from the constructor.
   * These are set to null as soon as their next() or skipTo() returns false.
   */
-  protected Scorer reqScorer;
+  protected final Scorer reqScorer;
  protected Scorer optScorer;

  /** Construct a <code>ReqOptScorer</code>.
@ -49,6 +49,11 @@ class ReqOptSumScorer extends Scorer {
    this.optScorer = optScorer;
  }

+  @Override
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+    return reqScorer.asTwoPhaseIterator();
+  }
+
  @Override
  public int nextDoc() throws IOException {
    return reqScorer.nextDoc();
--- a/lucene/core/src/test/org/apache/lucene/search/TestApproximationSearchEquivalence.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestApproximationSearchEquivalence.java
@ -166,4 +166,116 @@ public class TestApproximationSearchEquivalence extends SearchEquivalenceTestBas

    assertSameScores(bq2, bq4);
  }
+
+  public void testConstantScore() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    TermQuery q1 = new TermQuery(t1);
+    TermQuery q2 = new TermQuery(t2);
+
+    BooleanQuery bq1 = new BooleanQuery();
+    bq1.add(new ConstantScoreQuery(q1), Occur.MUST);
+    bq1.add(new ConstantScoreQuery(q2), Occur.MUST);
+
+    BooleanQuery bq2 = new BooleanQuery();
+    bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q1, random())), Occur.MUST);
+    bq2.add(new ConstantScoreQuery(new RandomApproximationQuery(q2, random())), Occur.MUST);
+
+    assertSameScores(bq1, bq2);
+  }
+
+  public void testExclusion() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    TermQuery q1 = new TermQuery(t1);
+    TermQuery q2 = new TermQuery(t2);
+
+    BooleanQuery bq1 = new BooleanQuery();
+    bq1.add(q1, Occur.MUST);
+    bq1.add(q2, Occur.MUST_NOT);
+
+    BooleanQuery bq2 = new BooleanQuery();
+    bq2.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
+    bq2.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
+
+    assertSameScores(bq1, bq2);
+  }
+
+  public void testNestedExclusion() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    TermQuery q1 = new TermQuery(t1);
+    TermQuery q2 = new TermQuery(t2);
+    TermQuery q3 = new TermQuery(t3);
+
+    BooleanQuery bq1 = new BooleanQuery();
+    bq1.add(q1, Occur.MUST);
+    bq1.add(q2, Occur.MUST_NOT);
+
+    BooleanQuery bq2 = new BooleanQuery();
+    bq2.add(bq1, Occur.MUST);
+    bq2.add(q3, Occur.MUST);
+
+    // Both req and excl have approximations
+    BooleanQuery bq3 = new BooleanQuery();
+    bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
+    bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
+
+    BooleanQuery bq4 = new BooleanQuery();
+    bq4.add(bq3, Occur.MUST);
+    bq4.add(q3, Occur.MUST);
+
+    assertSameScores(bq2, bq4);
+
+    // Only req has an approximation
+    bq3 = new BooleanQuery();
+    bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
+    bq3.add(q2, Occur.MUST_NOT);
+
+    bq4 = new BooleanQuery();
+    bq4.add(bq3, Occur.MUST);
+    bq4.add(q3, Occur.MUST);
+
+    assertSameScores(bq2, bq4);
+
+    // Only excl has an approximation
+    bq3 = new BooleanQuery();
+    bq3.add(q1, Occur.MUST);
+    bq3.add(new RandomApproximationQuery(q2, random()), Occur.MUST_NOT);
+
+    bq4 = new BooleanQuery();
+    bq4.add(bq3, Occur.MUST);
+    bq4.add(q3, Occur.MUST);
+
+    assertSameScores(bq2, bq4);
+  }
+
+  public void testReqOpt() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    Term t3 = randomTerm();
+    TermQuery q1 = new TermQuery(t1);
+    TermQuery q2 = new TermQuery(t2);
+    TermQuery q3 = new TermQuery(t3);
+
+    BooleanQuery bq1 = new BooleanQuery();
+    bq1.add(q1, Occur.MUST);
+    bq1.add(q2, Occur.SHOULD);
+
+    BooleanQuery bq2 = new BooleanQuery();
+    bq2.add(bq1, Occur.MUST);
+    bq2.add(q3, Occur.MUST);
+    
+    BooleanQuery bq3 = new BooleanQuery();
+    bq3.add(new RandomApproximationQuery(q1, random()), Occur.MUST);
+    bq3.add(new RandomApproximationQuery(q2, random()), Occur.SHOULD);
+
+    BooleanQuery bq4 = new BooleanQuery();
+    bq4.add(bq3, Occur.MUST);
+    bq4.add(q3, Occur.MUST);
+
+    assertSameScores(bq2, bq4);
+  }
+
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
@ -41,6 +41,7 @@ import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanTopLevelScorers.BoostedScorer;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
@ -613,6 +614,7 @@ public class TestBooleanQuery extends LuceneTestCase {

    final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertTrue(scorer instanceof ConjunctionScorer);
    assertNotNull(scorer.asTwoPhaseIterator());

    reader.close();
@ -642,6 +644,97 @@ public class TestBooleanQuery extends LuceneTestCase {

    final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertTrue(scorer instanceof DisjunctionScorer);
+    assertNotNull(scorer.asTwoPhaseIterator());
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testBoostedScorerPropagatesApproximations() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    Field f = newTextField("field", "a b c", Field.Store.NO);
+    doc.add(f);
+    w.addDocument(doc);
+    w.commit();
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("field", "a"));
+    pq.add(new Term("field", "b"));
+
+    BooleanQuery q = new BooleanQuery();
+    q.add(pq, Occur.SHOULD);
+    q.add(new TermQuery(new Term("field", "d")), Occur.SHOULD);
+
+    final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
+    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertTrue(scorer instanceof BoostedScorer);
+    assertNotNull(scorer.asTwoPhaseIterator());
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testExclusionPropagatesApproximations() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    Field f = newTextField("field", "a b c", Field.Store.NO);
+    doc.add(f);
+    w.addDocument(doc);
+    w.commit();
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("field", "a"));
+    pq.add(new Term("field", "b"));
+
+    BooleanQuery q = new BooleanQuery();
+    q.add(pq, Occur.SHOULD);
+    q.add(new TermQuery(new Term("field", "c")), Occur.MUST_NOT);
+
+    final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
+    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertTrue(scorer instanceof ReqExclScorer);
+    assertNotNull(scorer.asTwoPhaseIterator());
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testReqOptPropagatesApproximations() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    Field f = newTextField("field", "a b c", Field.Store.NO);
+    doc.add(f);
+    w.addDocument(doc);
+    w.commit();
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("field", "a"));
+    pq.add(new Term("field", "b"));
+
+    BooleanQuery q = new BooleanQuery();
+    q.add(pq, Occur.MUST);
+    q.add(new TermQuery(new Term("field", "c")), Occur.SHOULD);
+
+    final Weight weight = searcher.createNormalizedWeight(q, true);
+    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertTrue(scorer instanceof ReqOptSumScorer);
    assertNotNull(scorer.asTwoPhaseIterator());

    reader.close();
--- a/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConstantScoreQuery.java
@ -21,9 +21,11 @@ import java.io.IOException;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@ -179,4 +181,30 @@ public class TestConstantScoreQuery extends LuceneTestCase {
    d.close();
  }

+  public void testPropagatesApproximations() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    Field f = newTextField("field", "a b", Field.Store.NO);
+    doc.add(f);
+    w.addDocument(doc);
+    w.commit();
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("field", "a"));
+    pq.add(new Term("field", "b"));
+
+    ConstantScoreQuery q = new ConstantScoreQuery(pq);
+
+    final Weight weight = searcher.createNormalizedWeight(q, true);
+    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertNotNull(scorer.asTwoPhaseIterator());
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
 }