LUCENE-6198: Two-phase execution for phrase queries and conjunctions.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1659599 13f79535-47bb-0310-9956-ffa450edef68
2015-02-13 16:45:06 +00:00 · 2015-02-13 16:45:06 +00:00 · 5b4c02a3a1
parent 82eff4eb4d
commit 5b4c02a3a1
8 changed files with 653 additions and 88 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -81,6 +81,11 @@ Optimizations
 * LUCENE-6233 Speed up CheckIndex when the index has term vectors
  (Robert Muir, Mike McCandless)

+* LUCENE-6198: Added the TwoPhaseDocIdSetIterator API, exposed on scorers which
+  is for now only used on phrase queries and conjunctions in order to check
+  positions lazily if the phrase query is in a conjunction with other queries.
+  (Robert Muir, Adrien Grand)
+
 API Changes

 * LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@ -0,0 +1,202 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.util.CollectionUtil;
+
+class ConjunctionDISI extends DocIdSetIterator {
+
+  /** Create a conjunction over the provided iterators, taking advantage of
+   *  {@link TwoPhaseDocIdSetIterator}. */
+  public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
+    final List<DocIdSetIterator> allIterators = new ArrayList<>();
+    final List<TwoPhaseDocIdSetIterator> twoPhaseIterators = new ArrayList<>();
+    for (DocIdSetIterator iterator : iterators) {
+      if (iterator instanceof Scorer) {
+        // if we have a scorer, check if it supports two-phase iteration
+        TwoPhaseDocIdSetIterator twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
+        if (twoPhaseIterator != null) {
+          // Note: 
+          allIterators.add(twoPhaseIterator.approximation());
+          twoPhaseIterators.add(twoPhaseIterator);
+        } else {
+          allIterators.add(iterator);
+        }
+      } else {
+        // no approximation support, use the iterator as-is
+        allIterators.add(iterator);
+      }
+    }
+
+    if (twoPhaseIterators.isEmpty()) {
+      return new ConjunctionDISI(allIterators);
+    } else {
+      return new TwoPhase(allIterators, twoPhaseIterators);
+    }
+  }
+
+  final DocIdSetIterator lead;
+  final DocIdSetIterator[] others;
+
+  ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
+    // Sort the array the first time to allow the least frequent DocsEnum to
+    // lead the matching.
+    CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
+      @Override
+      public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
+        return Long.compare(o1.cost(), o2.cost());
+      }
+    });
+    lead = iterators.get(0);
+    others = iterators.subList(1, iterators.size()).toArray(new DocIdSetIterator[0]);
+  }
+
+  protected boolean matches() throws IOException {
+    return true;
+  }
+
+  TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+    return null;
+  }
+
+  private int doNext(int doc) throws IOException {
+    for(;;) {
+
+      if (doc == NO_MORE_DOCS) {
+        // we need this check because it is only ok to call #matches when positioned
+        return NO_MORE_DOCS;
+      }
+
+      advanceHead: for(;;) {
+        for (DocIdSetIterator other : others) {
+          // invariant: docsAndFreqs[i].doc <= doc at this point.
+
+          // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
+          // on the previous iteration and the advance on the lead scorer exactly matched.
+          if (other.docID() < doc) {
+            final int next = other.advance(doc);
+
+            if (next > doc) {
+              // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
+              doc = lead.advance(next);
+              break advanceHead;
+            }
+          }
+        }
+
+        if (matches()) {
+          // success - all DocsEnums are on the same doc
+          return doc;
+        } else {
+          doc = lead.nextDoc();
+          break advanceHead;
+        }
+      }
+    }
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    return doNext(lead.advance(target));
+  }
+
+  @Override
+  public int docID() {
+    return lead.docID();
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    return doNext(lead.nextDoc());
+  }
+
+  @Override
+  public long cost() {
+    return lead.cost();
+  }
+
+  /**
+   * {@link TwoPhaseDocIdSetIterator} view of a {@link TwoPhase} conjunction.
+   */
+  private static class TwoPhaseConjunctionDISI extends TwoPhaseDocIdSetIterator {
+
+    private final ConjunctionDISI approximation;
+    private final TwoPhaseDocIdSetIterator[] twoPhaseIterators;
+
+    private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseDocIdSetIterator> twoPhaseIterators) {
+      approximation = new ConjunctionDISI(iterators);
+      assert twoPhaseIterators.size() > 0;
+      this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseDocIdSetIterator[0]);
+    }
+
+    @Override
+    public DocIdSetIterator approximation() {
+      return approximation;
+    }
+
+    @Override
+    public boolean matches() throws IOException {
+      for (TwoPhaseDocIdSetIterator twoPhaseIterator : twoPhaseIterators) {
+        if (twoPhaseIterator.matches() == false) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+  }
+
+  /**
+   * A conjunction DISI built on top of approximations. This implementation
+   * verifies that documents actually match by consulting the provided
+   * {@link TwoPhaseDocIdSetIterator}s.
+   *
+   * Another important difference with {@link ConjunctionDISI} is that this
+   * implementation supports approximations too: the approximation of this
+   * impl is the conjunction of the approximations of the wrapped iterators.
+   * This allows eg. {@code +"A B" +C} to be approximated as
+   * {@code +(+A +B) +C}.
+   */
+  // NOTE: this is essentially the same as TwoPhaseDocIdSetIterator.asDocIdSetIterator
+  // but is its own impl in order to be able to expose a two-phase view
+  private static class TwoPhase extends ConjunctionDISI {
+
+    final TwoPhaseConjunctionDISI twoPhaseView;
+
+    private TwoPhase(List<? extends DocIdSetIterator> iterators, List<TwoPhaseDocIdSetIterator> twoPhaseIterators) {
+      super(iterators);
+      twoPhaseView = new TwoPhaseConjunctionDISI(iterators, twoPhaseIterators);
+    }
+
+    @Override
+    public TwoPhaseConjunctionDISI asTwoPhaseIterator() {
+      return twoPhaseView;
+    }
+
+    @Override
+    protected boolean matches() throws IOException {
+      return twoPhaseView.matches();
+    }
+  }
+
+}
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
@ -20,18 +20,14 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Comparator;
 import java.util.List;

-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;

 /** Scorer for conjunctions, sets of queries, all of which are required. */
 class ConjunctionScorer extends Scorer {

-  protected int lastDoc = -1;
-  protected final DocsAndFreqs[] docsAndFreqs;
-  private final DocsAndFreqs lead;
+  private final ConjunctionDISI disi;
  private final Scorer[] scorers;
  private final float coord;

@ -44,68 +40,28 @@ class ConjunctionScorer extends Scorer {
    super(weight);
    assert required.containsAll(scorers);
    this.coord = coord;
-    this.docsAndFreqs = new DocsAndFreqs[required.size()];
-    for (int i = 0; i < required.size(); ++i) {
-      docsAndFreqs[i] = new DocsAndFreqs(required.get(i));
-    }
-    // Sort the array the first time to allow the least frequent DocsEnum to
-    // lead the matching.
-    ArrayUtil.timSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
-      @Override
-      public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
-        return Long.compare(o1.cost, o2.cost);
-      }
-    });
-
-    lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection
-
+    this.disi = ConjunctionDISI.intersect(required);
    this.scorers = scorers.toArray(new Scorer[scorers.size()]);
  }

-  private int doNext(int doc) throws IOException {
-    for(;;) {
-      // doc may already be NO_MORE_DOCS here, but we don't check explicitly
-      // since all scorers should advance to NO_MORE_DOCS, match, then
-      // return that value.
-      advanceHead: for(;;) {
-        for (int i = 1; i < docsAndFreqs.length; i++) {
-          // invariant: docsAndFreqs[i].doc <= doc at this point.
-
-          // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
-          // on the previous iteration and the advance on the lead scorer exactly matched.
-          if (docsAndFreqs[i].doc < doc) {
-            docsAndFreqs[i].doc = docsAndFreqs[i].iterator.advance(doc);
-
-            if (docsAndFreqs[i].doc > doc) {
-              // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
-              doc = docsAndFreqs[i].doc;
-              break advanceHead;
-            }
-          }
-        }
-        // success - all DocsEnums are on the same doc
-        return doc;
-      }
-      // advance head for next iteration
-      doc = lead.doc = lead.iterator.advance(doc);
-    }
+  @Override
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+    return disi.asTwoPhaseIterator();
  }

  @Override
  public int advance(int target) throws IOException {
-    lead.doc = lead.iterator.advance(target);
-    return lastDoc = doNext(lead.doc);
+    return disi.advance(target);
  }

  @Override
  public int docID() {
-    return lastDoc;
+    return disi.docID();
  }

  @Override
  public int nextDoc() throws IOException {
-    lead.doc = lead.iterator.nextDoc();
-    return lastDoc = doNext(lead.doc);
+    return disi.nextDoc();
  }

  @Override
@ -120,7 +76,7 @@ class ConjunctionScorer extends Scorer {

  @Override
  public int freq() {
-    return docsAndFreqs.length;
+    return scorers.length;
  }

  @Override
@ -145,12 +101,12 @@ class ConjunctionScorer extends Scorer {

  @Override
  public long cost() {
-    return lead.iterator.cost();
+    return disi.cost();
  }

  @Override
  public Collection<ChildScorer> getChildren() {
-    ArrayList<ChildScorer> children = new ArrayList<>(docsAndFreqs.length);
+    ArrayList<ChildScorer> children = new ArrayList<>();
    for (Scorer scorer : scorers) {
      children.add(new ChildScorer(scorer, "MUST"));
    }
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@ -18,9 +18,11 @@ package org.apache.lucene.search;
 */

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;

-import org.apache.lucene.index.*;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BytesRef;

@ -49,10 +51,11 @@ final class ExactPhraseScorer extends Scorer {
    }
  }

+  private final ConjunctionDISI conjunction;
+
  private final ChunkState[] chunkStates;
  private final PostingsEnum lead;

-  private int docID = -1;
  private int freq;

  private final Similarity.SimScorer docScorer;
@ -72,49 +75,46 @@ final class ExactPhraseScorer extends Scorer {
    // min(cost)
    cost = lead.cost();

+    List<DocIdSetIterator> iterators = new ArrayList<>();
    for(int i=0;i<postings.length;i++) {
      chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
+      iterators.add(postings[i].postings);
    }
+    conjunction = ConjunctionDISI.intersect(iterators);
+  }
+
+  @Override
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+    return new TwoPhaseDocIdSetIterator() {
+
+      @Override
+      public boolean matches() throws IOException {
+        return phraseFreq() > 0;
+      }
+
+      @Override
+      public DocIdSetIterator approximation() {
+        return conjunction;
+      }
+    };
  }

  private int doNext(int doc) throws IOException {
-    for(;;) {
-      // TODO: don't dup this logic from conjunctionscorer :)
-      advanceHead: for(;;) {
-        for (int i = 1; i < chunkStates.length; i++) {
-          final PostingsEnum de = chunkStates[i].posEnum;
-          if (de.docID() < doc) {
-            int d = de.advance(doc);
-
-            if (d > doc) {
-              // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
-              doc = d;
-              break advanceHead;
-            }
-          }
-        }
-        // all DocsEnums are on the same doc
-        if (doc == NO_MORE_DOCS) {
+    for (;; doc = conjunction.nextDoc()) {
+      if (doc == NO_MORE_DOCS || phraseFreq() > 0) {
        return doc;
-        } else if (phraseFreq() > 0) {
-          return doc;            // success: matches phrase
-        } else {
-          doc = lead.nextDoc();  // doesn't match phrase
      }
    }
-      // advance head for next iteration
-      doc = lead.advance(doc);
-    }
  }

  @Override
  public int nextDoc() throws IOException {
-    return docID = doNext(lead.nextDoc());
+    return doNext(conjunction.nextDoc());
  }

  @Override
  public int advance(int target) throws IOException {
-    return docID = doNext(lead.advance(target));
+    return doNext(conjunction.advance(target));
  }

  @Override
@ -149,12 +149,12 @@ final class ExactPhraseScorer extends Scorer {

  @Override
  public int docID() {
-    return docID;
+    return conjunction.docID();
  }

  @Override
  public float score() {
-    return docScorer.score(docID, freq);
+    return docScorer.score(docID(), freq);
  }

  private int phraseFreq() throws IOException {
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@ -99,4 +99,23 @@ public abstract class Scorer extends PostingsEnum {
      this.relationship = relationship;
    }
  }
+
+  /**
+   * Optional method: Return a {@link TwoPhaseDocIdSetIterator} view of this
+   * {@link Scorer}. A return value of {@code null} indicates that
+   * two-phase iteration is not supported.
+   *
+   * Note that the returned {@link TwoPhaseDocIdSetIterator}'s
+   * {@link TwoPhaseDocIdSetIterator#approximation() approximation} must
+   * advance synchronously with this iterator: advancing the approximation must
+   * advance this iterator and vice-versa.
+   *
+   * Implementing this method is typically useful on {@link Scorer}s
+   * that have a high per-document overhead in order to confirm matches.
+   *
+   * The default implementation returns {@code null}.
+   */
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+    return null;
+  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java
@ -0,0 +1,81 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * An approximation of a {@link DocIdSetIterator}. When the {@link #approximation()}'s
+ * {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
+ * return, {@link #matches()} needs to be checked in order to know whether the
+ * returned doc ID actually matches.
+ * @lucene.experimental
+ */
+public abstract class TwoPhaseDocIdSetIterator {
+
+  /** Return a {@link DocIdSetIterator} view of the provided
+   *  {@link TwoPhaseDocIdSetIterator}. */
+  public static DocIdSetIterator asDocIdSetIterator(TwoPhaseDocIdSetIterator twoPhaseIterator) {
+    final DocIdSetIterator approximation = twoPhaseIterator.approximation();
+    return new DocIdSetIterator() {
+
+      @Override
+      public int docID() {
+        return approximation.docID();
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        return doNext(approximation.nextDoc());
+      }
+
+      @Override
+      public int advance(int target) throws IOException {
+        return doNext(approximation.advance(target));
+      }
+
+      private int doNext(int doc) throws IOException {
+        for (;; doc = approximation.nextDoc()) {
+          if (doc == NO_MORE_DOCS) {
+            return NO_MORE_DOCS;
+          } else if (twoPhaseIterator.matches()) {
+            return doc;
+          }
+        }
+      }
+
+      @Override
+      public long cost() {
+        return approximation.cost();
+      }
+
+    };
+  }
+
+  /** Return an approximation. The returned {@link DocIdSetIterator} is a
+   *  superset of the matching documents, and each match needs to be confirmed
+   *  with {@link #matches()} in order to know whether it matches or not. */
+  public abstract DocIdSetIterator approximation();
+
+  /** Return whether the current doc ID that the iterator is on matches. This
+   *  method should only be called when the iterator is positionned, ie. not
+   *  when {@link DocIdSetIterator#docID()} is {@code -1} or
+   *  {@link DocIdSetIterator#NO_MORE_DOCS}. */
+  public abstract boolean matches() throws IOException;
+
+}
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
@ -590,4 +590,33 @@ public class TestBooleanQuery extends LuceneTestCase {
    w.close();
    dir.close();
  }
+
+  public void testConjunctionSupportsApproximations() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    Field f = newTextField("field", "a b c", Field.Store.NO);
+    doc.add(f);
+    w.addDocument(doc);
+    w.commit();
+
+    DirectoryReader reader = w.getReader();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+
+    PhraseQuery pq = new PhraseQuery();
+    pq.add(new Term("field", "a"));
+    pq.add(new Term("field", "b"));
+
+    BooleanQuery q = new BooleanQuery();
+    q.add(pq, Occur.MUST);
+    q.add(new TermQuery(new Term("field", "c")), Occur.FILTER);
+
+    final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
+    final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+    assertNotNull(scorer.asTwoPhaseIterator());
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
@ -0,0 +1,273 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.util.BitDocIdSet;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class TestConjunctionDISI extends LuceneTestCase {
+
+  private static TwoPhaseDocIdSetIterator approximation(final DocIdSetIterator iterator, final FixedBitSet confirmed) {
+    return new TwoPhaseDocIdSetIterator() {
+
+      @Override
+      public DocIdSetIterator approximation() {
+        return iterator;
+      }
+
+      @Override
+      public boolean matches() throws IOException {
+        return confirmed.get(iterator.docID());
+      }
+    };
+  }
+
+  private static Scorer scorer(TwoPhaseDocIdSetIterator twoPhaseIterator) {
+    return scorer(TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator), twoPhaseIterator);
+  }
+
+  /**
+   * Create a {@link Scorer} that wraps the given {@link DocIdSetIterator}. It
+   * also accepts a {@link TwoPhaseDocIdSetIterator} view, which is exposed in
+   * {@link Scorer#asTwoPhaseIterator()}. When the two-phase view is not null,
+   * then {@link Scorer#nextDoc()} and {@link Scorer#advance(int)} will raise
+   * an exception in order to make sure that {@link ConjunctionDISI} takes
+   * advantage of the {@link TwoPhaseDocIdSetIterator} view.
+   */
+  private static Scorer scorer(DocIdSetIterator it, TwoPhaseDocIdSetIterator twoPhaseIterator) {
+    return new Scorer(null) {
+
+      @Override
+      public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+        return twoPhaseIterator;
+      }
+
+      @Override
+      public int docID() {
+        if (twoPhaseIterator != null) {
+          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
+        }
+        return it.docID();
+      }
+
+      @Override
+      public int nextDoc() throws IOException {
+        if (twoPhaseIterator != null) {
+          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
+        }
+        return it.nextDoc();
+      }
+
+      @Override
+      public int advance(int target) throws IOException {
+        if (twoPhaseIterator != null) {
+          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
+        }
+        return it.advance(target);
+      }
+
+      @Override
+      public long cost() {
+        if (twoPhaseIterator != null) {
+          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
+        }
+        return it.cost();
+      }
+
+      @Override
+      public float score() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public int freq() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public int nextPosition() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public int startOffset() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public int endOffset() throws IOException {
+        return 0;
+      }
+
+      @Override
+      public BytesRef getPayload() throws IOException {
+        return null;
+      }
+
+    };
+  }
+
+  private static FixedBitSet randomSet(int maxDoc) {
+    final int step = TestUtil.nextInt(random(), 1, 10);
+    FixedBitSet set = new FixedBitSet(maxDoc);
+    for (int doc = random().nextInt(step); doc < maxDoc; doc += TestUtil.nextInt(random(), 1, step)) {
+      set.set(doc);
+    }
+    return set;
+  }
+
+  private static FixedBitSet clearRandomBits(FixedBitSet other) {
+    final FixedBitSet set = new FixedBitSet(other.length());
+    set.or(other);
+    for (int i = 0; i < set.length(); ++i) {
+      if (random().nextBoolean()) {
+        set.clear(i);
+      }
+    }
+    return set;
+  }
+
+  private static FixedBitSet intersect(FixedBitSet[] bitSets) {
+    final FixedBitSet intersection = new FixedBitSet(bitSets[0].length());
+    intersection.or(bitSets[0]);
+    for (int i = 1; i < bitSets.length; ++i) {
+      intersection.and(bitSets[i]);
+    }
+    return intersection;
+  }
+
+  private static FixedBitSet toBitSet(int maxDoc, DocIdSetIterator iterator) throws IOException {
+    final FixedBitSet set = new FixedBitSet(maxDoc);
+    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
+      set.set(doc);
+    }
+    return set;
+  }
+
+  // Test that the conjunction iterator is correct
+  public void testConjunction() throws IOException {
+    final int iters = atLeast(100);
+    for (int iter = 0; iter < iters; ++iter) {
+      final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
+      final int numIterators = TestUtil.nextInt(random(), 2, 5);
+      final FixedBitSet[] sets = new FixedBitSet[numIterators];
+      final DocIdSetIterator[] iterators = new DocIdSetIterator[numIterators];
+      for (int i = 0; i < iterators.length; ++i) {
+        final FixedBitSet set = randomSet(maxDoc);
+        if (random().nextBoolean()) {
+          // simple iterator
+          sets[i] = set;
+          iterators[i] = new BitDocIdSet(set).iterator();
+        } else {
+          // scorer with approximation
+          final FixedBitSet confirmed = clearRandomBits(set);
+          sets[i] = confirmed;
+          final TwoPhaseDocIdSetIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
+          iterators[i] = scorer(approximation);
+        }
+      }
+
+      final ConjunctionDISI conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
+      assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
+    }
+  }
+
+  // Test that the conjunction approximation is correct
+  public void testConjunctionApproximation() throws IOException {
+    final int iters = atLeast(100);
+    for (int iter = 0; iter < iters; ++iter) {
+      final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
+      final int numIterators = TestUtil.nextInt(random(), 2, 5);
+      final FixedBitSet[] sets = new FixedBitSet[numIterators];
+      final DocIdSetIterator[] iterators = new DocIdSetIterator[numIterators];
+      boolean hasApproximation = false;
+      for (int i = 0; i < iterators.length; ++i) {
+        final FixedBitSet set = randomSet(maxDoc);
+        if (random().nextBoolean()) {
+          // simple iterator
+          sets[i] = set;
+          iterators[i] = new BitDocIdSet(set).iterator();
+        } else {
+          // scorer with approximation
+          final FixedBitSet confirmed = clearRandomBits(set);
+          sets[i] = confirmed;
+          final TwoPhaseDocIdSetIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
+          iterators[i] = scorer(approximation);
+          hasApproximation = true;
+        }
+      }
+
+      final ConjunctionDISI conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
+      TwoPhaseDocIdSetIterator twoPhaseIterator = conjunction.asTwoPhaseIterator();
+      assertEquals(hasApproximation, twoPhaseIterator != null);
+      if (hasApproximation) {
+        assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator)));
+      }
+    }
+  }
+
+  // This test makes sure that when nesting scorers with ConjunctionDISI, confirmations are pushed to the root.
+  public void testRecursiveConjunctionApproximation() throws IOException {
+    final int iters = atLeast(100);
+    for (int iter = 0; iter < iters; ++iter) {
+      final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
+      final int numIterators = TestUtil.nextInt(random(), 2, 5);
+      final FixedBitSet[] sets = new FixedBitSet[numIterators];
+      DocIdSetIterator conjunction = null;
+      boolean hasApproximation = false;
+      for (int i = 0; i < numIterators; ++i) {
+        final FixedBitSet set = randomSet(maxDoc);
+        final DocIdSetIterator newIterator;
+        if (random().nextBoolean()) {
+          // simple iterator
+          sets[i] = set;
+          newIterator = new BitDocIdSet(set).iterator();
+        } else {
+          // scorer with approximation
+          final FixedBitSet confirmed = clearRandomBits(set);
+          sets[i] = confirmed;
+          final TwoPhaseDocIdSetIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
+          newIterator = scorer(approximation);
+          hasApproximation = true;
+        }
+
+        if (conjunction == null) {
+          conjunction = newIterator;
+        } else {
+          final ConjunctionDISI conj = ConjunctionDISI.intersect(Arrays.asList(conjunction, newIterator));
+          conjunction = scorer(conj, conj.asTwoPhaseIterator());
+        }
+      }
+
+      TwoPhaseDocIdSetIterator twoPhaseIterator = ((Scorer) conjunction).asTwoPhaseIterator();
+      assertEquals(hasApproximation, twoPhaseIterator != null);
+      if (hasApproximation) {
+        assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator)));
+      } else {
+        assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
+      }
+    }
+  }
+
+}