LUCENE-6198: Two-phase execution for phrase queries and conjunctions.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1659599 13f79535-47bb-0310-9956-ffa450edef68
2015-02-13 16:45:06 +00:00 · 2015-02-13 16:45:06 +00:00 · 5b4c02a3a1
parent 82eff4eb4d
commit 5b4c02a3a1
8 changed files with 653 additions and 88 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -81,6 +81,11 @@ Optimizations
 * LUCENE-6233 Speed up CheckIndex when the index has term vectors
  (Robert Muir, Mike McCandless)
 * LUCENE-6198: Added the TwoPhaseDocIdSetIterator API, exposed on scorers. It
  is for now only used by phrase queries and conjunctions, in order to check
  positions lazily when a phrase query is in a conjunction with other queries.
  (Robert Muir, Adrien Grand)
 API Changes
 * LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@ -0,0 +1,202 @@
 package org.apache.lucene.search;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.util.CollectionUtil;
 /**
  * A {@link DocIdSetIterator} over the intersection of several iterators: it
  * only returns documents that all wrapped iterators are positioned on. The
  * iterator with the lowest {@link DocIdSetIterator#cost() cost} leads the
  * iteration and the other iterators are advanced in order to catch up with it.
  * Use {@link #intersect(List)} to create instances: it returns a two-phase
  * aware implementation when at least one of the provided iterators is a
  * {@link Scorer} that exposes a {@link TwoPhaseDocIdSetIterator}.
  */
 class ConjunctionDISI extends DocIdSetIterator {
  /** Create a conjunction over the provided iterators, taking advantage of
   *  {@link TwoPhaseDocIdSetIterator}.
   *  @param iterators the iterators to intersect, must not be empty; the list
   *                   itself is left untouched
   *  @return a conjunction whose {@link #asTwoPhaseIterator()} is non-null
   *          if, and only if, at least one of the provided iterators is a
   *          {@link Scorer} with a two-phase view
   *  @throws IllegalArgumentException if {@code iterators} is empty */
  public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
    final List<DocIdSetIterator> allIterators = new ArrayList<>(iterators.size());
    final List<TwoPhaseDocIdSetIterator> twoPhaseIterators = new ArrayList<>();
    for (DocIdSetIterator iterator : iterators) {
      // only scorers can expose a two-phase view
      final TwoPhaseDocIdSetIterator twoPhaseIterator =
          iterator instanceof Scorer ? ((Scorer) iterator).asTwoPhaseIterator() : null;
      if (twoPhaseIterator != null) {
        // intersect the approximation instead of the scorer itself and keep
        // the two-phase iterator around so that matches can be confirmed
        allIterators.add(twoPhaseIterator.approximation());
        twoPhaseIterators.add(twoPhaseIterator);
      } else {
        // no approximation support, use the iterator as-is
        allIterators.add(iterator);
      }
    }
    if (twoPhaseIterators.isEmpty()) {
      return new ConjunctionDISI(allIterators);
    } else {
      return new TwoPhase(allIterators, twoPhaseIterators);
    }
  }
  /** The iterator with the lowest cost, it leads the iteration. */
  final DocIdSetIterator lead;
  /** All iterators but {@link #lead}, sorted by increasing cost. */
  final DocIdSetIterator[] others;
  /**
   * Create a conjunction over the provided iterators, which are used as-is:
   * two-phase views are not looked up, see {@link #intersect(List)} for that.
   * @param iterators the iterators to intersect, must not be empty
   * @throws IllegalArgumentException if {@code iterators} is empty
   */
  ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
    if (iterators.isEmpty()) {
      throw new IllegalArgumentException("Cannot create a conjunction over an empty list of iterators");
    }
    // Sort a private copy: sorting in place would silently reorder the list
    // of the caller.
    final List<DocIdSetIterator> sorted = new ArrayList<DocIdSetIterator>(iterators);
    // Sort the iterators by cost to allow the least costly one to lead the
    // matching.
    CollectionUtil.timSort(sorted, new Comparator<DocIdSetIterator>() {
      @Override
      public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
        return Long.compare(o1.cost(), o2.cost());
      }
    });
    lead = sorted.get(0);
    others = sorted.subList(1, sorted.size()).toArray(new DocIdSetIterator[0]);
  }
  /**
   * Confirm the document that all iterators are currently positioned on.
   * This implementation has nothing to verify and always returns
   * {@code true}; sub-classes override it to perform additional checks.
   */
  protected boolean matches() throws IOException {
    return true;
  }
  /**
   * Return a two-phase view of this conjunction. This implementation returns
   * {@code null}, meaning that two-phase iteration is not supported.
   */
  TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
    return null;
  }
  /**
   * Move all iterators to the first document on or after {@code doc} that
   * they all contain and that {@link #matches()} confirms.
   * @param doc the document that {@link #lead} is currently positioned on
   * @return the next match, or {@link #NO_MORE_DOCS} once the lead iterator
   *         is exhausted
   */
  private int doNext(int doc) throws IOException {
    advanceLead:
    for (;;) {
      if (doc == NO_MORE_DOCS) {
        // we need this check because it is only ok to call #matches when positioned
        return NO_MORE_DOCS;
      }
      for (DocIdSetIterator other : others) {
        // other.docID() may already be equal to doc if a previous pass moved
        // it there and the subsequent advance on the lead landed exactly on
        // that document; there is nothing to do for this iterator then.
        if (other.docID() < doc) {
          final int next = other.advance(doc);
          if (next > doc) {
            // this iterator is beyond the current doc: move the lead to the
            // new highest doc and check all iterators again
            doc = lead.advance(next);
            continue advanceLead;
          }
        }
      }
      if (matches()) {
        // success - all iterators are on the same doc and it is confirmed
        return doc;
      }
      // all iterators agree on doc but it was not confirmed, try the next
      // candidate of the lead
      doc = lead.nextDoc();
    }
  }
  @Override
  public int advance(int target) throws IOException {
    return doNext(lead.advance(target));
  }
  @Override
  public int docID() {
    return lead.docID();
  }
  @Override
  public int nextDoc() throws IOException {
    return doNext(lead.nextDoc());
  }
  @Override
  public long cost() {
    // the conjunction is driven by the lead, so report its cost
    return lead.cost();
  }
  /**
   * {@link TwoPhaseDocIdSetIterator} view of a {@link TwoPhase} conjunction.
   * Its approximation is a plain conjunction of the provided iterators and a
   * document is confirmed when all two-phase iterators confirm it.
   */
  private static class TwoPhaseConjunctionDISI extends TwoPhaseDocIdSetIterator {
    private final ConjunctionDISI approximation;
    private final TwoPhaseDocIdSetIterator[] twoPhaseIterators;
    private TwoPhaseConjunctionDISI(List<? extends DocIdSetIterator> iterators, List<TwoPhaseDocIdSetIterator> twoPhaseIterators) {
      approximation = new ConjunctionDISI(iterators);
      assert twoPhaseIterators.size() > 0;
      this.twoPhaseIterators = twoPhaseIterators.toArray(new TwoPhaseDocIdSetIterator[0]);
    }
    @Override
    public DocIdSetIterator approximation() {
      return approximation;
    }
    @Override
    public boolean matches() throws IOException {
      // stop at the first two-phase iterator that rejects the current doc
      for (TwoPhaseDocIdSetIterator twoPhaseIterator : twoPhaseIterators) {
        if (twoPhaseIterator.matches() == false) {
          return false;
        }
      }
      return true;
    }
  }
  /**
   * A conjunction DISI built on top of approximations. This implementation
   * verifies that documents actually match by consulting the provided
   * {@link TwoPhaseDocIdSetIterator}s.
   *
   * Another important difference with {@link ConjunctionDISI} is that this
   * implementation supports approximations too: the approximation of this
   * impl is the conjunction of the approximations of the wrapped iterators.
   * This allows eg. {@code +"A B" +C} to be approximated as
   * {@code +(+A +B) +C}.
   */
  // NOTE: this is essentially the same as TwoPhaseDocIdSetIterator.asDocIdSetIterator
  // but is its own impl in order to be able to expose a two-phase view
  private static class TwoPhase extends ConjunctionDISI {
    final TwoPhaseConjunctionDISI twoPhaseView;
    private TwoPhase(List<? extends DocIdSetIterator> iterators, List<TwoPhaseDocIdSetIterator> twoPhaseIterators) {
      super(iterators);
      // the view wraps the very same iterator instances, so advancing this
      // conjunction also moves the approximation of the view and vice-versa
      twoPhaseView = new TwoPhaseConjunctionDISI(iterators, twoPhaseIterators);
    }
    @Override
    public TwoPhaseConjunctionDISI asTwoPhaseIterator() {
      return twoPhaseView;
    }
    @Override
    protected boolean matches() throws IOException {
      return twoPhaseView.matches();
    }
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
@ -20,18 +20,14 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 /** Scorer for conjunctions, sets of queries, all of which are required. */
 class ConjunctionScorer extends Scorer {
-  protected int lastDoc = -1;
+  private final ConjunctionDISI disi;
  protected final DocsAndFreqs[] docsAndFreqs;
  private final DocsAndFreqs lead;
  private final Scorer[] scorers;
  private final float coord;
@ -44,68 +40,28 @@ class ConjunctionScorer extends Scorer {
    super(weight);
    assert required.containsAll(scorers);
    this.coord = coord;
-    this.docsAndFreqs = new DocsAndFreqs[required.size()];
+    this.disi = ConjunctionDISI.intersect(required);
    for (int i = 0; i < required.size(); ++i) {
      docsAndFreqs[i] = new DocsAndFreqs(required.get(i));
    }
    // Sort the array the first time to allow the least frequent DocsEnum to
    // lead the matching.
    ArrayUtil.timSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
      @Override
      public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
        return Long.compare(o1.cost, o2.cost);
      }
    });
    lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection
    this.scorers = scorers.toArray(new Scorer[scorers.size()]);
  }
-  private int doNext(int doc) throws IOException {
+  @Override
-    for(;;) {
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
-      // doc may already be NO_MORE_DOCS here, but we don't check explicitly
+    return disi.asTwoPhaseIterator();
      // since all scorers should advance to NO_MORE_DOCS, match, then
      // return that value.
      advanceHead: for(;;) {
        for (int i = 1; i < docsAndFreqs.length; i++) {
          // invariant: docsAndFreqs[i].doc <= doc at this point.
          // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
          // on the previous iteration and the advance on the lead scorer exactly matched.
          if (docsAndFreqs[i].doc < doc) {
            docsAndFreqs[i].doc = docsAndFreqs[i].iterator.advance(doc);
            if (docsAndFreqs[i].doc > doc) {
              // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
              doc = docsAndFreqs[i].doc;
              break advanceHead;
            }
          }
        }
        // success - all DocsEnums are on the same doc
        return doc;
      }
      // advance head for next iteration
      doc = lead.doc = lead.iterator.advance(doc);
    }
  }
  @Override
  public int advance(int target) throws IOException {
-    lead.doc = lead.iterator.advance(target);
+    return disi.advance(target);
    return lastDoc = doNext(lead.doc);
  }
  @Override
  public int docID() {
-    return lastDoc;
+    return disi.docID();
  }
  @Override
  public int nextDoc() throws IOException {
-    lead.doc = lead.iterator.nextDoc();
+    return disi.nextDoc();
    return lastDoc = doNext(lead.doc);
  }
  @Override
@ -120,7 +76,7 @@ class ConjunctionScorer extends Scorer {
  @Override
  public int freq() {
-    return docsAndFreqs.length;
+    return scorers.length;
  }
  @Override
@ -145,12 +101,12 @@ class ConjunctionScorer extends Scorer {
  @Override
  public long cost() {
-    return lead.iterator.cost();
+    return disi.cost();
  }
  @Override
  public Collection<ChildScorer> getChildren() {
-    ArrayList<ChildScorer> children = new ArrayList<>(docsAndFreqs.length);
+    ArrayList<ChildScorer> children = new ArrayList<>();
    for (Scorer scorer : scorers) {
      children.add(new ChildScorer(scorer, "MUST"));
    }
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@ -18,9 +18,11 @@ package org.apache.lucene.search;
 */
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BytesRef;
@ -49,10 +51,11 @@ final class ExactPhraseScorer extends Scorer {
    }
  }
  private final ConjunctionDISI conjunction;
  private final ChunkState[] chunkStates;
  private final PostingsEnum lead;
  private int docID = -1;
  private int freq;
  private final Similarity.SimScorer docScorer;
@ -72,49 +75,46 @@ final class ExactPhraseScorer extends Scorer {
    // min(cost)
    cost = lead.cost();
    List<DocIdSetIterator> iterators = new ArrayList<>();
    for(int i=0;i<postings.length;i++) {
      chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
      iterators.add(postings[i].postings);
    }
    conjunction = ConjunctionDISI.intersect(iterators);
  }
  private int doNext(int doc) throws IOException {
    for(;;) {
      // TODO: don't dup this logic from conjunctionscorer :)
      advanceHead: for(;;) {
        for (int i = 1; i < chunkStates.length; i++) {
          final PostingsEnum de = chunkStates[i].posEnum;
          if (de.docID() < doc) {
            int d = de.advance(doc);
-            if (d > doc) {
+  @Override
-              // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
+  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
-              doc = d;
+    return new TwoPhaseDocIdSetIterator() {
-              break advanceHead;
+
-            }
+      @Override
-          }
+      public boolean matches() throws IOException {
-        }
+        return phraseFreq() > 0;
-        // all DocsEnums are on the same doc
+      }
-        if (doc == NO_MORE_DOCS) {
+
-          return doc;
+      @Override
-        } else if (phraseFreq() > 0) {
+      public DocIdSetIterator approximation() {
-          return doc;            // success: matches phrase
+        return conjunction;
-        } else {
+      }
-          doc = lead.nextDoc();  // doesn't match phrase
+    };
-        }
+  }
  private int doNext(int doc) throws IOException {
    for (;; doc = conjunction.nextDoc()) {
      if (doc == NO_MORE_DOCS || phraseFreq() > 0) {
        return doc;
      }
      // advance head for next iteration
      doc = lead.advance(doc);
    }
  }
  @Override
  public int nextDoc() throws IOException {
-    return docID = doNext(lead.nextDoc());
+    return doNext(conjunction.nextDoc());
  }
  @Override
  public int advance(int target) throws IOException {
-    return docID = doNext(lead.advance(target));
+    return doNext(conjunction.advance(target));
  }
  @Override
@ -149,12 +149,12 @@ final class ExactPhraseScorer extends Scorer {
  @Override
  public int docID() {
-    return docID;
+    return conjunction.docID();
  }
  @Override
  public float score() {
-    return docScorer.score(docID, freq);
+    return docScorer.score(docID(), freq);
  }
  private int phraseFreq() throws IOException {
--- a/lucene/core/src/java/org/apache/lucene/search/Scorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@ -60,7 +60,7 @@ public abstract class Scorer extends PostingsEnum {
   * {@link LeafCollector#collect}.
   */
  public abstract float score() throws IOException;
-  
+
  /** returns parent Weight
   * @lucene.experimental
   */
@ -99,4 +99,23 @@ public abstract class Scorer extends PostingsEnum {
      this.relationship = relationship;
    }
  }
  /**
   * Optional method: Return a {@link TwoPhaseDocIdSetIterator} view of this
   * {@link Scorer}. A return value of {@code null} indicates that
   * two-phase iteration is not supported.
   *
   * <p>Note that the returned {@link TwoPhaseDocIdSetIterator}'s
   * {@link TwoPhaseDocIdSetIterator#approximation() approximation} must
   * advance synchronously with this iterator: advancing the approximation must
   * advance this iterator and vice-versa.
   *
   * <p>Implementing this method is typically useful on {@link Scorer}s
   * that have a high per-document overhead in order to confirm matches.
   *
   * <p>The default implementation returns {@code null}.
   *
   * @return a two-phase view of this scorer, or {@code null} if two-phase
   *         iteration is not supported
   */
  public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
    return null;
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java
@ -0,0 +1,81 @@
 package org.apache.lucene.search;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 /**
  * Two-phase view of a {@link DocIdSetIterator}. The {@link #approximation()}
  * iterates over candidate documents: every time its
  * {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
  * returns, {@link #matches()} has to be consulted in order to find out whether
  * the doc ID that it landed on is an actual match.
  * @lucene.experimental
  */
 public abstract class TwoPhaseDocIdSetIterator {
  /** Return the approximation: a {@link DocIdSetIterator} over a superset of
   *  the matching documents. Every document that it returns is only a
   *  candidate that needs to be confirmed with {@link #matches()}. */
  public abstract DocIdSetIterator approximation();
  /** Tell whether the document that the approximation is currently on is an
   *  actual match. This method should only be called when the iterator is
   *  positioned, ie. not when {@link DocIdSetIterator#docID()} is {@code -1}
   *  or {@link DocIdSetIterator#NO_MORE_DOCS}. */
  public abstract boolean matches() throws IOException;
  /** Wrap the provided {@link TwoPhaseDocIdSetIterator} into a regular
   *  {@link DocIdSetIterator} that only returns confirmed documents. */
  public static DocIdSetIterator asDocIdSetIterator(TwoPhaseDocIdSetIterator twoPhaseIterator) {
    final DocIdSetIterator candidates = twoPhaseIterator.approximation();
    return new DocIdSetIterator() {
      /** Walk the approximation, starting on {@code candidate}, until a
       *  document is confirmed or the approximation is exhausted. */
      private int confirm(int candidate) throws IOException {
        // NO_MORE_DOCS is tested first since matches() may only be called
        // while positioned on a document
        while (candidate != NO_MORE_DOCS && twoPhaseIterator.matches() == false) {
          candidate = candidates.nextDoc();
        }
        return candidate;
      }
      @Override
      public int nextDoc() throws IOException {
        final int candidate = candidates.nextDoc();
        return confirm(candidate);
      }
      @Override
      public int advance(int target) throws IOException {
        final int candidate = candidates.advance(target);
        return confirm(candidate);
      }
      @Override
      public int docID() {
        return candidates.docID();
      }
      @Override
      public long cost() {
        return candidates.cost();
      }
    };
  }
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
@ -590,4 +590,33 @@ public class TestBooleanQuery extends LuceneTestCase {
    w.close();
    dir.close();
  }
  /**
   * Makes sure that the scorer of a conjunction that contains a phrase query
   * exposes a two-phase view through {@link Scorer#asTwoPhaseIterator()}.
   */
  public void testConjunctionSupportsApproximations() throws IOException {
    // try-with-resources so that the directory, the writer and the reader are
    // released even when an assertion fails; resources are closed in reverse
    // order of creation: reader, then writer, then directory
    try (Directory dir = newDirectory();
         RandomIndexWriter w = new RandomIndexWriter(random(), dir)) {
      Document doc = new Document();
      Field f = newTextField("field", "a b c", Field.Store.NO);
      doc.add(f);
      w.addDocument(doc);
      w.commit();
      try (DirectoryReader reader = w.getReader()) {
        final IndexSearcher searcher = new IndexSearcher(reader);
        // +"a b" #c: a phrase query in a conjunction with a term filter
        PhraseQuery pq = new PhraseQuery();
        pq.add(new Term("field", "a"));
        pq.add(new Term("field", "b"));
        BooleanQuery q = new BooleanQuery();
        q.add(pq, Occur.MUST);
        q.add(new TermQuery(new Term("field", "c")), Occur.FILTER);
        final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
        final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
        // fail with a clear assertion rather than a NullPointerException if
        // no scorer could be created
        assertNotNull(scorer);
        assertNotNull(scorer.asTwoPhaseIterator());
      }
    }
  }
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
@ -0,0 +1,273 @@
 package org.apache.lucene.search;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 import java.io.IOException;
 import java.util.Arrays;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 /**
  * Randomized tests for {@link ConjunctionDISI}: plain conjunctions, the
  * two-phase view of conjunctions, and conjunctions nested into each other.
  * Expected results are computed by intersecting {@link FixedBitSet}s.
  */
 public class TestConjunctionDISI extends LuceneTestCase {
  /**
   * Create a {@link TwoPhaseDocIdSetIterator} whose approximation is
   * {@code iterator} and whose {@code matches()} returns whether the current
   * document of {@code iterator} is set in {@code confirmed}.
   */
  private static TwoPhaseDocIdSetIterator approximation(final DocIdSetIterator iterator, final FixedBitSet confirmed) {
    return new TwoPhaseDocIdSetIterator() {
      @Override
      public DocIdSetIterator approximation() {
        return iterator;
      }
      @Override
      public boolean matches() throws IOException {
        return confirmed.get(iterator.docID());
      }
    };
  }
  /**
   * Create a {@link Scorer} that exposes the given two-phase iterator. Since
   * the two-phase view is passed along, the iteration methods of the returned
   * scorer throw (see {@link #scorer(DocIdSetIterator, TwoPhaseDocIdSetIterator)}).
   */
  private static Scorer scorer(TwoPhaseDocIdSetIterator twoPhaseIterator) {
    return scorer(TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator), twoPhaseIterator);
  }
  /**
   * Create a {@link Scorer} that wraps the given {@link DocIdSetIterator}. It
   * also accepts a {@link TwoPhaseDocIdSetIterator} view, which is exposed in
   * {@link Scorer#asTwoPhaseIterator()}. When the two-phase view is not null,
   * then {@link Scorer#docID()}, {@link Scorer#nextDoc()},
   * {@link Scorer#advance(int)} and {@link Scorer#cost()} will raise an
   * exception in order to make sure that {@link ConjunctionDISI} takes
   * advantage of the {@link TwoPhaseDocIdSetIterator} view.
   */
  private static Scorer scorer(DocIdSetIterator it, TwoPhaseDocIdSetIterator twoPhaseIterator) {
    return new Scorer(null) {
      @Override
      public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
        return twoPhaseIterator;
      }
      @Override
      public int docID() {
        if (twoPhaseIterator != null) {
          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
        }
        return it.docID();
      }
      @Override
      public int nextDoc() throws IOException {
        if (twoPhaseIterator != null) {
          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
        }
        return it.nextDoc();
      }
      @Override
      public int advance(int target) throws IOException {
        if (twoPhaseIterator != null) {
          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
        }
        return it.advance(target);
      }
      @Override
      public long cost() {
        if (twoPhaseIterator != null) {
          throw new UnsupportedOperationException("ConjunctionDISI should call the two-phase iterator");
        }
        return it.cost();
      }
      // The remaining methods return dummy values: the tests in this class
      // only exercise iteration.
      @Override
      public float score() throws IOException {
        return 0;
      }
      @Override
      public int freq() throws IOException {
        return 0;
      }
      @Override
      public int nextPosition() throws IOException {
        return 0;
      }
      @Override
      public int startOffset() throws IOException {
        return 0;
      }
      @Override
      public int endOffset() throws IOException {
        return 0;
      }
      @Override
      public BytesRef getPayload() throws IOException {
        return null;
      }
    };
  }
  /**
   * Create a random bit set of size {@code maxDoc}. Both the first set bit
   * and the gaps between consecutive set bits are derived from a random
   * {@code step}, so the density of the set varies from one call to another.
   */
  private static FixedBitSet randomSet(int maxDoc) {
    final int step = TestUtil.nextInt(random(), 1, 10);
    FixedBitSet set = new FixedBitSet(maxDoc);
    for (int doc = random().nextInt(step); doc < maxDoc; doc += TestUtil.nextInt(random(), 1, step)) {
      set.set(doc);
    }
    return set;
  }
  /**
   * Return a copy of {@code other} in which every index for which
   * {@code random().nextBoolean()} returned {@code true} has been cleared.
   * The tests use the result as the confirmed subset of an approximation.
   */
  private static FixedBitSet clearRandomBits(FixedBitSet other) {
    final FixedBitSet set = new FixedBitSet(other.length());
    set.or(other);
    for (int i = 0; i < set.length(); ++i) {
      if (random().nextBoolean()) {
        set.clear(i);
      }
    }
    return set;
  }
  /**
   * Compute the intersection of the provided bit sets into a new bit set
   * that has the length of the first one.
   */
  private static FixedBitSet intersect(FixedBitSet[] bitSets) {
    final FixedBitSet intersection = new FixedBitSet(bitSets[0].length());
    // start from a copy of the first set, then and-in the other ones
    intersection.or(bitSets[0]);
    for (int i = 1; i < bitSets.length; ++i) {
      intersection.and(bitSets[i]);
    }
    return intersection;
  }
  /**
   * Exhaust {@code iterator} with {@code nextDoc()} and record every
   * returned document in a new bit set of size {@code maxDoc}.
   */
  private static FixedBitSet toBitSet(int maxDoc, DocIdSetIterator iterator) throws IOException {
    final FixedBitSet set = new FixedBitSet(maxDoc);
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
      set.set(doc);
    }
    return set;
  }
  // Test that the conjunction iterator is correct
  public void testConjunction() throws IOException {
    final int iters = atLeast(100);
    for (int iter = 0; iter < iters; ++iter) {
      final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
      final int numIterators = TestUtil.nextInt(random(), 2, 5);
      // sets[i] holds the documents that iterators[i] is expected to match
      final FixedBitSet[] sets = new FixedBitSet[numIterators];
      final DocIdSetIterator[] iterators = new DocIdSetIterator[numIterators];
      for (int i = 0; i < iterators.length; ++i) {
        final FixedBitSet set = randomSet(maxDoc);
        if (random().nextBoolean()) {
          // simple iterator
          sets[i] = set;
          iterators[i] = new BitDocIdSet(set).iterator();
        } else {
          // scorer with approximation
          // the approximation iterates over set but only the documents of
          // confirmed are actual matches
          final FixedBitSet confirmed = clearRandomBits(set);
          sets[i] = confirmed;
          final TwoPhaseDocIdSetIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
          iterators[i] = scorer(approximation);
        }
      }
      final ConjunctionDISI conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
      assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
    }
  }
  // Test that the conjunction approximation is correct
  public void testConjunctionApproximation() throws IOException {
    final int iters = atLeast(100);
    for (int iter = 0; iter < iters; ++iter) {
      final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
      final int numIterators = TestUtil.nextInt(random(), 2, 5);
      final FixedBitSet[] sets = new FixedBitSet[numIterators];
      final DocIdSetIterator[] iterators = new DocIdSetIterator[numIterators];
      // whether at least one of the iterators exposes a two-phase view
      boolean hasApproximation = false;
      for (int i = 0; i < iterators.length; ++i) {
        final FixedBitSet set = randomSet(maxDoc);
        if (random().nextBoolean()) {
          // simple iterator
          sets[i] = set;
          iterators[i] = new BitDocIdSet(set).iterator();
        } else {
          // scorer with approximation
          final FixedBitSet confirmed = clearRandomBits(set);
          sets[i] = confirmed;
          final TwoPhaseDocIdSetIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
          iterators[i] = scorer(approximation);
          hasApproximation = true;
        }
      }
      final ConjunctionDISI conjunction = ConjunctionDISI.intersect(Arrays.asList(iterators));
      TwoPhaseDocIdSetIterator twoPhaseIterator = conjunction.asTwoPhaseIterator();
      // the conjunction must expose a two-phase view if, and only if, one of
      // its clauses does
      assertEquals(hasApproximation, twoPhaseIterator != null);
      if (hasApproximation) {
        // iterating the two-phase view and confirming matches must yield the
        // intersection of the expected sets
        assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator)));
      }
    }
  }
  // This test makes sure that when nesting scorers with ConjunctionDISI, confirmations are pushed to the root.
  public void testRecursiveConjunctionApproximation() throws IOException {
    final int iters = atLeast(100);
    for (int iter = 0; iter < iters; ++iter) {
      final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
      final int numIterators = TestUtil.nextInt(random(), 2, 5);
      final FixedBitSet[] sets = new FixedBitSet[numIterators];
      // conjunction of all iterators created so far, built incrementally
      DocIdSetIterator conjunction = null;
      boolean hasApproximation = false;
      for (int i = 0; i < numIterators; ++i) {
        final FixedBitSet set = randomSet(maxDoc);
        final DocIdSetIterator newIterator;
        if (random().nextBoolean()) {
          // simple iterator
          sets[i] = set;
          newIterator = new BitDocIdSet(set).iterator();
        } else {
          // scorer with approximation
          final FixedBitSet confirmed = clearRandomBits(set);
          sets[i] = confirmed;
          final TwoPhaseDocIdSetIterator approximation = approximation(new BitDocIdSet(set).iterator(), confirmed);
          newIterator = scorer(approximation);
          hasApproximation = true;
        }
        if (conjunction == null) {
          conjunction = newIterator;
        } else {
          // wrap the previous conjunction and the new iterator into a new
          // conjunction, exposed as a scorer so that its two-phase view
          // (if any) is visible to the next level
          final ConjunctionDISI conj = ConjunctionDISI.intersect(Arrays.asList(conjunction, newIterator));
          conjunction = scorer(conj, conj.asTwoPhaseIterator());
        }
      }
      // numIterators is at least 2, so conjunction went through the else
      // branch above at least once and is a Scorer at this point
      TwoPhaseDocIdSetIterator twoPhaseIterator = ((Scorer) conjunction).asTwoPhaseIterator();
      assertEquals(hasApproximation, twoPhaseIterator != null);
      if (hasApproximation) {
        assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator)));
      } else {
        assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
      }
    }
  }
 }