LUCENE-6373: complete two phase doc id iteration support for Spans

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1675776 13f79535-47bb-0310-9956-ffa450edef68
2015-04-24 05:12:20 +00:00 · 2015-04-24 05:12:20 +00:00 · 5105d036bd
parent 558d85f892
commit 5105d036bd
15 changed files with 429 additions and 285 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -50,6 +50,10 @@ New Features
  FilterSpans to just have an accept(Spans candidate) method for
  subclasses. (Robert Muir)

+* LUCENE-6373: SpanOrQuery shares disjunction logic with boolean
+  queries, and supports two-phased iterators to avoid loading
+  positions when possible. (Paul Elschot via Robert Muir)
+
 * LUCENE-6352: Added a new query time join to the join module that uses
  global ordinals, which is faster for subsequent joins between reopens.
  (Martijn van Groningen, Adrien Grand)
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@ -23,7 +23,6 @@ import java.util.Comparator;
 import java.util.List;

 import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.search.spans.Spans;

 /** A conjunction of DocIdSetIterators.
 * This iterates over the doc ids that are present in each given DocIdSetIterator.
@ -35,20 +34,16 @@ public class ConjunctionDISI extends DocIdSetIterator {
  /** Create a conjunction over the provided iterators, taking advantage of
   *  {@link TwoPhaseIterator}. */
  public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
+    assert iterators.size() >= 2;
    final List<DocIdSetIterator> allIterators = new ArrayList<>();
    final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
-    for (DocIdSetIterator iterator : iterators) {
-      TwoPhaseIterator twoPhaseIterator = null;
-      if (iterator instanceof Scorer) { 
-        twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
-      } else if (iterator instanceof Spans) {
-        twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
-      }
-      if (twoPhaseIterator != null) {
-        allIterators.add(twoPhaseIterator.approximation());
-        twoPhaseIterators.add(twoPhaseIterator);
+    for (DocIdSetIterator iter : iterators) {
+      TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(iter);
+      if (twoPhaseIter != null) {
+        allIterators.add(twoPhaseIter.approximation());
+        twoPhaseIterators.add(twoPhaseIter);
      } else { // no approximation support, use the iterator as-is
-        allIterators.add(iterator);
+        allIterators.add(iter);
      }
    }

@ -63,6 +58,7 @@ public class ConjunctionDISI extends DocIdSetIterator {
  final DocIdSetIterator[] others;

  ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
+    assert iterators.size() >= 2;
    // Sort the array the first time to allow the least frequent DocsEnum to
    // lead the matching.
    CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
--- a/lucene/core/src/java/org/apache/lucene/search/ScorerPriorityQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ScorerPriorityQueue.java
@ -23,37 +23,13 @@ import java.util.Iterator;
 import org.apache.lucene.util.PriorityQueue;

 /**
- * A priority queue of scorers that orders by current doc ID.
+ * A priority queue of DocIdSetIterators that orders by current doc ID.
 * This specialization is needed over {@link PriorityQueue} because the
 * pluggable comparison function makes the rebalancing quite slow.
+ * @lucene.internal
 */
-final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper> {
-
-  static class ScorerWrapper {
-    final Scorer scorer;
-    final long cost;
-    int doc; // the current doc, used for comparison
-    ScorerWrapper next; // reference to a next element, see #topList
-
-    // An approximation of the scorer, or the scorer itself if it does not
-    // support two-phase iteration
-    final DocIdSetIterator approximation;
-    // A two-phase view of the scorer, or null if the scorer does not support
-    // two-phase iteration
-    final TwoPhaseIterator twoPhaseView;
-
-    ScorerWrapper(Scorer scorer) {
-      this.scorer = scorer;
-      this.cost = scorer.cost();
-      this.doc = -1;
-      this.twoPhaseView = scorer.asTwoPhaseIterator();
-      if (twoPhaseView != null) {
-        approximation = twoPhaseView.approximation();
-      } else {
-        approximation = scorer;
-      }
-    }
-  }
+public final class DisiPriorityQueue<Iter extends DocIdSetIterator>
+implements Iterable<DisiWrapper<Iter>> {

  static int leftNode(int node) {
    return ((node + 1) << 1) - 1;
@ -67,27 +43,27 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
    return ((node + 1) >>> 1) - 1;
  }

-  private final ScorerWrapper[] heap;
+  private final DisiWrapper<Iter>[] heap;
  private int size;

-  ScorerPriorityQueue(int maxSize) {
-    heap = new ScorerWrapper[maxSize];
+  public DisiPriorityQueue(int maxSize) {
+    heap = new DisiWrapper[maxSize];
    size = 0;
  }

-  int size() {
+  public int size() {
    return size;
  }

-  ScorerWrapper top() {
+  public DisiWrapper<Iter> top() {
    return heap[0];
  }

  /** Get the list of scorers which are on the current doc. */
-  ScorerWrapper topList() {
-    final ScorerWrapper[] heap = this.heap;
+  public DisiWrapper<Iter> topList() {
+    final DisiWrapper<Iter>[] heap = this.heap;
    final int size = this.size;
-    ScorerWrapper list = heap[0];
+    DisiWrapper<Iter> list = heap[0];
    list.next = null;
    if (size >= 3) {
      list = topList(list, heap, size, 1);
@ -98,14 +74,15 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
    return list;
  }

-  // prepend w1 (scorer) to w2 (list)
-  private static ScorerWrapper prepend(ScorerWrapper w1, ScorerWrapper w2) {
+  // prepend w1 (iterator) to w2 (list)
+  private DisiWrapper<Iter> prepend(DisiWrapper<Iter> w1, DisiWrapper<Iter> w2) {
    w1.next = w2;
    return w1;
  }

-  private static ScorerWrapper topList(ScorerWrapper list, ScorerWrapper[] heap, int size, int i) {
-    final ScorerWrapper w = heap[i];
+  private DisiWrapper<Iter> topList(DisiWrapper<Iter> list, DisiWrapper<Iter>[] heap,
+                                    int size, int i) {
+    final DisiWrapper<Iter> w = heap[i];
    if (w.doc == list.doc) {
      list = prepend(w, list);
      final int left = leftNode(i);
@ -120,37 +97,37 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
    return list;
  }

-  ScorerWrapper add(ScorerWrapper entry) {
-    final ScorerWrapper[] heap = this.heap;
+  public DisiWrapper<Iter> add(DisiWrapper<Iter> entry) {
+    final DisiWrapper<Iter>[] heap = this.heap;
    final int size = this.size;
    heap[size] = entry;
-    upHeap(heap, size);
+    upHeap(size);
    this.size = size + 1;
    return heap[0];
  }

-  ScorerWrapper pop() {
-    final ScorerWrapper[] heap = this.heap;
-    final ScorerWrapper result = heap[0];
+  public DisiWrapper<Iter> pop() {
+    final DisiWrapper<Iter>[] heap = this.heap;
+    final DisiWrapper<Iter> result = heap[0];
    final int i = --size;
    heap[0] = heap[i];
    heap[i] = null;
-    downHeap(heap, i);
+    downHeap(i);
    return result;
  }

-  ScorerWrapper updateTop() {
-    downHeap(heap, size);
+  public DisiWrapper<Iter> updateTop() {
+    downHeap(size);
    return heap[0];
  }

-  ScorerWrapper updateTop(ScorerWrapper topReplacement) {
+  DisiWrapper<Iter> updateTop(DisiWrapper<Iter> topReplacement) {
    heap[0] = topReplacement;
    return updateTop();
  }

-  static void upHeap(ScorerWrapper[] heap, int i) {
-    final ScorerWrapper node = heap[i];
+  void upHeap(int i) {
+    final DisiWrapper<Iter> node = heap[i];
    final int nodeDoc = node.doc;
    int j = parentNode(i);
    while (j >= 0 && nodeDoc < heap[j].doc) {
@ -161,9 +138,9 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
    heap[i] = node;
  }

-  static void downHeap(ScorerWrapper[] heap, int size) {
+  void downHeap(int size) {
    int i = 0;
-    final ScorerWrapper node = heap[0];
+    final DisiWrapper<Iter> node = heap[0];
    int j = leftNode(i);
    if (j < size) {
      int k = rightNode(j);
@ -186,8 +163,10 @@ final class ScorerPriorityQueue implements Iterable<org.apache.lucene.search.Sco
  }

  @Override
-  public Iterator<ScorerWrapper> iterator() {
+  public Iterator<DisiWrapper<Iter>> iterator() {
    return Arrays.asList(heap).subList(0, size).iterator();
  }

 }
+
+
--- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
@ -0,0 +1,55 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Wrapper used in {@link DisiPriorityQueue}.
+ * Bundles one iterator with the state the queue and its users need: the
+ * iterator's cost (read once in the constructor), the doc ID the wrapper is
+ * currently considered to be on, a link used to chain wrappers into a list,
+ * and the iterator's two-phase view when it has one.
+ * This class never advances the iterator itself; {@link #doc} is assigned by
+ * the code that calls nextDoc/advance on the iterator or its approximation.
+ * @lucene.internal
+ */
+public class DisiWrapper<Iter extends DocIdSetIterator> {
+  public final Iter iterator; // the wrapped iterator, exactly as passed to the constructor
+  public final long cost; // value of iterator.cost() at construction time
+  public int doc; // the current doc, used for comparison; starts at -1
+  public DisiWrapper<Iter> next; // reference to a next element, see DisiPriorityQueue#topList
+
+  // An approximation of the iterator, or the iterator itself if it does not
+  // support two-phase iteration
+  public final DocIdSetIterator approximation;
+  // A two-phase view of the iterator, or null if the iterator does not support
+  // two-phase iteration (as reported by TwoPhaseIterator.asTwoPhaseIterator)
+  public final TwoPhaseIterator twoPhaseView;
+  
+  // Both fields below start at -2 and are not touched again by this class.
+  // NOTE(review): their users are not visible here -- confirm who maintains them.
+  public int lastApproxMatchDoc; // last doc of approximation that did match
+  public int lastApproxNonMatchDoc; // last doc of approximation that did not match
+
+  public DisiWrapper(Iter iterator) {
+    this.iterator = iterator;
+    this.cost = iterator.cost(); // cost is read once, here
+    this.doc = -1; // initial value, before anything has advanced the iterator
+    this.twoPhaseView = TwoPhaseIterator.asTwoPhaseIterator(iterator);
+      
+    if (twoPhaseView != null) {
+      approximation = twoPhaseView.approximation();
+    } else {
+      approximation = iterator; // no two-phase support: the iterator stands in for its own approximation
+    }
+    // presumably -2 is used so that neither field can equal the initial doc (-1); confirm against callers
+    this.lastApproxNonMatchDoc = -2;
+    this.lastApproxMatchDoc = -2;
+  }
+}
+
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java
@ -0,0 +1,75 @@
+package org.apache.lucene.search;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * A {@link DocIdSetIterator} which is a disjunction of the approximations of
+ * the provided iterators.
+ * The queue handed to the constructor is kept by reference (not copied) and
+ * is reordered through {@link DisiPriorityQueue#updateTop()} every time this
+ * iterator moves, so any other holder of the same queue sees the new order.
+ * The reported cost is the sum of the costs of the wrappers that were in the
+ * queue when this object was constructed.
+ * @lucene.internal
+ */
+public class DisjunctionDISIApproximation<Iter extends DocIdSetIterator>
+extends DocIdSetIterator {
+
+  final DisiPriorityQueue<Iter> subIterators; // the caller's queue, used directly
+  final long cost; // sum of the wrappers' costs, computed once in the constructor
+
+  public DisjunctionDISIApproximation(DisiPriorityQueue<Iter> subIterators) {
+    this.subIterators = subIterators;
+    long cost = 0;
+    for (DisiWrapper<Iter> w : subIterators) {
+      cost += w.cost;
+    }
+    this.cost = cost;
+  }
+
+  @Override
+  public long cost() {
+    return cost;
+  }
+
+  @Override
+  public int docID() {
+   return subIterators.top().doc; // doc recorded on the wrapper at the top of the queue
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    DisiWrapper<Iter> top = subIterators.top();
+    final int doc = top.doc; // the doc this iterator is on before moving
+    do { // move every wrapper that is still on that doc, one queue top at a time
+      top.doc = top.approximation.nextDoc();
+      top = subIterators.updateTop(); // restore queue order after changing top.doc
+    } while (top.doc == doc);
+
+    return top.doc;
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    DisiWrapper<Iter> top = subIterators.top();
+    do { // the top is advanced at least once, even if it is already at or beyond target
+      top.doc = top.approximation.advance(target);
+      top = subIterators.updateTop(); // restore queue order after changing top.doc
+    } while (top.doc < target);
+
+    return top.doc;
+  }
+}
+
+
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
@ -19,8 +19,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;

-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
 /**
 * The Scorer for DisjunctionMaxQuery.  The union of all documents generated by the subquery scorers
 * is generated in document number order.  The score for each document is the maximum of the scores computed
@ -48,11 +46,11 @@ final class DisjunctionMaxScorer extends DisjunctionScorer {
  }

  @Override
-  protected float score(ScorerWrapper topList) throws IOException {
+  protected float score(DisiWrapper<Scorer> topList) throws IOException {
    float scoreSum = 0;
    float scoreMax = 0;
-    for (ScorerWrapper w = topList; w != null; w = w.next) {
-      final float subScore = w.scorer.score();
+    for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
+      final float subScore = w.iterator.score();
      scoreSum += subScore;
      if (subScore > scoreMax) {
        scoreMax = subScore;
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
@ -22,29 +22,27 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;

-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
 /**
 * Base class for Scorers that score disjunctions.
 */
 abstract class DisjunctionScorer extends Scorer {

  private final boolean needsScores;
-  private final ScorerPriorityQueue subScorers;
+  private final DisiPriorityQueue<Scorer> subScorers;
  private final long cost;

  /** Linked list of scorers which are on the current doc */
-  private ScorerWrapper topScorers;
+  private DisiWrapper<Scorer> topScorers;

  protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
    super(weight);
    if (subScorers.size() <= 1) {
      throw new IllegalArgumentException("There must be at least 2 subScorers");
    }
-    this.subScorers = new ScorerPriorityQueue(subScorers.size());
+    this.subScorers = new DisiPriorityQueue<Scorer>(subScorers.size());
    long cost = 0;
    for (Scorer scorer : subScorers) {
-      final ScorerWrapper w = new ScorerWrapper(scorer);
+      final DisiWrapper<Scorer> w = new DisiWrapper<>(scorer);
      cost += w.cost;
      this.subScorers.add(w);
    }
@ -52,69 +50,17 @@ abstract class DisjunctionScorer extends Scorer {
    this.needsScores = needsScores;
  }

-  /**
-   * A {@link DocIdSetIterator} which is a disjunction of the approximations of
-   * the provided iterators.
-   */
-  private static class DisjunctionDISIApproximation extends DocIdSetIterator {
-
-    final ScorerPriorityQueue subScorers;
-    final long cost;
-
-    DisjunctionDISIApproximation(ScorerPriorityQueue subScorers) {
-      this.subScorers = subScorers;
-      long cost = 0;
-      for (ScorerWrapper w : subScorers) {
-        cost += w.cost;
-      }
-      this.cost = cost;
-    }
-
-    @Override
-    public long cost() {
-      return cost;
-    }
-
-    @Override
-    public int docID() {
-     return subScorers.top().doc;
-    }
-
-    @Override
-    public int nextDoc() throws IOException {
-      ScorerWrapper top = subScorers.top();
-      final int doc = top.doc;
-      do {
-        top.doc = top.approximation.nextDoc();
-        top = subScorers.updateTop();
-      } while (top.doc == doc);
-
-      return top.doc;
-    }
-
-    @Override
-    public int advance(int target) throws IOException {
-      ScorerWrapper top = subScorers.top();
-      do {
-        top.doc = top.approximation.advance(target);
-        top = subScorers.updateTop();
-      } while (top.doc < target);
-
-      return top.doc;
-    }
-  }
-
  @Override
  public TwoPhaseIterator asTwoPhaseIterator() {
    boolean hasApproximation = false;
-    for (ScorerWrapper w : subScorers) {
+    for (DisiWrapper<Scorer> w : subScorers) {
      if (w.twoPhaseView != null) {
        hasApproximation = true;
        break;
      }
    }

-    if (hasApproximation == false) {
+    if (! hasApproximation) {
      // none of the sub scorers supports approximations
      return null;
    }
@ -122,13 +68,13 @@ abstract class DisjunctionScorer extends Scorer {
    // note it is important to share the same pq as this scorer so that
    // rebalancing the pq through the approximation will also rebalance
    // the pq in this scorer.
-    return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
+    return new TwoPhaseIterator(new DisjunctionDISIApproximation<Scorer>(subScorers)) {

      @Override
      public boolean matches() throws IOException {
-        ScorerWrapper topScorers = subScorers.topList();
+        DisiWrapper<Scorer> topScorers = subScorers.topList();
        // remove the head of the list as long as it does not match
-        while (topScorers.twoPhaseView != null && topScorers.twoPhaseView.matches() == false) {
+        while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) {
          topScorers = topScorers.next;
          if (topScorers == null) {
            return false;
@ -138,9 +84,9 @@ abstract class DisjunctionScorer extends Scorer {
        if (needsScores) {
          // if scores or freqs are needed, we also need to remove scorers
          // from the top list that do not actually match
-          ScorerWrapper previous = topScorers;
-          for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
-            if (w.twoPhaseView != null && w.twoPhaseView.matches() == false) {
+          DisiWrapper<Scorer> previous = topScorers;
+          for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
+            if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
              // w does not match, remove it
              previous.next = w.next;
            } else {
@ -175,10 +121,10 @@ abstract class DisjunctionScorer extends Scorer {
  @Override
  public final int nextDoc() throws IOException {
    topScorers = null;
-    ScorerWrapper top = subScorers.top();
+    DisiWrapper<Scorer> top = subScorers.top();
    final int doc = top.doc;
    do {
-      top.doc = top.scorer.nextDoc();
+      top.doc = top.iterator.nextDoc();
      top = subScorers.updateTop();
    } while (top.doc == doc);

@ -188,9 +134,9 @@ abstract class DisjunctionScorer extends Scorer {
  @Override
  public final int advance(int target) throws IOException {
    topScorers = null;
-    ScorerWrapper top = subScorers.top();
+    DisiWrapper<Scorer> top = subScorers.top();
    do {
-      top.doc = top.scorer.advance(target);
+      top.doc = top.iterator.advance(target);
      top = subScorers.updateTop();
    } while (top.doc < target);

@ -203,7 +149,7 @@ abstract class DisjunctionScorer extends Scorer {
      topScorers = subScorers.topList();
    }
    int freq = 1;
-    for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
+    for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
      freq += 1;
    }
    return freq;
@ -218,13 +164,13 @@ abstract class DisjunctionScorer extends Scorer {
  }

  /** Compute the score for the given linked list of scorers. */
-  protected abstract float score(ScorerWrapper topList) throws IOException;
+  protected abstract float score(DisiWrapper<Scorer> topList) throws IOException;

  @Override
  public final Collection<ChildScorer> getChildren() {
    ArrayList<ChildScorer> children = new ArrayList<>();
-    for (ScorerWrapper scorer : subScorers) {
-      children.add(new ChildScorer(scorer.scorer, "SHOULD"));
+    for (DisiWrapper<Scorer> scorer : subScorers) {
+      children.add(new ChildScorer(scorer.iterator, "SHOULD"));
    }
    return children;
  }
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
@ -20,8 +20,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;

-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
 /** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
 * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. 
 */
@ -39,11 +37,11 @@ final class DisjunctionSumScorer extends DisjunctionScorer {
  }

  @Override
-  protected float score(ScorerWrapper topList) throws IOException {
+  protected float score(DisiWrapper<Scorer> topList) throws IOException {
    double score = 0;
    int freq = 0;
-    for (ScorerWrapper w = topList; w != null; w = w.next) {
-      score += w.scorer.score();
+    for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
+      score += w.iterator.score();
      freq += 1;
    }
    return (float)score * coord[freq];
--- a/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java
@ -19,6 +19,8 @@ package org.apache.lucene.search;

 import java.io.IOException;

+import org.apache.lucene.search.spans.Spans;
+
 /**
 * This abstract class defines methods to iterate over a set of non-decreasing
 * doc ids. Note that this class assumes it iterates on doc Ids, and therefore
@ -175,4 +177,5 @@ public abstract class DocIdSetIterator {
   * completely inaccurate.
   */
  public abstract long cost();
+  
 }
--- a/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
@ -23,12 +23,11 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;

-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
 import org.apache.lucene.util.PriorityQueue;

-import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
-import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
-import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;
+import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
+import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
+import static org.apache.lucene.search.DisiPriorityQueue.rightNode;

 /**
 * A {@link Scorer} for {@link BooleanQuery} when
@ -83,17 +82,17 @@ final class MinShouldMatchSumScorer extends Scorer {

  // list of scorers which 'lead' the iteration and are currently
  // positioned on 'doc'
-  ScorerWrapper lead;
+  DisiWrapper<Scorer> lead;
  int doc;  // current doc ID of the leads
  int freq; // number of scorers on the desired doc ID

  // priority queue of scorers that are too advanced compared to the current
  // doc. Ordered by doc ID.
-  final ScorerPriorityQueue head;
+  final DisiPriorityQueue<Scorer> head;

  // priority queue of scorers which are behind the current doc.
  // Ordered by cost.
-  final ScorerWrapper[] tail;
+  final DisiWrapper<Scorer>[] tail;
  int tailSize;

  final Collection<ChildScorer> childScorers;
@ -113,13 +112,13 @@ final class MinShouldMatchSumScorer extends Scorer {
    this.coord = coord;
    this.doc = -1;

-    head = new ScorerPriorityQueue(scorers.size() - minShouldMatch + 1);
+    head = new DisiPriorityQueue<Scorer>(scorers.size() - minShouldMatch + 1);
    // there can be at most minShouldMatch - 1 scorers beyond the current position
    // otherwise we might be skipping over matching documents
-    tail = new ScorerWrapper[minShouldMatch - 1];
+    tail = new DisiWrapper[minShouldMatch - 1];

    for (Scorer scorer : scorers) {
-      addLead(new ScorerWrapper(scorer));
+      addLead(new DisiWrapper<Scorer>(scorer));
    }

    List<ChildScorer> children = new ArrayList<>();
@ -145,13 +144,13 @@ final class MinShouldMatchSumScorer extends Scorer {
    // We are moving to the next doc ID, so scorers in 'lead' need to go in
    // 'tail'. If there is not enough space in 'tail', then we take the least
    // costly scorers and advance them.
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
-      final ScorerWrapper evicted = insertTailWithOverFlow(s);
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+      final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
      if (evicted != null) {
        if (evicted.doc == doc) {
-          evicted.doc = evicted.scorer.nextDoc();
+          evicted.doc = evicted.iterator.nextDoc();
        } else {
-          evicted.doc = evicted.scorer.advance(doc + 1);
+          evicted.doc = evicted.iterator.advance(doc + 1);
        }
        head.add(evicted);
      }
@ -164,23 +163,23 @@ final class MinShouldMatchSumScorer extends Scorer {
  @Override
  public int advance(int target) throws IOException {
    // Same logic as in nextDoc
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
-      final ScorerWrapper evicted = insertTailWithOverFlow(s);
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+      final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
      if (evicted != null) {
-        evicted.doc = evicted.scorer.advance(target);
+        evicted.doc = evicted.iterator.advance(target);
        head.add(evicted);
      }
    }

    // But this time there might also be scorers in 'head' behind the desired
    // target so we need to do the same thing that we did on 'lead' on 'head'
-    ScorerWrapper headTop = head.top();
+    DisiWrapper<Scorer> headTop = head.top();
    while (headTop.doc < target) {
-      final ScorerWrapper evicted = insertTailWithOverFlow(headTop);
+      final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(headTop);
      // We know that the tail is full since it contains at most
      // minShouldMatch - 1 entries and we just moved at least minShouldMatch
      // entries to it, so evicted is not null
-      evicted.doc = evicted.scorer.advance(target);
+      evicted.doc = evicted.iterator.advance(target);
      headTop = head.updateTop(evicted);
    }

@ -188,20 +187,20 @@ final class MinShouldMatchSumScorer extends Scorer {
    return doNext();
  }

-  private void addLead(ScorerWrapper lead) {
+  private void addLead(DisiWrapper<Scorer> lead) {
    lead.next = this.lead;
    this.lead = lead;
    freq += 1;
  }

  private void pushBackLeads() throws IOException {
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
      addTail(s);
    }
  }

-  private void advanceTail(ScorerWrapper top) throws IOException {
-    top.doc = top.scorer.advance(doc);
+  private void advanceTail(DisiWrapper<Scorer> top) throws IOException {
+    top.doc = top.iterator.advance(doc);
    if (top.doc == doc) {
      addLead(top);
    } else {
@ -210,7 +209,7 @@ final class MinShouldMatchSumScorer extends Scorer {
  }

  private void advanceTail() throws IOException {
-    final ScorerWrapper top = popTail();
+    final DisiWrapper<Scorer> top = popTail();
    advanceTail(top);
  }

@ -276,8 +275,8 @@ final class MinShouldMatchSumScorer extends Scorer {
    // we need to know about all matches
    updateFreq();
    double score = 0;
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
-      score += s.scorer.score();
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+      score += s.iterator.score();
    }
    return coord[freq] * (float) score;
  }
@ -289,12 +288,12 @@ final class MinShouldMatchSumScorer extends Scorer {
  }

  /** Insert an entry in 'tail' and evict the least-costly scorer if full. */
-  private ScorerWrapper insertTailWithOverFlow(ScorerWrapper s) {
+  private DisiWrapper<Scorer> insertTailWithOverFlow(DisiWrapper<Scorer> s) {
    if (tailSize < tail.length) {
      addTail(s);
      return null;
    } else if (tail.length >= 1) {
-      final ScorerWrapper top = tail[0];
+      final DisiWrapper<Scorer> top = tail[0];
      if (top.cost < s.cost) {
        tail[0] = s;
        downHeapCost(tail, tailSize);
@ -305,16 +304,16 @@ final class MinShouldMatchSumScorer extends Scorer {
  }

  /** Add an entry to 'tail'. Fails if over capacity. */
-  private void addTail(ScorerWrapper s) {
+  private void addTail(DisiWrapper<Scorer> s) {
    tail[tailSize] = s;
    upHeapCost(tail, tailSize);
    tailSize += 1;
  }

  /** Pop the least-costly scorer from 'tail'. */
-  private ScorerWrapper popTail() {
+  private DisiWrapper<Scorer> popTail() {
    assert tailSize > 0;
-    final ScorerWrapper result = tail[0];
+    final DisiWrapper<Scorer> result = tail[0];
    tail[0] = tail[--tailSize];
    downHeapCost(tail, tailSize);
    return result;
@ -322,8 +321,8 @@ final class MinShouldMatchSumScorer extends Scorer {

  /** Heap helpers */

-  private static void upHeapCost(ScorerWrapper[] heap, int i) {
-    final ScorerWrapper node = heap[i];
+  private static void upHeapCost(DisiWrapper<Scorer>[] heap, int i) {
+    final DisiWrapper<Scorer> node = heap[i];
    final long nodeCost = node.cost;
    int j = parentNode(i);
    while (j >= 0 && nodeCost < heap[j].cost) {
@ -334,9 +333,9 @@ final class MinShouldMatchSumScorer extends Scorer {
    heap[i] = node;
  }

-  private static void downHeapCost(ScorerWrapper[] heap, int size) {
+  private static void downHeapCost(DisiWrapper<Scorer>[] heap, int size) {
    int i = 0;
-    final ScorerWrapper node = heap[0];
+    final DisiWrapper<Scorer> node = heap[0];
    int j = leftNode(i);
    if (j < size) {
      int k = rightNode(j);
--- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
@ -20,9 +20,13 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Objects;

+import org.apache.lucene.search.spans.Spans;
+
 /**
- * Returned by {@link Scorer#asTwoPhaseIterator()} to expose an approximation of
- * a {@link DocIdSetIterator}. When the {@link #approximation()}'s
+ * Returned by {@link Scorer#asTwoPhaseIterator()}
+ * and  {@link Spans#asTwoPhaseIterator()}
+ * to expose an approximation of a {@link DocIdSetIterator}.
+ * When the {@link #approximation()}'s
 * {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
 * return, {@link #matches()} needs to be checked in order to know whether the
 * returned doc ID actually matches.
@ -89,4 +93,16 @@ public abstract class TwoPhaseIterator {
   *  {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
  public abstract boolean matches() throws IOException;

+  /**
+   * Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
+   * when available, otherwise returns null.
+   */
+  public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
+    return (iter instanceof Scorer)
+            ? ((Scorer) iter).asTwoPhaseIterator()
+            : (iter instanceof Spans)
+            ? ((Spans) iter).asTwoPhaseIterator()
+            : null;
+  }
+
 }
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
@ -31,9 +31,13 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.DisiPriorityQueue;
+import org.apache.lucene.search.DisiWrapper;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.DisjunctionDISIApproximation;
+

 /** Matches the union of its clauses.
 */
@ -146,35 +150,16 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
  }


-  private class SpanQueue extends PriorityQueue<Spans> {
-    public SpanQueue(int size) {
-      super(size);
-    }
-
-    @Override
-    protected final boolean lessThan(Spans spans1, Spans spans2) {
-      if (spans1.docID() == spans2.docID()) {
-        if (spans1.startPosition() == spans2.startPosition()) {
-          return spans1.endPosition() < spans2.endPosition();
-        } else {
-          return spans1.startPosition() < spans2.startPosition();
-        }
-      } else {
-        return spans1.docID() < spans2.docID();
-      }
-    }
-  }
-
  @Override
  public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
  throws IOException {

    ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());

-    for (SpanQuery seq : clauses) {
-      Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
-      if (subSpan != null) {
-        subSpans.add(subSpan);
+    for (SpanQuery sq : clauses) {
+      Spans spans = sq.getSpans(context, acceptDocs, termContexts);
+      if (spans != null) {
+        subSpans.add(spans);
      }
    }

@ -184,114 +169,168 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
      return subSpans.get(0);
    }

-    SpanQueue queue = new SpanQueue(clauses.size());
+    DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
    for (Spans spans : subSpans) {
-      queue.add(spans);
+      byDocQueue.add(new DisiWrapper<>(spans));
    }

+    SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
+
    return new Spans() {
+      Spans topPositionSpans = null;

      @Override
      public int nextDoc() throws IOException {
-        if (queue.size() == 0) { // all done
-          return NO_MORE_DOCS;
-        }
-
-        int currentDoc = top().docID();
-
-        if (currentDoc == -1) { // initially
-          return advance(0);
-        }
-
+        topPositionSpans = null;
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        int currentDoc = topDocSpans.doc;
        do {
-          if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
-            queue.updateTop();
-          } else {
-            queue.pop(); // exhausted a clause
-            if (queue.size() == 0) {
-              return NO_MORE_DOCS;
-            }
-          }
-          // assert queue.size() > 0;
-          int doc = top().docID();
-          if (doc > currentDoc) {
-            return doc;
-          }
-        } while (true);
-      }
-
-      private Spans top() {
-        return queue.top();
+          topDocSpans.doc = topDocSpans.iterator.nextDoc();
+          topDocSpans = byDocQueue.updateTop();
+        } while (topDocSpans.doc == currentDoc);
+        return topDocSpans.doc;
      }

      @Override
      public int advance(int target) throws IOException {
-
-        while ((queue.size() > 0) && (top().docID() < target)) {
-          if (top().advance(target) != NO_MORE_DOCS) {
-            queue.updateTop();
-          } else {
-            queue.pop();
-          }
-        }
-
-        return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
+        topPositionSpans = null;
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        do {
+          topDocSpans.doc = topDocSpans.iterator.advance(target);
+          topDocSpans = byDocQueue.updateTop();
+        } while (topDocSpans.doc < target);
+        return topDocSpans.doc;
      }

      @Override
      public int docID() {
-        return (queue == null) ? -1
-              : (queue.size() > 0) ? top().docID()
-              : NO_MORE_DOCS;
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        return topDocSpans.doc;
+      }
+
+      @Override
+      public TwoPhaseIterator asTwoPhaseIterator() {
+        boolean hasApproximation = false;
+        for (DisiWrapper<Spans> w : byDocQueue) {
+          if (w.twoPhaseView != null) {
+            hasApproximation = true;
+            break;
+          }
+        }
+
+        if (! hasApproximation) { // none of the sub spans supports approximations
+          return null;
+        }
+
+        return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
+          @Override
+          public boolean matches() throws IOException {
+            return twoPhaseCurrentDocMatches();
+          }
+        };
+      }
+      
+      int lastDocTwoPhaseMatched = -1;
+
+      boolean twoPhaseCurrentDocMatches() throws IOException {
+        DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
+        // remove the head of the list as long as it does not match
+        final int currentDoc = listAtCurrentDoc.doc;
+        while (listAtCurrentDoc.twoPhaseView != null) {
+          if (listAtCurrentDoc.twoPhaseView.matches()) {
+            // use this spans for positions at current doc:
+            listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
+            break;
+          }
+          // do not use this spans for positions at current doc:
+          listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
+          listAtCurrentDoc = listAtCurrentDoc.next;
+          if (listAtCurrentDoc == null) {
+            return false;
+          }
+        }
+        lastDocTwoPhaseMatched = currentDoc;
+        topPositionSpans = null;
+        return true;
+      }
+
+      void fillPositionQueue() throws IOException { // called at first nextStartPosition
+        assert byPositionQueue.size() == 0;
+        // add all matching Spans at current doc to byPositionQueue
+        DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
+        while (listAtCurrentDoc != null) {
+          Spans spansAtDoc = listAtCurrentDoc.iterator;
+          if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation
+            if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
+              if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
+                spansAtDoc = null;
+              } else {
+                if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
+                  if (! listAtCurrentDoc.twoPhaseView.matches()) {
+                    spansAtDoc = null;
+                  }
+                }
+              } 
+            }
+          }
+
+          if (spansAtDoc != null) {
+            assert spansAtDoc.docID() == listAtCurrentDoc.doc;
+            assert spansAtDoc.startPosition() == -1;
+            spansAtDoc.nextStartPosition();
+            assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
+            byPositionQueue.add(spansAtDoc);
+          }
+          listAtCurrentDoc = listAtCurrentDoc.next;
+        }
+        assert byPositionQueue.size() > 0;
      }
        
      @Override
      public int nextStartPosition() throws IOException {
-        top().nextStartPosition();
-        queue.updateTop();
-        int startPos = top().startPosition();
-        while (startPos == -1) { // initially at this doc
-          top().nextStartPosition();
-          queue.updateTop();
-          startPos = top().startPosition();
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        assert topDocSpans.doc != NO_MORE_DOCS;
+        if (topPositionSpans == null) {
+          byPositionQueue.clear();
+          fillPositionQueue(); // fills byPositionQueue at first position
+          topPositionSpans = byPositionQueue.top();
+        } else {
+          topPositionSpans.nextStartPosition();
+          topPositionSpans = byPositionQueue.updateTop();
        }
-        return startPos;
+        return topPositionSpans.startPosition();
      }

      @Override
      public int startPosition() {
-        return top().startPosition();
+        return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
      }

      @Override
      public int endPosition() {
-        return top().endPosition();
+        return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
      }

      @Override
      public Collection<byte[]> getPayload() throws IOException {
-        ArrayList<byte[]> result = null;
-        Spans theTop = top();
-        if (theTop != null && theTop.isPayloadAvailable()) {
-          result = new ArrayList<>(theTop.getPayload());
-        }
-        return result;
+        return topPositionSpans == null
+                ? null
+                : topPositionSpans.isPayloadAvailable()
+                ? new ArrayList<>(topPositionSpans.getPayload())
+                : null;
      }

      @Override
      public boolean isPayloadAvailable() throws IOException {
-        Spans top = top();
-        return top != null && top.isPayloadAvailable();
+        return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
      }

      @Override
      public String toString() {
-          return "spans("+SpanOrQuery.this+")@"+
-            ((queue == null)?"START"
-             :(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
+        return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition();
      }

-      private long cost = -1;
+      long cost = -1;

      @Override
      public long cost() {
@ -303,8 +342,8 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
        }
        return cost;
      }
-
    };
  }

 }
+
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionQueue.java
@ -0,0 +1,35 @@
+package org.apache.lucene.search.spans;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.PriorityQueue;
+
+class SpanPositionQueue extends PriorityQueue<Spans> {
+  SpanPositionQueue(int maxSize) {
+    super(maxSize, false); // do not prepopulate
+  }
+
+  protected boolean lessThan(Spans s1, Spans s2) {
+    int start1 = s1.startPosition();
+    int start2 = s2.startPosition();
+    return (start1 < start2) ? true
+          : (start1 == start2) ? s1.endPosition() < s2.endPosition()
+          : false;
+  }
+}
+
--- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
@ -86,11 +86,12 @@ public abstract class Spans extends DocIdSetIterator {
   *
   * Note that the returned {@link TwoPhaseIterator}'s
   * {@link TwoPhaseIterator#approximation() approximation} must
-   * advance synchronously with this iterator: advancing the approximation must
+   * advance documents synchronously with this iterator:
+   * advancing the approximation must
   * advance this iterator and vice-versa.
   *
-   * Implementing this method is typically useful on {@link Spans}s
-   * that have a high per-document overhead in order to confirm matches.
+   * Implementing this method is typically useful on a {@link Spans}
+   * that has a high per-document overhead for confirming matches.
   *
   * The default implementation returns {@code null}.
   */
--- a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
@ -89,7 +89,7 @@ public abstract class PriorityQueue<T> {
   * value (i.e., {@link #lessThan} should always favor the
   * non-sentinel values).<br>
   * 
-   * By default, this method returns false, which means the queue will not be
+   * By default, this method returns null, which means the queue will not be
   * filled with sentinel values. Otherwise, the value returned will be used to
   * pre-populate the queue. Adds sentinel values to the queue.<br>
   *