mirror of https://github.com/apache/lucene.git
LUCENE-8306: Allow iteration over submatches
Also includes LUCENE-8404, adding match iteration to SpanQuery
parent 995a902d1a
commit 028c86b1fa
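For orientation before the diff: a minimal sketch of how the new submatch API can be consumed. This snippet is not part of the commit; the searcher, query, document id and field name are all assumed for illustration.

import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Weight;

public class SubMatchesDemo {
  // Walk every match for 'query' in global document 'doc', then drill into the
  // per-term submatches introduced by this commit (assumed usage, not committed code).
  public static void printMatches(IndexSearcher searcher, Query query, int doc, String field) throws Exception {
    Weight w = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
    List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
    LeafReaderContext ctx = leaves.get(ReaderUtil.subIndex(doc, leaves));
    Matches matches = w.matches(ctx, doc - ctx.docBase);
    if (matches == null) {
      return; // the query does not match this document
    }
    MatchesIterator it = matches.getMatches(field);
    if (it == null) {
      return; // no matches in this field
    }
    while (it.next()) {
      System.out.println(it.getQuery() + " matched positions "
          + it.startPosition() + "-" + it.endPosition());
      MatchesIterator subs = it.getSubMatches(); // null when already at a leaf
      while (subs != null && subs.next()) {
        System.out.println("  term at position " + subs.startPosition()
            + ", offsets " + subs.startOffset() + "-" + subs.endOffset());
      }
    }
  }
}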
DisjunctionMatchesIterator.java

@@ -45,14 +45,14 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
    *
    * Only terms that have at least one match in the given document will be included
    */
-  static MatchesIterator fromTerms(LeafReaderContext context, int doc, String field, List<Term> terms) throws IOException {
+  static MatchesIterator fromTerms(LeafReaderContext context, int doc, Query query, String field, List<Term> terms) throws IOException {
     Objects.requireNonNull(field);
     for (Term term : terms) {
       if (Objects.equals(field, term.field()) == false) {
         throw new IllegalArgumentException("Tried to generate iterator from terms in multiple fields: expected [" + field + "] but got [" + term.field() + "]");
       }
     }
-    return fromTermsEnum(context, doc, field, asBytesRefIterator(terms));
+    return fromTermsEnum(context, doc, query, field, asBytesRefIterator(terms));
   }

   private static BytesRefIterator asBytesRefIterator(List<Term> terms) {
@@ -72,7 +72,7 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
    *
    * Only terms that have at least one match in the given document will be included
    */
-  static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, String field, BytesRefIterator terms) throws IOException {
+  static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
     Objects.requireNonNull(field);
     List<MatchesIterator> mis = new ArrayList<>();
     Terms t = context.reader().terms(field);
@@ -84,7 +84,7 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
       if (te.seekExact(term)) {
         PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
         if (pe.advance(doc) == doc) {
-          mis.add(new TermMatchesIterator(pe));
+          mis.add(new TermMatchesIterator(query, pe));
           reuse = null;
         }
         else {
@@ -158,4 +158,13 @@ final class DisjunctionMatchesIterator implements MatchesIterator {
     return queue.top().endOffset();
   }

+  @Override
+  public MatchesIterator getSubMatches() throws IOException {
+    return queue.top().getSubMatches();
+  }
+
+  @Override
+  public Query getQuery() {
+    return queue.top().getQuery();
+  }
 }
DocValuesRewriteMethod.java

@@ -78,7 +78,7 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
   @Override
   public Matches matches(LeafReaderContext context, int doc) throws IOException {
     final SortedSetDocValues fcsi = DocValues.getSortedSet(context.reader(), query.field);
-    return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, getTermsEnum(fcsi)));
+    return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, query.field, getTermsEnum(fcsi)));
   }

   private TermsEnum getTermsEnum(SortedSetDocValues fcsi) throws IOException {
MatchesIterator.java

@@ -28,6 +28,9 @@ import org.apache.lucene.index.LeafReaderContext;
  * positions and/or offsets after each call. You should not call the position or offset methods
  * before {@link #next()} has been called, or after {@link #next()} has returned {@code false}.
  *
+ * Matches from some queries may span multiple positions. You can retrieve the positions of
+ * individual matching terms on the current match by calling {@link #getSubMatches()}.
+ *
  * Matches are ordered by start position, and then by end position. Match intervals may overlap.
  *
  * @see Weight#matches(LeafReaderContext, int)
@@ -70,4 +73,25 @@ public interface MatchesIterator {
    */
   int endOffset() throws IOException;

+  /**
+   * Returns a MatchesIterator that iterates over the positions and offsets of individual
+   * terms within the current match
+   *
+   * Returns {@code null} if there are no submatches (ie the current iterator is at the
+   * leaf level)
+   *
+   * Should only be called after {@link #next()} has returned {@code true}
+   */
+  MatchesIterator getSubMatches() throws IOException;
+
+  /**
+   * Returns the Query causing the current match
+   *
+   * If this {@link MatchesIterator} has been returned from a {@link #getSubMatches()}
+   * call, then returns a {@link TermQuery} equivalent to the current match
+   *
+   * Should only be called after {@link #next()} has returned {@code true}
+   */
+  Query getQuery();
+
 }
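A short sketch of the contract the two new interface methods define. The helper below is illustrative only; the iterator is assumed to come from Matches#getMatches as in the snippet near the top of this page.

static void walk(MatchesIterator it) throws IOException {
  while (it.next()) {
    Query label = it.getQuery();            // the query responsible for this match
    MatchesIterator subs = it.getSubMatches();
    if (subs == null) {
      // leaf-level match: positions and offsets are read from 'it' itself
    } else {
      while (subs.next()) {
        // per the javadoc above, each submatch is labelled with an equivalent TermQuery
        TermQuery term = (TermQuery) subs.getQuery();
      }
    }
  }
}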
MultiPhraseQuery.java

@@ -269,7 +269,7 @@ public class MultiPhraseQuery extends Query {
         TermState termState = termStates.get(term).get(context);
         if (termState != null) {
           termsEnum.seekExact(term.bytes(), termState);
-          postings.add(termsEnum.postings(null, exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS));
+          postings.add(termsEnum.postings(null, exposeOffsets ? PostingsEnum.ALL : PostingsEnum.POSITIONS));
           totalMatchCost += PhraseQuery.termPositionsCost(termsEnum);
         }
       }
@@ -294,7 +294,7 @@ public class MultiPhraseQuery extends Query {
       return new ExactPhraseMatcher(postingsFreqs, totalMatchCost);
     }
     else {
-      return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost);
+      return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
     }

   }
@@ -647,5 +647,6 @@ public class MultiPhraseQuery extends Query {
     public BytesRef getPayload() throws IOException {
       return posQueue.top().pe.getPayload();
     }
+
   }
 }
MultiTermQueryConstantScoreWrapper.java

@@ -211,7 +211,7 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
     if (terms.hasPositions() == false) {
       return super.matches(context, doc);
     }
-    return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query.field, query.getTermsEnum(terms)));
+    return Matches.forField(query.field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, query, query.field, query.getTermsEnum(terms)));
   }

   @Override
PhraseMatcher.java

@@ -88,4 +88,5 @@ abstract class PhraseMatcher {
   public float getMatchCost() {
     return matchCost;
   }
+
 }
PhraseQuery.java

@@ -446,7 +446,7 @@ public class PhraseQuery extends Query {
         return null;
       }
       te.seekExact(t.bytes(), state);
-      PostingsEnum postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.OFFSETS : PostingsEnum.POSITIONS);
+      PostingsEnum postingsEnum = te.postings(null, exposeOffsets ? PostingsEnum.ALL : PostingsEnum.POSITIONS);
       postingsFreqs[i] = new PostingsAndFreq(postingsEnum, positions[i], t);
       totalMatchCost += termPositionsCost(te);
     }
@@ -457,7 +457,7 @@ public class PhraseQuery extends Query {
       return new ExactPhraseMatcher(postingsFreqs, totalMatchCost);
     }
     else {
-      return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost);
+      return new SloppyPhraseMatcher(postingsFreqs, slop, totalMatchCost, exposeOffsets);
     }
   }

PhraseWeight.java

@@ -123,6 +123,16 @@ abstract class PhraseWeight extends Weight {
         public int endOffset() throws IOException {
           return matcher.endOffset();
         }
+
+        @Override
+        public MatchesIterator getSubMatches() throws IOException {
+          return null; // phrases are treated as leaves
+        }
+
+        @Override
+        public Query getQuery() {
+          return PhraseWeight.this.getQuery();
+        }
       };
     });
   }
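The PhraseWeight change above deliberately reports a whole phrase as a single leaf match. A hedged sketch of what a consumer observes, assuming the Matches came from a PhraseQuery's Weight:

static void checkPhraseIsLeaf(Matches matches, String field) throws IOException {
  MatchesIterator it = matches.getMatches(field);
  while (it != null && it.next()) {
    assert it.getSubMatches() == null;           // phrases expose no submatches
    assert it.getQuery() instanceof PhraseQuery; // labelled with the phrase itself
  }
}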
SloppyPhraseMatcher.java

@@ -54,13 +54,14 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
   private final int slop;
   private final int numPostings;
   private final PhraseQueue pq; // for advancing min position
+  private final boolean captureLeadMatch;

   private int end; // current largest phrase position

   private int leadPosition;
   private int leadOffset;
-  private int currentEndPostings;
-  private int advanceEndPostings;
+  private int leadEndOffset;
+  private int leadOrd;

   private boolean hasRpts; // flag indicating that there are repetitions (as checked in first candidate doc)
   private boolean checkedRpts; // flag to only check for repetitions in first candidate doc
@@ -71,10 +72,11 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
   private boolean positioned;
   private int matchLength;

-  SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost) {
+  SloppyPhraseMatcher(PhraseQuery.PostingsAndFreq[] postings, int slop, float matchCost, boolean captureLeadMatch) {
     super(approximation(postings), matchCost);
     this.slop = slop;
     this.numPostings = postings.length;
+    this.captureLeadMatch = captureLeadMatch;
     pq = new PhraseQueue(postings.length);
     phrasePositions = new PhrasePositions[postings.length];
     for (int i = 0; i < postings.length; ++i) {
@@ -120,10 +122,8 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
       return false;
     }
     PhrasePositions pp = pq.pop();
-    assert pp != null; // if the pq is empty, then positioned == false
-    leadPosition = pp.position + pp.offset;
-    leadOffset = pp.postings.startOffset();
-    currentEndPostings = advanceEndPostings;
+    assert pp != null; // if the pq is not full, then positioned == false
+    captureLead(pp);
     matchLength = end - pp.position;
     int next = pq.top().position;
     while (advancePP(pp)) {
@@ -137,6 +137,7 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
         }
         pp = pq.pop();
         next = pq.top().position;
+        assert pp != null; // if the pq is not full, then positioned == false
         matchLength = end - pp.position;
       } else {
         int matchLength2 = end - pp.position;
@@ -144,14 +145,22 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
           matchLength = matchLength2;
         }
       }
-      leadPosition = pp.position + pp.offset;
-      leadOffset = pp.postings.startOffset();
-      currentEndPostings = advanceEndPostings;
+      captureLead(pp);
     }
     positioned = false;
     return matchLength <= slop;
   }

+  private void captureLead(PhrasePositions pp) throws IOException {
+    if (captureLeadMatch == false) {
+      return;
+    }
+    leadOrd = pp.ord;
+    leadPosition = pp.position + pp.offset;
+    leadOffset = pp.postings.startOffset();
+    leadEndOffset = pp.postings.endOffset();
+  }
+
   @Override
   public int startPosition() {
     // when a match is detected, the top postings is advanced until it has moved
@@ -160,6 +169,7 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     // However, the priority queue doesn't guarantee that the top postings is in fact the
     // earliest in the list, so we need to cycle through all terms to check.
     // this is slow, but Matches is slow anyway...
+    int leadPosition = this.leadPosition;
     for (PhrasePositions pp : phrasePositions) {
       leadPosition = Math.min(leadPosition, pp.position + pp.offset);
     }
@@ -168,7 +178,13 @@ final class SloppyPhraseMatcher extends PhraseMatcher {

   @Override
   public int endPosition() {
-    return phrasePositions[currentEndPostings].position + phrasePositions[currentEndPostings].offset;
+    int endPosition = leadPosition;
+    for (PhrasePositions pp : phrasePositions) {
+      if (pp.ord != leadOrd) {
+        endPosition = Math.max(endPosition, pp.position + pp.offset);
+      }
+    }
+    return endPosition;
   }

   @Override
@@ -179,6 +195,7 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     // However, the priority queue doesn't guarantee that the top postings is in fact the
     // earliest in the list, so we need to cycle through all terms to check
     // this is slow, but Matches is slow anyway...
+    int leadOffset = this.leadOffset;
     for (PhrasePositions pp : phrasePositions) {
       leadOffset = Math.min(leadOffset, pp.postings.startOffset());
     }
@@ -187,7 +204,13 @@ final class SloppyPhraseMatcher extends PhraseMatcher {

   @Override
   public int endOffset() throws IOException {
-    return phrasePositions[currentEndPostings].postings.endOffset();
+    int endOffset = leadEndOffset;
+    for (PhrasePositions pp : phrasePositions) {
+      if (pp.ord != leadOrd) {
+        endOffset = Math.max(endOffset, pp.postings.endOffset());
+      }
+    }
+    return endOffset;
   }

   /** advance a PhrasePosition and update 'end', return false if exhausted */
@@ -197,12 +220,6 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     }
     if (pp.position > end) {
       end = pp.position;
-      advanceEndPostings = pp.ord;
     }
-    if (pp.position == end) {
-      if (pp.ord > advanceEndPostings) {
-        advanceEndPostings = pp.ord;
-      }
-    }
     return true;
   }
@@ -307,12 +324,6 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
       pp.firstPosition();
       if (pp.position > end) {
         end = pp.position;
-        advanceEndPostings = pp.ord;
       }
-      if (pp.position == end) {
-        if (pp.ord > advanceEndPostings) {
-          advanceEndPostings = pp.ord;
-        }
-      }
       pq.add(pp);
     }
@@ -342,12 +353,6 @@ final class SloppyPhraseMatcher extends PhraseMatcher {
     for (PhrasePositions pp : phrasePositions) { // iterate cyclic list: done once handled max
       if (pp.position > end) {
         end = pp.position;
-        advanceEndPostings = pp.ord;
       }
-      if (pp.position == end) {
-        if (pp.ord > advanceEndPostings) {
-          advanceEndPostings = pp.ord;
-        }
-      }
       pq.add(pp);
     }
SynonymQuery.java

@@ -176,7 +176,7 @@ public final class SynonymQuery extends Query {
       if (terms == null || terms.hasPositions() == false) {
         return super.matches(context, doc);
       }
-      return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTerms(context, doc, field, Arrays.asList(SynonymQuery.this.terms)));
+      return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTerms(context, doc, getQuery(), field, Arrays.asList(SynonymQuery.this.terms)));
     }

     @Override
TermInSetQuery.java

@@ -226,7 +226,7 @@ public class TermInSetQuery extends Query implements Accountable {
       if (terms == null || terms.hasPositions() == false) {
         return super.matches(context, doc);
       }
-      return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, field, termData.iterator()));
+      return Matches.forField(field, () -> DisjunctionMatchesIterator.fromTermsEnum(context, doc, getQuery(), field, termData.iterator()));
     }

     /**
TermMatchesIterator.java

@@ -29,12 +29,14 @@ class TermMatchesIterator implements MatchesIterator {
   private int upto;
   private int pos;
   private final PostingsEnum pe;
+  private final Query query;

   /**
    * Create a new {@link TermMatchesIterator} for the given term and postings list
    */
-  TermMatchesIterator(PostingsEnum pe) throws IOException {
+  TermMatchesIterator(Query query, PostingsEnum pe) throws IOException {
     this.pe = pe;
+    this.query = query;
     this.upto = pe.freq();
   }

@@ -67,4 +69,13 @@ class TermMatchesIterator implements MatchesIterator {
     return pe.endOffset();
   }

+  @Override
+  public MatchesIterator getSubMatches() throws IOException {
+    return null;
+  }
+
+  @Override
+  public Query getQuery() {
+    return query;
+  }
 }
TermQuery.java

@@ -94,7 +94,7 @@ public class TermQuery extends Query {
       if (pe.advance(doc) != doc) {
         return null;
       }
-      return new TermMatchesIterator(pe);
+      return new TermMatchesIterator(getQuery(), pe);
     });
   }

SpanWeight.java

@@ -18,6 +18,8 @@ package org.apache.lucene.search.spans;


 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.Map;

 import org.apache.lucene.index.LeafReaderContext;
@@ -28,6 +30,10 @@ import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafSimScorer;
+import org.apache.lucene.search.Matches;
+import org.apache.lucene.search.MatchesIterator;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermStatistics;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
@@ -161,4 +167,138 @@ public abstract class SpanWeight extends Weight {

     return Explanation.noMatch("no matching term");
   }
+
+  private static class TermMatch {
+    Term term;
+    int position;
+    int startOffset;
+    int endOffset;
+  }
+
+  @Override
+  public Matches matches(LeafReaderContext context, int doc) throws IOException {
+    return Matches.forField(field, () -> {
+      Spans spans = getSpans(context, Postings.OFFSETS);
+      if (spans == null || spans.advance(doc) != doc) {
+        return null;
+      }
+      return new MatchesIterator() {
+
+        int innerTermCount = 0;
+        TermMatch[] innerTerms = new TermMatch[0];
+
+        SpanCollector termCollector = new SpanCollector() {
+          @Override
+          public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
+            innerTermCount++;
+            if (innerTermCount > innerTerms.length) {
+              TermMatch[] temp = new TermMatch[innerTermCount];
+              System.arraycopy(innerTerms, 0, temp, 0, innerTermCount - 1);
+              innerTerms = temp;
+              innerTerms[innerTermCount - 1] = new TermMatch();
+            }
+            innerTerms[innerTermCount - 1].term = term;
+            innerTerms[innerTermCount - 1].position = position;
+            innerTerms[innerTermCount - 1].startOffset = postings.startOffset();
+            innerTerms[innerTermCount - 1].endOffset = postings.endOffset();
+          }

+          @Override
+          public void reset() {
+            innerTermCount = 0;
+          }
+        };
+
+        @Override
+        public boolean next() throws IOException {
+          innerTermCount = 0;
+          return spans.nextStartPosition() != Spans.NO_MORE_POSITIONS;
+        }
+
+        @Override
+        public int startPosition() {
+          return spans.startPosition();
+        }
+
+        @Override
+        public int endPosition() {
+          return spans.endPosition() - 1;
+        }
+
+        @Override
+        public int startOffset() throws IOException {
+          if (innerTermCount == 0) {
+            collectInnerTerms();
+          }
+          return innerTerms[0].startOffset;
+        }
+
+        @Override
+        public int endOffset() throws IOException {
+          if (innerTermCount == 0) {
+            collectInnerTerms();
+          }
+          return innerTerms[innerTermCount - 1].endOffset;
+        }
+
+        @Override
+        public MatchesIterator getSubMatches() throws IOException {
+          if (innerTermCount == 0) {
+            collectInnerTerms();
+          }
+          return new MatchesIterator() {
+
+            int upto = -1;
+
+            @Override
+            public boolean next() throws IOException {
+              upto++;
+              return upto < innerTermCount;
+            }
+
+            @Override
+            public int startPosition() {
+              return innerTerms[upto].position;
+            }
+
+            @Override
+            public int endPosition() {
+              return innerTerms[upto].position;
+            }
+
+            @Override
+            public int startOffset() throws IOException {
+              return innerTerms[upto].startOffset;
+            }
+
+            @Override
+            public int endOffset() throws IOException {
+              return innerTerms[upto].endOffset;
+            }
+
+            @Override
+            public MatchesIterator getSubMatches() throws IOException {
+              return null;
+            }
+
+            @Override
+            public Query getQuery() {
+              return new TermQuery(innerTerms[upto].term);
+            }
+          };
+        }
+
+        @Override
+        public Query getQuery() {
+          return SpanWeight.this.getQuery();
+        }
+
+        void collectInnerTerms() throws IOException {
+          termCollector.reset();
+          spans.collect(termCollector);
+          Arrays.sort(innerTerms, 0, innerTermCount, Comparator.comparing(a -> a.position));
+        }
+      };
+    });
+  }
 }
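To make the new SpanWeight.matches concrete: a span match covers the whole span, and getSubMatches() walks the terms collected from it in position order, each labelled with an equivalent TermQuery. A minimal sketch; the query shape is borrowed from the test below, everything else is assumed.

static void printSpanSubMatches(IndexSearcher searcher, LeafReaderContext ctx, int docInLeaf) throws IOException {
  SpanQuery q = SpanNearQuery.newOrderedNearQuery("field")          // assumed field name
      .addClause(new SpanTermQuery(new Term("field", "with")))
      .addClause(new SpanTermQuery(new Term("field", "many")))
      .build();
  Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
  Matches matches = w.matches(ctx, docInLeaf);
  if (matches == null) {
    return; // no match in this document
  }
  MatchesIterator it = matches.getMatches("field");
  while (it != null && it.next()) {
    MatchesIterator subs = it.getSubMatches(); // one entry per collected term
    while (subs.next()) {
      System.out.println(subs.getQuery() + " at position " + subs.startPosition());
    }
  }
}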
TestMatchesIterator.java

@@ -18,8 +18,12 @@
 package org.apache.lucene.search;

 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.IdentityHashMap;
 import java.util.Objects;
+import java.util.Set;
 import java.util.stream.Collectors;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@@ -29,9 +33,14 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

@@ -99,7 +108,7 @@ public class TestMatchesIterator extends LuceneTestCase {
     "nothing matches this document"
   };

-  void checkMatches(Query q, String field, int[][] expected) throws IOException {
+  private void checkMatches(Query q, String field, int[][] expected) throws IOException {
     Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
     for (int i = 0; i < expected.length; i++) {
       LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(expected[i][0], searcher.leafContexts));
@@ -112,14 +121,40 @@ public class TestMatchesIterator extends LuceneTestCase {
       MatchesIterator it = matches.getMatches(field);
       if (expected[i].length == 1) {
         assertNull(it);
-        return;
+        continue;
       }
       checkFieldMatches(it, expected[i]);
+      checkFieldMatches(matches.getMatches(field), expected[i]);  // test multiple calls
     }
   }

-  void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
+  private void checkLabelCount(Query q, String field, int[] expected) throws IOException {
+    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+    for (int i = 0; i < expected.length; i++) {
+      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+      int doc = i - ctx.docBase;
+      Matches matches = w.matches(ctx, doc);
+      if (matches == null) {
+        assertEquals("Expected to get matches on document " + i, 0, expected[i]);
+        continue;
+      }
+      MatchesIterator it = matches.getMatches(field);
+      if (expected[i] == 0) {
+        assertNull(it);
+        continue;
+      }
+      else {
+        assertNotNull(it);
+      }
+      IdentityHashMap<Query, Integer> labels = new IdentityHashMap<>();
+      while (it.next()) {
+        labels.put(it.getQuery(), 1);
+      }
+      assertEquals(expected[i], labels.size());
+    }
+  }
+
+  private void checkFieldMatches(MatchesIterator it, int[] expected) throws IOException {
     int pos = 1;
     while (it.next()) {
       //System.out.println(expected[i][pos] + "->" + expected[i][pos + 1] + "[" + expected[i][pos + 2] + "->" + expected[i][pos + 3] + "]");
@@ -132,7 +167,7 @@ public class TestMatchesIterator extends LuceneTestCase {
     assertEquals(expected.length, pos);
   }

-  void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
+  private void checkNoPositionsMatches(Query q, String field, boolean[] expected) throws IOException {
     Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
     for (int i = 0; i < expected.length; i++) {
       LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
@@ -148,8 +183,109 @@ public class TestMatchesIterator extends LuceneTestCase {
     }
   }

+  private void assertIsLeafMatch(Query q, String field) throws IOException {
+    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+    for (int i = 0; i < searcher.reader.maxDoc(); i++) {
+      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+      int doc = i - ctx.docBase;
+      Matches matches = w.matches(ctx, doc);
+      if (matches == null) {
+        return;
+      }
+      MatchesIterator mi = matches.getMatches(field);
+      if (mi == null) {
+        return;
+      }
+      while (mi.next()) {
+        assertNull(mi.getSubMatches());
+      }
+    }
+  }
+
+  private void checkTermMatches(Query q, String field, TermMatch[][][] expected) throws IOException {
+    Weight w = searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
+    for (int i = 0; i < expected.length; i++) {
+      LeafReaderContext ctx = searcher.leafContexts.get(ReaderUtil.subIndex(i, searcher.leafContexts));
+      int doc = i - ctx.docBase;
+      Matches matches = w.matches(ctx, doc);
+      if (matches == null) {
+        assertEquals(expected[i].length, 0);
+        continue;
+      }
+      MatchesIterator it = matches.getMatches(field);
+      if (expected[i].length == 0) {
+        assertNull(it);
+        continue;
+      }
+      checkTerms(expected[i], it);
+    }
+  }
+
+  private void checkTerms(TermMatch[][] expected, MatchesIterator it) throws IOException {
+    int upTo = 0;
+    while (it.next()) {
+      Set<TermMatch> expectedMatches = new HashSet<>(Arrays.asList(expected[upTo]));
+      MatchesIterator submatches = it.getSubMatches();
+      while (submatches.next()) {
+        TermMatch tm = new TermMatch(submatches.startPosition(), submatches.startOffset(), submatches.endOffset());
+        if (expectedMatches.remove(tm) == false) {
+          fail("Unexpected term match: " + tm);
+        }
+      }
+      if (expectedMatches.size() != 0) {
+        fail("Missing term matches: " + expectedMatches.stream().map(Object::toString).collect(Collectors.joining(", ")));
+      }
+      upTo++;
+    }
+    if (upTo < expected.length - 1) {
+      fail("Missing expected match");
+    }
+  }
+
+  static class TermMatch {
+
+    public final int position;
+
+    public final int startOffset;
+
+    public final int endOffset;
+
+    public TermMatch(PostingsEnum pe, int position) throws IOException {
+      this.position = position;
+      this.startOffset = pe.startOffset();
+      this.endOffset = pe.endOffset();
+    }
+
+    public TermMatch(int position, int startOffset, int endOffset) {
+      this.position = position;
+      this.startOffset = startOffset;
+      this.endOffset = endOffset;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (this == o) return true;
+      if (o == null || getClass() != o.getClass()) return false;
+      TermMatch termMatch = (TermMatch) o;
+      return position == termMatch.position &&
+          startOffset == termMatch.startOffset &&
+          endOffset == termMatch.endOffset;
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hash(position, startOffset, endOffset);
+    }
+
+    @Override
+    public String toString() {
+      return position + "[" + startOffset + "->" + endOffset + "]";
+    }
+  }
+
   public void testTermQuery() throws IOException {
-    Query q = new TermQuery(new Term(FIELD_WITH_OFFSETS, "w1"));
+    Term t = new Term(FIELD_WITH_OFFSETS, "w1");
+    Query q = new TermQuery(t);
     checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
         { 0, 0, 0, 0, 2 },
         { 1, 0, 0, 0, 2 },
@@ -157,6 +293,8 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 1, 1, 1, 1, 0, 0 });
+    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
   }

   public void testTermQueryNoStoredOffsets() throws IOException {
@@ -191,6 +329,8 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 1, 2, 0, 0 });
+    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
   }

   public void testDisjunctionNoPositions() throws IOException {
@@ -215,6 +355,7 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8, 5, 5, 15, 17 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 2, 2, 0, 2, 0, 0 });
   }

   public void testReqOptNoPositions() throws IOException {
@@ -248,6 +389,8 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 2, 2, 6, 8, 3, 3, 9, 11, 5, 5, 15, 17 },
         { 4 }
     });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 3, 1, 3, 3, 0, 0 });
+    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
   }

   public void testMinShouldMatchNoPositions() throws IOException {
@@ -331,6 +474,8 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
         { 4 }
     });
+    checkLabelCount(rq, FIELD_WITH_OFFSETS, new int[]{ 1, 1, 1, 1, 0 });
+    assertIsLeafMatch(rq, FIELD_WITH_OFFSETS);

   }

@@ -357,6 +502,7 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 3, 0, 0, 0, 2, 1, 1, 3, 5, 2, 2, 6, 8, 4, 4, 12, 14 },
         { 4 }
     });
+    assertIsLeafMatch(q, FIELD_WITH_OFFSETS);
   }

   public void testSynonymQueryNoPositions() throws IOException {
@@ -392,12 +538,25 @@ public class TestMatchesIterator extends LuceneTestCase {
   // 0 1 2 3 4 5 6 7
   // "a phrase sentence with many phrase sentence iterations of a phrase sentence",

+  public void testSloppyPhraseQueryWithRepeats() throws IOException {
+    Term p = new Term(FIELD_WITH_OFFSETS, "phrase");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
+    PhraseQuery pq = new PhraseQuery(10, FIELD_WITH_OFFSETS, "phrase", "sentence", "sentence");
+    checkMatches(pq, FIELD_WITH_OFFSETS, new int[][]{
+        { 0 }, { 1 }, { 2 }, { 3 },
+        { 4, 1, 6, 2, 43, 2, 11, 9, 75, 5, 11, 28, 75, 6, 11, 35, 75 }
+    });
+    checkLabelCount(pq, FIELD_WITH_OFFSETS, new int[]{ 0, 0, 0, 0, 1 });
+    assertIsLeafMatch(pq, FIELD_WITH_OFFSETS);
+  }
+
   public void testSloppyPhraseQuery() throws IOException {
     PhraseQuery pq = new PhraseQuery(4, FIELD_WITH_OFFSETS, "a", "sentence");
     checkMatches(pq, FIELD_WITH_OFFSETS, new int[][]{
         { 0 }, { 1 }, { 2 }, { 3 },
         { 4, 0, 2, 0, 17, 6, 9, 35, 59, 9, 11, 58, 75 }
     });
+    assertIsLeafMatch(pq, FIELD_WITH_OFFSETS);
   }

   public void testExactPhraseQuery() throws IOException {
@@ -407,29 +566,36 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 4, 1, 2, 2, 17, 5, 6, 28, 43, 10, 11, 60, 75 }
     });

+    Term a = new Term(FIELD_WITH_OFFSETS, "a");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
     PhraseQuery pq2 = new PhraseQuery.Builder()
-        .add(new Term(FIELD_WITH_OFFSETS, "a"))
-        .add(new Term(FIELD_WITH_OFFSETS, "sentence"), 2)
+        .add(a)
+        .add(s, 2)
         .build();
     checkMatches(pq2, FIELD_WITH_OFFSETS, new int[][]{
         { 0 }, { 1 }, { 2 }, { 3 },
         { 4, 0, 2, 0, 17, 9, 11, 58, 75 }
     });
+    assertIsLeafMatch(pq2, FIELD_WITH_OFFSETS);
   }

   // 0 1 2 3 4 5 6 7
   // "a phrase sentence with many phrase sentence iterations of a phrase sentence",

   public void testSloppyMultiPhraseQuery() throws IOException {
+    Term p = new Term(FIELD_WITH_OFFSETS, "phrase");
+    Term s = new Term(FIELD_WITH_OFFSETS, "sentence");
+    Term i = new Term(FIELD_WITH_OFFSETS, "iterations");
     MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
-        .add(new Term(FIELD_WITH_OFFSETS, "phrase"))
-        .add(new Term[]{ new Term(FIELD_WITH_OFFSETS, "sentence"), new Term(FIELD_WITH_OFFSETS, "iterations") })
+        .add(p)
+        .add(new Term[]{ s, i })
         .setSlop(4)
         .build();
     checkMatches(mpq, FIELD_WITH_OFFSETS, new int[][]{
         { 0 }, { 1 }, { 2 }, { 3 },
-        { 4, 1, 2, 2, 17, 5, 7, 28, 54, 5, 7, 28, 54, 10, 11, 60, 75 }
+        { 4, 1, 2, 2, 17, 5, 6, 28, 43, 5, 7, 28, 54, 10, 11, 60, 75 }
     });
+    assertIsLeafMatch(mpq, FIELD_WITH_OFFSETS);
   }

   public void testExactMultiPhraseQuery() throws IOException {
@@ -450,6 +616,38 @@ public class TestMatchesIterator extends LuceneTestCase {
         { 0 }, { 1 }, { 2 }, { 3 },
         { 4, 0, 1, 0, 8, 4, 5, 23, 34, 9, 10, 58, 66 }
     });
+    assertIsLeafMatch(mpq2, FIELD_WITH_OFFSETS);
   }

+  // 0 1 2 3 4 5 6 7
+  // "a phrase sentence with many phrase sentence iterations of a phrase sentence",
+
+  public void testSpanQuery() throws IOException {
+    SpanQuery subq = SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
+        .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "with")))
+        .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "many")))
+        .build();
+    Query q = SpanNearQuery.newOrderedNearQuery(FIELD_WITH_OFFSETS)
+        .addClause(new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "sentence")))
+        .addClause(new SpanOrQuery(subq, new SpanTermQuery(new Term(FIELD_WITH_OFFSETS, "iterations"))))
+        .build();
+    checkMatches(q, FIELD_WITH_OFFSETS, new int[][]{
+        { 0 }, { 1 }, { 2 }, { 3 },
+        { 4, 2, 4, 9, 27, 6, 7, 35, 54 }
+    });
+    checkLabelCount(q, FIELD_WITH_OFFSETS, new int[]{ 0, 0, 0, 0, 1 });
+    checkTermMatches(q, FIELD_WITH_OFFSETS, new TermMatch[][][]{
+        {}, {}, {}, {},
+        {
+            {
+                new TermMatch(2, 9, 17),
+                new TermMatch(3, 18, 22),
+                new TermMatch(4, 23, 27)
+            }, {
+                new TermMatch(6, 35, 43), new TermMatch(7, 44, 54)
+            }
+        }
+    });
+  }
+
 }
AssertingMatchesIterator.java

@@ -67,4 +67,15 @@ class AssertingMatchesIterator implements MatchesIterator {
     return in.endOffset();
   }

+  @Override
+  public MatchesIterator getSubMatches() throws IOException {
+    assert state == State.ITERATING : state;
+    return in.getSubMatches();
+  }
+
+  @Override
+  public Query getQuery() {
+    assert state == State.ITERATING : state;
+    return in.getQuery();
+  }
 }