From 1305501dbd5bf79811eab9da873cf1567605e6a5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 10 Dec 2018 09:47:29 +0000 Subject: [PATCH] LUCENE-8597: Add IntervalIterator.gaps() and Intervals.maxgaps() --- lucene/CHANGES.txt | 4 ++ .../intervals/DifferenceIntervalFunction.java | 10 +++ .../intervals/DisjunctionIntervalsSource.java | 15 +++++ ...urce.java => FilteredIntervalsSource.java} | 65 +++++++++++-------- .../search/intervals/IntervalFilter.java | 5 ++ .../search/intervals/IntervalFunction.java | 54 ++++++++++++++- .../search/intervals/IntervalIterator.java | 11 ++++ .../search/intervals/IntervalMatches.java | 9 +++ .../intervals/IntervalMatchesIterator.java | 38 +++++++++++ .../lucene/search/intervals/Intervals.java | 23 ++++++- .../MinimizingConjunctionIntervalsSource.java | 7 +- .../search/intervals/TermIntervalsSource.java | 5 ++ .../lucene/search/intervals/package-info.java | 2 + .../search/intervals/TestIntervalQuery.java | 10 +++ .../search/intervals/TestIntervals.java | 60 ++++++++++++++++- 15 files changed, 285 insertions(+), 33 deletions(-) rename lucene/sandbox/src/java/org/apache/lucene/search/intervals/{LowpassIntervalsSource.java => FilteredIntervalsSource.java} (71%) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatchesIterator.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 007d35b3666..41259bd22e5 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -98,6 +98,10 @@ API Changes methods. This decouples normalization from tokenization entirely. (Mayya Sharipova, Alan Woodward) +* LUCENE-8597: IntervalIterator now exposes a gaps() method that reports the + number of gaps between its component sub-intervals. This can be used in a + new filter available via Intervals.maxgaps(). (Alan Woodward) + Changes in Runtime Behavior * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java index 18d4d677e53..def1d03d6fa 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java @@ -125,6 +125,11 @@ abstract class DifferenceIntervalFunction { return a.end(); } + @Override + public int gaps() { + return a.gaps(); + } + @Override public float matchCost() { return a.matchCost() + b.matchCost(); @@ -232,6 +237,11 @@ abstract class DifferenceIntervalFunction { return newEnd; } + @Override + public int gaps() { + throw new UnsupportedOperationException(); + } + @Override public int nextInterval() throws IOException { if (positioned == false) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java index c54f18bec12..68a9e5decdd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DisjunctionIntervalsSource.java @@ -135,6 +135,11 @@ class DisjunctionIntervalsSource extends IntervalsSource { return current.end(); } + @Override + public int gaps() { + return current.gaps(); + } + private void reset() throws IOException { intervalQueue.clear(); for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) { @@ -228,6 +233,11 @@ class DisjunctionIntervalsSource extends IntervalsSource { return -1; } + @Override + public int gaps() { + throw new UnsupportedOperationException(); + } + @Override public int nextInterval() { return NO_MORE_INTERVALS; @@ -271,6 +281,11 @@ class DisjunctionIntervalsSource extends IntervalsSource { return NO_MORE_INTERVALS; } + @Override + public int gaps() { + throw new UnsupportedOperationException(); + } + @Override public int nextInterval() { return NO_MORE_INTERVALS; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/FilteredIntervalsSource.java similarity index 71% rename from lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java rename to lucene/sandbox/src/java/org/apache/lucene/search/intervals/FilteredIntervalsSource.java index 61cb1fc07d9..8eac88d8b23 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/LowpassIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/FilteredIntervalsSource.java @@ -25,34 +25,28 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.MatchesIterator; -class LowpassIntervalsSource extends IntervalsSource { +/** + * An IntervalsSource that filters the intervals from another IntervalsSource + */ +public abstract class FilteredIntervalsSource extends IntervalsSource { - final IntervalsSource in; - private final int maxWidth; + private final String name; + private final IntervalsSource in; - LowpassIntervalsSource(IntervalsSource in, int maxWidth) { + /** + * Create a new FilteredIntervalsSource + * @param name the name of the filter + * @param in the source to filter + */ + public FilteredIntervalsSource(String name, IntervalsSource in) { + this.name = name; this.in = in; - this.maxWidth = maxWidth; } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - LowpassIntervalsSource that = (LowpassIntervalsSource) o; - return maxWidth == that.maxWidth && - Objects.equals(in, that.in); - } - - @Override - public String toString() { - return "MAXWIDTH/" + maxWidth + "(" + in + ")"; - } - - @Override - public void extractTerms(String field, Set terms) { - in.extractTerms(field, terms); - } + /** + * @return {@code false} if the current interval should be filtered out + */ + protected abstract boolean accept(IntervalIterator it); @Override public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { @@ -63,7 +57,7 @@ class LowpassIntervalsSource extends IntervalsSource { return new IntervalFilter(i) { @Override protected boolean accept() { - return (i.end() - i.start()) + 1 <= maxWidth; + return FilteredIntervalsSource.this.accept(in); } }; } @@ -77,14 +71,33 @@ class LowpassIntervalsSource extends IntervalsSource { IntervalIterator filtered = new IntervalFilter(IntervalMatches.wrapMatches(mi, doc)) { @Override protected boolean accept() { - return (this.in.end() - this.in.start()) + 1 <= maxWidth; + return FilteredIntervalsSource.this.accept(in); } }; return IntervalMatches.asMatches(filtered, mi, doc); } + @Override + public void extractTerms(String field, Set terms) { + in.extractTerms(field, terms); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FilteredIntervalsSource that = (FilteredIntervalsSource) o; + return Objects.equals(name, that.name) && + Objects.equals(in, that.in); + } + @Override public int hashCode() { - return Objects.hash(in, maxWidth); + return Objects.hash(name, in); + } + + @Override + public String toString() { + return name + "(" + in + ")"; } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java index a501b49b0e3..46c030217e9 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFilter.java @@ -64,6 +64,11 @@ public abstract class IntervalFilter extends IntervalIterator { return in.end(); } + @Override + public int gaps() { + return in.gaps(); + } + @Override public float matchCost() { return in.matchCost(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java index 787574e52e9..1a5eab63607 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.intervals; import java.io.IOException; +import java.util.Arrays; import java.util.List; import org.apache.lucene.util.PriorityQueue; @@ -66,6 +67,11 @@ abstract class IntervalFunction { return end; } + @Override + public int gaps() { + return 0; + } + @Override public int nextInterval() throws IOException { if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS) @@ -109,6 +115,7 @@ abstract class IntervalFunction { private static class OrderedIntervalIterator extends ConjunctionIntervalIterator { int start = -1, end = -1, i; + int firstEnd; private OrderedIntervalIterator(List subIntervals) { super(subIntervals); @@ -143,6 +150,7 @@ abstract class IntervalFunction { i++; } start = subIterators.get(0).start(); + firstEnd = subIterators.get(0).end(); end = subIterators.get(subIterators.size() - 1).end(); b = subIterators.get(subIterators.size() - 1).start(); i = 1; @@ -151,11 +159,20 @@ abstract class IntervalFunction { } } + @Override + public int gaps() { + int gaps = subIterators.get(1).start() - firstEnd - 1; + for (int i = 2; i < subIterators.size(); i++) { + gaps += (subIterators.get(i).start() - subIterators.get(i - 1).end() - 1); + } + return gaps; + } + @Override protected void reset() throws IOException { subIterators.get(0).nextInterval(); i = 1; - start = end = -1; + start = end = firstEnd = -1; } } @@ -183,9 +200,10 @@ abstract class IntervalFunction { private final PriorityQueue queue; private final IntervalIterator[] subIterators; + private final int[] innerPositions; private final boolean allowOverlaps; - int start = -1, end = -1, queueEnd; + int start = -1, end = -1, firstEnd, queueEnd; UnorderedIntervalIterator(List subIterators, boolean allowOverlaps) { super(subIterators); @@ -196,6 +214,7 @@ abstract class IntervalFunction { } }; this.subIterators = new IntervalIterator[subIterators.size()]; + this.innerPositions = new int[subIterators.size() * 2]; this.allowOverlaps = allowOverlaps; for (int i = 0; i < subIterators.size(); i++) { @@ -241,6 +260,7 @@ abstract class IntervalFunction { // then, minimize it do { start = queue.top().start(); + firstEnd = queue.top().end(); end = queueEnd; if (queue.top().end() == end) return start; @@ -260,6 +280,26 @@ abstract class IntervalFunction { return start; } + @Override + public int gaps() { + for (int i = 0; i < subIterators.length; i++) { + if (subIterators[i].end() > end) { + innerPositions[i * 2] = start; + innerPositions[i * 2 + 1] = firstEnd; + } + else { + innerPositions[i * 2] = subIterators[i].start(); + innerPositions[i * 2 + 1] = subIterators[i].end(); + } + } + Arrays.sort(innerPositions); + int gaps = 0; + for (int i = 1; i < subIterators.length; i++) { + gaps += (innerPositions[i * 2] - innerPositions[i * 2 - 1] - 1); + } + return gaps; + } + @Override protected void reset() throws IOException { queueEnd = start = end = -1; @@ -324,6 +364,11 @@ abstract class IntervalFunction { return a.end(); } + @Override + public int gaps() { + return a.gaps(); + } + @Override public int nextInterval() throws IOException { if (bpos == false) @@ -371,6 +416,11 @@ abstract class IntervalFunction { return a.end(); } + @Override + public int gaps() { + return a.gaps(); + } + @Override public int nextInterval() throws IOException { if (bpos == false) diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java index 613cb782e40..f819aab1bc2 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java @@ -59,6 +59,17 @@ public abstract class IntervalIterator extends DocIdSetIterator { */ public abstract int end(); + /** + * The number of gaps within the current interval + * + * Note that this returns the number of gaps between the immediate sub-intervals + * of this interval, and does not include the gaps inside those sub-intervals. + * + * Should not be called before {@link #nextInterval()}, or after it has returned + * {@link #NO_MORE_INTERVALS} + */ + public abstract int gaps(); + /** * Advance the iterator to the next interval * diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java index 89f115d3f2f..a28f6e47821 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java @@ -98,6 +98,15 @@ final class IntervalMatches { return mi.endPosition(); } + @Override + public int gaps() { + assert state == State.ITERATING; + if (mi instanceof IntervalMatchesIterator) { + return ((IntervalMatchesIterator)mi).gaps(); + } + return 0; + } + @Override public int nextInterval() throws IOException { assert state == State.ITERATING; diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatchesIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatchesIterator.java new file mode 100644 index 00000000000..55482e38cc3 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatchesIterator.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search.intervals; + +import org.apache.lucene.search.MatchesIterator; + +/** + * An extension of MatchesIterator that allows the gaps from a wrapped + * IntervalIterator to be reported. + * + * This is necessary because {@link MatchesIterator#getSubMatches()} returns + * the submatches of all nested matches as a flat iterator, but + * {@link IntervalIterator#gaps()} only returns the gaps between its immediate + * sub-matches, so we can't calculate the latter using the former. + */ +interface IntervalMatchesIterator extends MatchesIterator { + + /** + * The number of top-level gaps inside the current match + */ + int gaps(); + +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java index e9f1bd25812..b0a482943a0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java @@ -77,11 +77,30 @@ public final class Intervals { /** * Create an {@link IntervalsSource} that filters a sub-source by the width of its intervals - * @param width the maximum width of intervals in the sub-source ot return + * @param width the maximum width of intervals in the sub-source to filter * @param subSource the sub-source to filter */ public static IntervalsSource maxwidth(int width, IntervalsSource subSource) { - return new LowpassIntervalsSource(subSource, width); + return new FilteredIntervalsSource("MAXWIDTH/" + width, subSource) { + @Override + protected boolean accept(IntervalIterator it) { + return (it.end() - it.start()) + 1 <= width; + } + }; + } + + /** + * Create an {@link IntervalsSource} that filters a sub-source by its gaps + * @param gaps the maximum number of gaps in the sub-source to filter + * @param subSource the sub-source to filter + */ + public static IntervalsSource maxgaps(int gaps, IntervalsSource subSource) { + return new FilteredIntervalsSource("MAXGAPS/" + gaps, subSource) { + @Override + protected boolean accept(IntervalIterator it) { + return it.gaps() <= gaps; + } + }; } /** diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java index 669acae727c..c509692a75c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/MinimizingConjunctionIntervalsSource.java @@ -61,7 +61,7 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource { return new ConjunctionMatchesIterator(it, subs); } - private static class ConjunctionMatchesIterator implements MatchesIterator { + private static class ConjunctionMatchesIterator implements IntervalMatchesIterator { final IntervalIterator iterator; final List subs; @@ -111,6 +111,11 @@ class MinimizingConjunctionIntervalsSource extends ConjunctionIntervalsSource { return end; } + @Override + public int gaps() { + return iterator.gaps(); + } + @Override public MatchesIterator getSubMatches() throws IOException { List mis = new ArrayList<>(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java index 27e5b794f48..f58d8677c9d 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/TermIntervalsSource.java @@ -95,6 +95,11 @@ class TermIntervalsSource extends IntervalsSource { return pos; } + @Override + public int gaps() { + return 0; + } + @Override public int nextInterval() throws IOException { if (upto <= 0) diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java index 88d93eaaa75..cfcd9e7125b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/package-info.java @@ -44,6 +44,8 @@ *
    *
  • {@link org.apache.lucene.search.intervals.Intervals#maxwidth(int, org.apache.lucene.search.intervals.IntervalsSource)} * — Filters out intervals that are larger than a set width
  • + *
  • {@link org.apache.lucene.search.intervals.Intervals#maxgaps(int, org.apache.lucene.search.intervals.IntervalsSource)} + * — Filters out intervals that have more than a set number of gaps between their constituent sub-intervals
  • *
  • {@link org.apache.lucene.search.intervals.Intervals#containedBy(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} * — Returns intervals that are contained by another interval
  • *
  • {@link org.apache.lucene.search.intervals.Intervals#notContainedBy(org.apache.lucene.search.intervals.IntervalsSource, org.apache.lucene.search.intervals.IntervalsSource)} diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java index 8f0623e8369..18c69a7d586 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java @@ -91,6 +91,16 @@ public class TestIntervalQuery extends LuceneTestCase { new int[]{0, 1, 2, 3, 5}); } + public void testOrderedNearQueryGaps1() throws IOException { + checkHits(new IntervalQuery(field, Intervals.maxgaps(1, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), + new int[]{0, 1, 2, 5}); + } + + public void testOrderedNearQueryGaps2() throws IOException { + checkHits(new IntervalQuery(field, Intervals.maxgaps(2, Intervals.ordered(Intervals.term("w1"), Intervals.term("w2")))), + new int[]{0, 1, 2, 3, 5}); + } + public void testNestedOrderedNearQuery() throws IOException { // onear/1(w1, onear/2(w2, w3)) Query q = new IntervalQuery(field, diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java index 6002b3a74fc..0dee62fb944 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java @@ -64,7 +64,7 @@ public class TestIntervals extends LuceneTestCase { "Down to a sunless sea", "So thrice five miles of fertile ground", "Pease hot porridge porridge", - "Pease porridge porridge hot" + "w1 w2 w3 w4 w1 w6 w3 w8 w4 w7 w1 w6" }; private static Directory directory; @@ -84,7 +84,7 @@ public class TestIntervals extends LuceneTestCase { for (int i = 0; i < field1_docs.length; i++) { Document doc = new Document(); doc.add(new Field("field1", field1_docs[i], FIELD_TYPE)); - doc.add(new TextField("field2", field2_docs[i], Field.Store.NO)); + doc.add(new Field("field2", field2_docs[i], FIELD_TYPE)); doc.add(new StringField("id", Integer.toString(i), Field.Store.NO)); doc.add(new NumericDocValuesField("id", i)); writer.addDocument(doc); @@ -149,6 +149,19 @@ public class TestIntervals extends LuceneTestCase { assertEquals(endOffset, mi.endOffset()); } + private void assertGaps(IntervalsSource source, int doc, String field, int[] expectedGaps) throws IOException { + int ord = ReaderUtil.subIndex(doc, searcher.getIndexReader().leaves()); + LeafReaderContext ctx = searcher.getIndexReader().leaves().get(ord); + IntervalIterator it = source.intervals(field, ctx); + assertEquals(doc, it.advance(doc)); + for (int expectedGap : expectedGaps) { + if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) { + fail("Unexpected interval " + it); + } + assertEquals(expectedGap, it.gaps()); + } + } + public void testIntervalsOnFieldWithNoPositions() throws IOException { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { Intervals.term("wibble").intervals("id", searcher.getIndexReader().leaves().get(0)); @@ -241,6 +254,10 @@ public class TestIntervals extends LuceneTestCase { assertMatch(sub, 17, 17, 96, 99); assertFalse(sub.next()); assertFalse(mi.next()); + + assertGaps(source, 1, "field1", new int[]{ + 1, 0, 10 + }); } public void testIntervalDisjunction() throws IOException { @@ -287,6 +304,7 @@ public class TestIntervals extends LuceneTestCase { { 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 17 }, {} }); + assertNull(getMatches(source, 0, "field1")); MatchesIterator mi = getMatches(source, 1, "field1"); assertMatch(mi, 0, 2, 0, 18); @@ -302,6 +320,11 @@ public class TestIntervals extends LuceneTestCase { assertMatch(mi, 5, 7, 35, 55); assertMatch(mi, 6, 17, 41, 99); assertFalse(mi.next()); + + assertGaps(source, 1, "field1", new int[]{ + 0, 0, 0, 0, 0, 0, 9 + }); + } public void testNesting2() throws IOException { @@ -447,4 +470,37 @@ public class TestIntervals extends LuceneTestCase { assertFalse(mi.next()); } + public void testMaxGaps() throws IOException { + + IntervalsSource source = Intervals.maxgaps(1, + Intervals.unordered(Intervals.term("w1"), Intervals.term("w3"), Intervals.term("w4"))); + checkIntervals(source, "field2", 1, new int[][]{ + {}, {}, {}, {}, {}, + { 0, 3, 2, 4, 3, 6 } + }); + + MatchesIterator mi = getMatches(source, 5, "field2"); + assertMatch(mi, 0, 3, 0, 11); + + } + + public void testNestedMaxGaps() throws IOException { + IntervalsSource source = Intervals.maxgaps(1, + Intervals.unordered( + Intervals.ordered(Intervals.term("w1"), Intervals.term("w3")), + Intervals.term("w4") + )); + checkIntervals(source, "field2", 1, new int[][]{ + {}, {}, {}, {}, {}, + { 0, 3, 3, 6, 4, 8 } + }); + + assertGaps(source, 5, "field2", new int[]{ 0, 0, 1 }); + + MatchesIterator mi = getMatches(source, 5, "field2"); + assertMatch(mi, 0, 3, 0, 11); + assertMatch(mi, 3, 6, 9, 20); + assertMatch(mi, 4, 8, 12, 26); + } + }