From bfad47b5c63bfd2f266e06eae5a0871f215f13ac Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Fri, 14 Nov 2014 18:07:56 +0000 Subject: [PATCH] LUCENE-3229: Overlapping ordered SpanNearQuery spans should not match. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1639714 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 ++++ .../lucene/search/spans/NearSpansOrdered.java | 27 ++++++++------- .../search/spans/NearSpansUnordered.java | 14 +++++++- .../lucene/search/spans/SpanNearQuery.java | 9 +++-- .../search/spans/TestNearSpansOrdered.java | 33 ++++++++++++++++++- 5 files changed, 71 insertions(+), 19 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2a2a5a83ca7..cf6b09c15a7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -335,6 +335,13 @@ Other * LUCENE-5915: Remove Pulsing postings format. (Robert Muir) +======================= Lucene 4.10.3 ====================== + +Bug fixes + +* LUCENE-3229: Overlapping ordered SpanNearQuery spans should not match. + (Ludovic Boutros, Paul Elschot, Greg Dearing, ehatcher) + ======================= Lucene 4.10.2 ====================== Bug fixes diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java index 6e804a85292..508c9661ed2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java @@ -234,24 +234,23 @@ public class NearSpansOrdered extends Spans { return true; } - /** Check whether two Spans in the same document are ordered. - * @return true iff spans1 starts before spans2 - * or the spans start at the same position, - * and spans1 ends before spans2. + /** Check whether two Spans in the same document are ordered and not overlapping. + * @return false iff spans2's start position is smaller than spans1's end position */ - static final boolean docSpansOrdered(Spans spans1, Spans spans2) { + static final boolean docSpansOrderedNonOverlap(Spans spans1, Spans spans2) { assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc(); - int start1 = spans1.start(); - int start2 = spans2.start(); - /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */ - return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2); + assert spans1.start() < spans1.end(); + assert spans2.start() < spans2.end(); + return spans1.end() <= spans2.start(); } - /** Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans + /** Like {@link #docSpansOrderedNonOverlap(Spans,Spans)}, but use the spans * starts and ends as parameters. */ - private static final boolean docSpansOrdered(int start1, int end1, int start2, int end2) { - return (start1 == start2) ? (end1 < end2) : (start1 < start2); + private static final boolean docSpansOrderedNonOverlap(int start1, int end1, int start2, int end2) { + assert start1 < end1; + assert start2 < end2; + return end1 <= start2; } /** Order the subSpans within the same document by advancing all later spans @@ -260,7 +259,7 @@ public class NearSpansOrdered extends Spans { private boolean stretchToOrder() throws IOException { matchDoc = subSpans[0].doc(); for (int i = 1; inSameDoc && (i < subSpans.length); i++) { - while (! docSpansOrdered(subSpans[i-1], subSpans[i])) { + while (! docSpansOrderedNonOverlap(subSpans[i-1], subSpans[i])) { if (! subSpans[i].next()) { inSameDoc = false; more = false; @@ -312,7 +311,7 @@ public class NearSpansOrdered extends Spans { } else { int ppStart = prevSpans.start(); int ppEnd = prevSpans.end(); // Cannot avoid invoking .end() - if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) { + if (! docSpansOrderedNonOverlap(ppStart, ppEnd, lastStart, lastEnd)) { break; // Check remaining subSpans. } else { // prevSpans still before (lastStart, lastEnd) prevStart = ppStart; diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index 3c82cd2ab3b..168e52d2abf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -63,7 +63,7 @@ public class NearSpansUnordered extends Spans { @Override protected final boolean lessThan(SpansCell spans1, SpansCell spans2) { if (spans1.doc() == spans2.doc()) { - return NearSpansOrdered.docSpansOrdered(spans1, spans2); + return docSpansOrdered(spans1, spans2); } else { return spans1.doc() < spans2.doc(); } @@ -233,6 +233,18 @@ public class NearSpansUnordered extends Spans { return more && (atMatch() || next()); } + /** Check whether two Spans in the same document are ordered with possible overlap. + * @return true iff spans1 starts before spans2 + * or the spans start at the same position, + * and spans1 ends before spans2. + */ + static final boolean docSpansOrdered(Spans spans1, Spans spans2) { + assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc(); + int start1 = spans1.start(); + int start2 = spans2.start(); + return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2); + } + private SpansCell min() { return queue.top(); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 4f6f212c716..1e1d0831058 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -48,12 +48,15 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { /** Construct a SpanNearQuery. Matches spans matching a span from each * clause, with up to slop total unmatched positions between - * them. * When inOrder is true, the spans from each clause - * must be * ordered as in clauses. + * them. + *
When inOrder is true, the spans from each clause + * must be in the same order as in clauses and must be non-overlapping. + *
When inOrder is false, the spans from each clause + * need not be ordered and may overlap. * @param clauses the clauses to find near each other * @param slop The slop value * @param inOrder true if order is important - * */ + */ public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) { this(clauses, slop, inOrder, true); } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index 5b55d318fbc..649f3019268 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -82,6 +82,22 @@ public class TestNearSpansOrdered extends LuceneTestCase { protected SpanNearQuery makeQuery() { return makeQuery("w1","w2","w3",1,true); } + + protected SpanNearQuery makeOverlappedQuery( + String sqt1, String sqt2, boolean sqOrdered, + String t3, boolean ordered) { + return new SpanNearQuery( + new SpanQuery[] { + new SpanNearQuery(new SpanQuery[] { + new SpanTermQuery(new Term(FIELD, sqt1)), + new SpanTermQuery(new Term(FIELD, sqt2)) }, + 1, + sqOrdered + ), + new SpanTermQuery(new Term(FIELD, t3)) }, + 0, + ordered); + } public void testSpanNearQuery() throws Exception { SpanNearQuery q = makeQuery(); @@ -170,6 +186,22 @@ public class TestNearSpansOrdered extends LuceneTestCase { Scorer s = w.scorer(leave, leave.reader().getLiveDocs()); assertEquals(1, s.advance(1)); } + + public void testOverlappedOrderedSpan() throws Exception { + SpanNearQuery q = makeOverlappedQuery("w5", "w3", false, "w4", true); + CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {}); + } + + public void testOverlappedNonOrderedSpan() throws Exception { + SpanNearQuery q = makeOverlappedQuery("w3", "w5", true, "w4", false); + CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {0}); + } + + public void testNonOverlappedOrderedSpan() throws Exception { + SpanNearQuery q = makeOverlappedQuery("w3", "w4", true, "w5", true); + CheckHits.checkHits(random(), q, FIELD, searcher, new int[] {0}); + } + /** * not a direct test of NearSpans, but a demonstration of how/when @@ -182,5 +214,4 @@ public class TestNearSpansOrdered extends LuceneTestCase { + e.toString(), 0.0f < e.getValue()); } - }