diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 805fc7e4ca3..0ccb5ee8c59 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -63,6 +63,10 @@ Improvements
PhraseQuery or MultiPhraseQuery when the word automaton is simple
(Mike McCandless)
+* LUCENE-7431: Allow a certain amount of overlap to be specified between the include
+ and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
+ (Marc Morissette via David Smiley)
+
======================= Lucene 6.3.0 =======================
API Changes
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
index 05d3f8ef481..00bcc4c1ac7 100644
--- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
@@ -49,19 +49,23 @@ public final class SpanNotQuery extends SpanQuery {
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
- * <code>dist</code> tokens of <code>include</code>. */
+ * <code>dist</code> tokens of <code>include</code>. Inversely, a negative
+ * <code>dist</code> value may be used to specify a certain amount of allowable
+ * overlap. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
this(include, exclude, dist, dist);
}
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
- * <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
+ * <code>pre</code> tokens before or <code>post</code> tokens of
+ * <code>include</code>. Inversely, negative values for <code>pre</code> and/or
+ * <code>post</code> allow a certain amount of overlap to occur. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
this.include = Objects.requireNonNull(include);
this.exclude = Objects.requireNonNull(exclude);
- this.pre = (pre >=0) ? pre : 0;
- this.post = (post >= 0) ? post : 0;
+ this.pre = pre;
+ this.post = post;
if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
throw new IllegalArgumentException("Clauses must have same field.");
@@ -226,4 +230,4 @@ public final class SpanNotQuery extends SpanQuery {
return h;
}
-}
\ No newline at end of file
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
index b18a38df2d5..d699719e478 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
@@ -274,20 +274,42 @@ public class TestBasics extends LuceneTestCase {
assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
}
- public void testSpanNotWindowNeg() throws Exception {
+ public void testSpanNotWindowNegPost() throws Exception {
//test handling of invalid window < 0
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
SpanQuery or = spanOrQuery("field", "forty");
- SpanQuery query = spanNotQuery(near, or);
-
+ SpanQuery query = spanNotQuery(near, or, 0, -1);
checkHits(query, new int[]
{801, 821, 831, 851, 861, 871, 881, 891,
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
+ query = spanNotQuery(near, or, 0, -2);
+ checkHits(query, new int[]
+ {801, 821, 831, 841, 851, 861, 871, 881, 891,
+ 1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
+
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
}
-
+
+ public void testSpanNotWindowNegPre() throws Exception {
+ //test that a negative pre window (< 0) tolerates some overlap with the exclude span
+ SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
+ SpanQuery or = spanOrQuery("field", "forty");
+ SpanQuery query = spanNotQuery(near, or, -2, 0);
+ checkHits(query, new int[]
+ {801, 821, 831, 851, 861, 871, 881, 891,
+ 1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
+
+ query = spanNotQuery(near, or, -3, 0);
+ checkHits(query, new int[]
+ {801, 821, 831, 841, 851, 861, 871, 881, 891,
+ 1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
+
+ assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
+ assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
+ }
+
public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
//test hitting two excludes before an include
SpanQuery near = spanNearOrderedQuery("field", 2, "forty", "two");
diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
index 2d5e05cf8e5..2b5b919f385 100644
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpans.java
@@ -99,7 +99,6 @@ public class TestSpans extends LuceneTestCase {
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
"r1 s11",
"r1 s21"
-
};
private void checkHits(Query query, int[] results) throws IOException {
@@ -406,42 +405,54 @@ public class TestSpans extends LuceneTestCase {
}
-
- public void testSpanNots() throws Throwable{
- assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
- assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
-
- //focus on behind
- assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
- assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
- assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
- assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
- assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
-
- //focus on both
- assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
- assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
- assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
- assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
-
- //focus on ahead
- assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
- assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
- assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
- assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
- assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
- assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
-
- //exclude doesn't exist
- assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
- //include doesn't exist
- assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
+ public void testSpanNots() throws Throwable {
+ assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0);
+ assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0);
+
+ //focus on behind
+ assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0));
+ assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0));
+ assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0));
+ assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0));
+ assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0));
+
+ //focus on both
+ assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1));
+ assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1));
+ assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1));
+ assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10));
+
+ //focus on ahead
+ assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10));
+ assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1));
+ assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2));
+ assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3));
+ assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4));
+ assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8));
+
+ //exclude doesn't exist
+ assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8));
+
+ //include doesn't exist
+ assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8));
+
+ // Negative values
+ assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0));
+ assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1));
+ assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx", 0, -2));
+ assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2));
+ assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1));
+ assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1));
+ assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3));
+ assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2));
}
-
- private int spanCount(String include, String exclude, int pre, int post) throws IOException{
- SpanQuery iq = spanTermQuery(field, include);
+
+
+ private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{
+ String[] includeTerms = include.split(" +");
+ SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms);
SpanQuery eq = spanTermQuery(field, exclude);
SpanQuery snq = spanNotQuery(iq, eq, pre, post);
Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);