mirror of https://github.com/apache/lucene.git
LUCENE-7431: SpanNotQuery should support negative pre/post distance for overlap
This commit is contained in:
parent
cfcf4081fc
commit
750da7c5f7
|
@ -63,6 +63,10 @@ Improvements
|
||||||
PhraseQuery or MultiPhraseQuery when the word automaton is simple
|
PhraseQuery or MultiPhraseQuery when the word automaton is simple
|
||||||
(Mike McCandless)
|
(Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-7431: Allow a certain amount of overlap to be specified between the include
|
||||||
|
and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
|
||||||
|
(Marc Morissette via David Smiley)
|
||||||
|
|
||||||
======================= Lucene 6.3.0 =======================
|
======================= Lucene 6.3.0 =======================
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
|
@ -49,19 +49,23 @@ public final class SpanNotQuery extends SpanQuery {
|
||||||
|
|
||||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||||
* have no overlap with spans from <code>exclude</code> within
|
* have no overlap with spans from <code>exclude</code> within
|
||||||
* <code>dist</code> tokens of <code>include</code>. */
|
* <code>dist</code> tokens of <code>include</code>. Conversely, a negative
|
||||||
|
* <code>dist</code> value may be used to specify a certain amount of allowable
|
||||||
|
* overlap. */
|
||||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
|
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
|
||||||
this(include, exclude, dist, dist);
|
this(include, exclude, dist, dist);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||||
* have no overlap with spans from <code>exclude</code> within
|
* have no overlap with spans from <code>exclude</code> within
|
||||||
* <code>pre</code> tokens before or <code>post</code> tokens of <code>include</code>. */
|
* <code>pre</code> tokens before or <code>post</code> tokens after
|
||||||
|
* <code>include</code>. Conversely, negative values for <code>pre</code> and/or
|
||||||
|
* <code>post</code> allow a certain amount of overlap to occur. */
|
||||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
|
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
|
||||||
this.include = Objects.requireNonNull(include);
|
this.include = Objects.requireNonNull(include);
|
||||||
this.exclude = Objects.requireNonNull(exclude);
|
this.exclude = Objects.requireNonNull(exclude);
|
||||||
this.pre = (pre >=0) ? pre : 0;
|
this.pre = pre;
|
||||||
this.post = (post >= 0) ? post : 0;
|
this.post = post;
|
||||||
|
|
||||||
if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
|
if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField()))
|
||||||
throw new IllegalArgumentException("Clauses must have same field.");
|
throw new IllegalArgumentException("Clauses must have same field.");
|
||||||
|
|
|
@ -274,16 +274,38 @@ public class TestBasics extends LuceneTestCase {
|
||||||
assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
|
assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSpanNotWindowNeg() throws Exception {
|
public void testSpanNotWindowNegPost() throws Exception {
|
||||||
//test handling of invalid window < 0
|
//test that a negative post value allows limited overlap between include and exclude
|
||||||
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
|
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
|
||||||
SpanQuery or = spanOrQuery("field", "forty");
|
SpanQuery or = spanOrQuery("field", "forty");
|
||||||
SpanQuery query = spanNotQuery(near, or);
|
SpanQuery query = spanNotQuery(near, or, 0, -1);
|
||||||
|
|
||||||
checkHits(query, new int[]
|
checkHits(query, new int[]
|
||||||
{801, 821, 831, 851, 861, 871, 881, 891,
|
{801, 821, 831, 851, 861, 871, 881, 891,
|
||||||
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
|
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
|
||||||
|
|
||||||
|
query = spanNotQuery(near, or, 0, -2);
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{801, 821, 831, 841, 851, 861, 871, 881, 891,
|
||||||
|
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
|
||||||
|
|
||||||
|
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
|
||||||
|
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSpanNotWindowNegPre() throws Exception {
|
||||||
|
//test that a negative pre value allows limited overlap between include and exclude
|
||||||
|
SpanQuery near = spanNearOrderedQuery("field", 4, "eight", "one");
|
||||||
|
SpanQuery or = spanOrQuery("field", "forty");
|
||||||
|
SpanQuery query = spanNotQuery(near, or, -2, 0);
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{801, 821, 831, 851, 861, 871, 881, 891,
|
||||||
|
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
|
||||||
|
|
||||||
|
query = spanNotQuery(near, or, -3, 0);
|
||||||
|
checkHits(query, new int[]
|
||||||
|
{801, 821, 831, 841, 851, 861, 871, 881, 891,
|
||||||
|
1801, 1821, 1831, 1841, 1851, 1861, 1871, 1881, 1891});
|
||||||
|
|
||||||
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
|
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
|
||||||
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
|
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,7 +99,6 @@ public class TestSpans extends LuceneTestCase {
|
||||||
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
|
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx",
|
||||||
"r1 s11",
|
"r1 s11",
|
||||||
"r1 s21"
|
"r1 s21"
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
private void checkHits(Query query, int[] results) throws IOException {
|
private void checkHits(Query query, int[] results) throws IOException {
|
||||||
|
@ -408,40 +407,52 @@ public class TestSpans extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSpanNots() throws Throwable {
|
public void testSpanNots() throws Throwable {
|
||||||
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
|
|
||||||
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
|
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", 0, "s2", 0, 0), 0);
|
||||||
|
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", 0, "s2", 10, 10), 0);
|
||||||
|
|
||||||
//focus on behind
|
//focus on behind
|
||||||
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
|
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", 0, "s1", 6, 0));
|
||||||
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
|
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", 0, "s1", 5, 0));
|
||||||
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
|
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", 0, "s1", 3, 0));
|
||||||
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
|
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", 0, "s1", 2, 0));
|
||||||
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
|
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", 0, "s1", 0, 0));
|
||||||
|
|
||||||
//focus on both
|
//focus on both
|
||||||
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
|
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", 0, "s1", 3, 1));
|
||||||
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
|
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", 0, "s1", 2, 1));
|
||||||
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
|
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", 0, "s1", 1, 1));
|
||||||
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
|
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", 0, "s1", 10, 10));
|
||||||
|
|
||||||
//focus on ahead
|
//focus on ahead
|
||||||
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
|
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", 0, "s2", 10, 10));
|
||||||
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
|
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", 0, "s2", 0, 1));
|
||||||
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
|
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", 0, "s2", 0, 2));
|
||||||
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
|
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", 0, "s2", 0, 3));
|
||||||
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
|
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", 0, "s2", 0, 4));
|
||||||
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
|
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", 0, "s2", 0, 8));
|
||||||
|
|
||||||
//exclude doesn't exist
|
//exclude doesn't exist
|
||||||
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
|
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", 0, "s3", 8, 8));
|
||||||
|
|
||||||
//include doesn't exist
|
//include doesn't exist
|
||||||
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
|
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", 0, "s1", 8, 8));
|
||||||
|
|
||||||
|
// Negative values
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_0_0", 1, spanCount("s2 s1", 10, "xx", 0, 0));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_1_1", 1, spanCount("s2 s1", 10, "xx", -1, -1));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_0_2", 2, spanCount("s2 s1", 10, "xx", 0, -2));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_1_2", 2, spanCount("s2 s1", 10, "xx", -1, -2));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_2_1", 2, spanCount("s2 s1", 10, "xx", -2, -1));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_3_1", 2, spanCount("s2 s1", 10, "xx", -3, -1));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_1_3", 2, spanCount("s2 s1", 10, "xx", -1, -3));
|
||||||
|
assertEquals("SpanNotS2S1NotXXNeg_2_2", 3, spanCount("s2 s1", 10, "xx", -2, -2));
|
||||||
}
|
}
|
||||||
|
|
||||||
private int spanCount(String include, String exclude, int pre, int post) throws IOException{
|
|
||||||
SpanQuery iq = spanTermQuery(field, include);
|
private int spanCount(String include, int slop, String exclude, int pre, int post) throws IOException{
|
||||||
|
String[] includeTerms = include.split(" +");
|
||||||
|
SpanQuery iq = includeTerms.length == 1 ? spanTermQuery(field, include) : spanNearOrderedQuery(field, slop, includeTerms);
|
||||||
SpanQuery eq = spanTermQuery(field, exclude);
|
SpanQuery eq = spanTermQuery(field, exclude);
|
||||||
SpanQuery snq = spanNotQuery(iq, eq, pre, post);
|
SpanQuery snq = spanNotQuery(iq, eq, pre, post);
|
||||||
Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
|
Spans spans = snq.createWeight(searcher, false, 1f).getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);
|
||||||
|
|
Loading…
Reference in New Issue