LUCENE-5091: add not-near capability to SpanNotQuery

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1507396 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
David Wayne Smiley 2013-07-26 18:47:37 +00:00
parent 92021404d4
commit 2e1b9f5f16
5 changed files with 170 additions and 17 deletions

View File

@ -67,6 +67,9 @@ New features
* LUCENE-5118: SpatialStrategy.makeDistanceValueSource() now has an optional
multiplier for scaling degrees to another unit. (David Smiley)
* LUCENE-5091: SpanNotQuery can now be configured with pre and post slop to act
as a hypothetical SpanNotNearQuery. (Tim Allison via David Smiley)
Bug Fixes
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all

View File

@ -31,16 +31,36 @@ import java.util.Collection;
import java.util.Map;
import java.util.Set;
/** Removes matches which overlap with another SpanQuery. */
/** Removes matches which overlap with another SpanQuery or which lie
* within x tokens before or y tokens after another SpanQuery. */
public class SpanNotQuery extends SpanQuery implements Cloneable {
private SpanQuery include;
private SpanQuery exclude;
private final int pre;
private final int post;
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code>.*/
public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
this(include, exclude, 0, 0);
}
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
* <code>dist</code> tokens of <code>include</code>. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
this(include, exclude, dist, dist);
}
/** Construct a SpanNotQuery matching spans from <code>include</code> which
* have no overlap with spans from <code>exclude</code> within
* <code>pre</code> tokens before or <code>post</code> tokens after <code>include</code>.
* Negative values of <code>pre</code> or <code>post</code> are treated as 0. */
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
this.include = include;
this.exclude = exclude;
this.pre = (pre >=0) ? pre : 0;
this.post = (post >= 0) ? post : 0;
if (!include.getField().equals(exclude.getField()))
throw new IllegalArgumentException("Clauses must have same field.");
@ -65,6 +85,10 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
buffer.append(include.toString(field));
buffer.append(", ");
buffer.append(exclude.toString(field));
buffer.append(", ");
buffer.append(Integer.toString(pre));
buffer.append(", ");
buffer.append(Integer.toString(post));
buffer.append(")");
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
@ -72,7 +96,8 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
@Override
public SpanNotQuery clone() {
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone());
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
(SpanQuery) exclude.clone(), pre, post);
spanNotQuery.setBoost(getBoost());
return spanNotQuery;
}
@ -98,13 +123,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()) {
&& excludeSpans.end() <= includeSpans.start() - pre) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end() <= excludeSpans.start())
|| includeSpans.end()+post <= excludeSpans.start())
break; // we found a match
moreInclude = includeSpans.next(); // intersected: keep scanning
@ -126,13 +151,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()) {
&& excludeSpans.end() <= includeSpans.start()-pre) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end() <= excludeSpans.start())
|| includeSpans.end()+post <= excludeSpans.start())
return true; // we found a match
return next(); // scan to next match
@ -199,23 +224,28 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
/** Returns true iff <code>o</code> is equal to this. */
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof SpanNotQuery)) return false;
if (!super.equals(o))
return false;
SpanNotQuery other = (SpanNotQuery)o;
return this.include.equals(other.include)
&& this.exclude.equals(other.exclude)
&& this.getBoost() == other.getBoost();
&& this.pre == other.pre
&& this.post == other.post;
}
@Override
public int hashCode() {
int h = include.hashCode();
h = (h<<1) | (h >>> 31); // rotate left
int h = super.hashCode();
h = Integer.rotateLeft(h, 1);
h ^= include.hashCode();
h = Integer.rotateLeft(h, 1);
h ^= exclude.hashCode();
h = (h<<1) | (h >>> 31); // rotate left
h ^= Float.floatToRawIntBits(getBoost());
h = Integer.rotateLeft(h, 1);
h ^= pre;
h = Integer.rotateLeft(h, 1);
h ^= post;
return h;
}
}
}

View File

@ -38,8 +38,8 @@ and inter-phrase proximity (when constructed from other {@link org.apache.lucene
number of other {@link org.apache.lucene.search.spans.SpanQuery}s.</li>
<li>A {@link org.apache.lucene.search.spans.SpanNotQuery SpanNotQuery} removes spans
matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap
another. This can be used, e.g., to implement within-paragraph
matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap (or come
near) another. This can be used, e.g., to implement within-paragraph
search.</li>
<li>A {@link org.apache.lucene.search.spans.SpanFirstQuery SpanFirstQuery} matches spans

View File

@ -363,6 +363,77 @@ public class TestBasics extends LuceneTestCase {
1847, 1848, 1849, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949});
}
@Test
public void testSpanNotWindowOne() throws Exception {
  // Include clause: ordered SpanNearQuery over "eight" and "forty" with slop 4.
  SpanQuery[] nearClauses = new SpanQuery[] {
      new SpanTermQuery(new Term("field", "eight")),
      new SpanTermQuery(new Term("field", "forty"))
  };
  SpanNearQuery include = new SpanNearQuery(nearClauses, 4, true);

  // Exclude clause: "one", with a window of 1 token before and 1 token after
  // the include span.
  SpanTermQuery exclude = new SpanTermQuery(new Term("field", "one"));
  SpanNotQuery query = new SpanNotQuery(include, exclude, 1, 1);

  int[] expectedDocs = {
      840, 842, 843, 844, 845, 846, 847, 848, 849,
      1840, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849
  };
  checkHits(query, expectedDocs);

  // Matching documents must also receive a positive score explanation.
  assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
  assertTrue(searcher.explain(query, 1842).getValue() > 0.0f);
}
@Test
public void testSpanNotWindowTwoBefore() throws Exception {
  // Include clause: ordered SpanNearQuery over "eight" and "forty" with slop 4.
  SpanQuery[] nearClauses = new SpanQuery[] {
      new SpanTermQuery(new Term("field", "eight")),
      new SpanTermQuery(new Term("field", "forty"))
  };
  SpanNearQuery include = new SpanNearQuery(nearClauses, 4, true);

  // Exclude clause: "one", with a window of 2 tokens before the include span
  // and no extra window after it.
  SpanTermQuery exclude = new SpanTermQuery(new Term("field", "one"));
  SpanNotQuery query = new SpanNotQuery(include, exclude, 2, 0);

  int[] expectedDocs = {840, 841, 842, 843, 844, 845, 846, 847, 848, 849};
  checkHits(query, expectedDocs);

  // Matching documents must also receive a positive score explanation.
  assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
  assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
}
@Test
public void testSpanNotWindowNeg() throws Exception {
  // Test handling of an invalid window < 0. The SpanNotQuery constructor
  // replaces negative pre/post values with 0, so the hits must be exactly
  // those of an overlap-only SpanNotQuery.
  SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
  SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
  SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
      4, true);
  SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
  SpanOrQuery or = new SpanOrQuery(term3);

  // Pass negative windows explicitly; previously the two-argument constructor
  // was used, so the negative-window path was never exercised.
  SpanNotQuery query = new SpanNotQuery(near, or, -1, -1);

  checkHits(query, new int[]
      {801, 821, 831, 851, 861, 871, 881, 891,
       1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
  assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
  assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
}
@Test
public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
  // Scenario: two exclude matches are hit before an include match.
  // Include clause: ordered SpanNearQuery over "forty" and "two" with slop 2.
  SpanTermQuery[] nearClauses = new SpanTermQuery[] {
      new SpanTermQuery(new Term("field", "forty")),
      new SpanTermQuery(new Term("field", "two"))
  };
  SpanNearQuery include = new SpanNearQuery(nearClauses, 2, true);

  // Exclude clause: "one", with a window of 4 tokens before and 1 token after
  // the include span.
  SpanTermQuery excluded = new SpanTermQuery(new Term("field", "one"));
  SpanNotQuery query = new SpanNotQuery(include, excluded, 4, 1);

  int[] expectedDocs = {42, 242, 342, 442, 542, 642, 742, 842, 942};
  checkHits(query, expectedDocs);

  // Matching documents must also receive a positive score explanation.
  assertTrue(searcher.explain(query, 242).getValue() > 0.0f);
  assertTrue(searcher.explain(query, 942).getValue() > 0.0f);
}
@Test
public void testSpanFirst() throws Exception {
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));

View File

@ -84,7 +84,8 @@ public class TestSpans extends LuceneTestCase {
"u2 xx u1 u2",
"u2 u1 xx u2",
"u1 u2 xx u2",
"t1 t2 t1 t3 t2 t3"
"t1 t2 t1 t3 t2 t3",
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx"
};
public SpanTermQuery makeSpanTermQuery(String text) {
@ -502,4 +503,52 @@ public class TestSpans extends LuceneTestCase {
reader.close();
dir.close();
}
/**
 * Checks the number of spans produced by SpanNotQuery for various
 * include/exclude terms and pre/post windows, using {@code spanCount}.
 * NOTE(review): the expected counts presumably derive from the
 * "s2 s1 s1 xx ..." test document; confirm against the index setup.
 */
public void testSpanNots() throws Throwable{
  // Include and exclude are the same term: every include span overlaps an
  // exclude span, so nothing may match. (Integer comparison, like all the
  // assertions below; no floating-point delta argument.)
  assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0));
  assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10));

  //focus on behind
  assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
  assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
  assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
  assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
  assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));

  //focus on both
  assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
  assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
  assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
  assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));

  //focus on ahead
  assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
  assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
  assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
  assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
  assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
  assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));

  //exclude doesn't exist
  assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));

  //include doesn't exist
  assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
}
/**
 * Builds a SpanNotQuery from two term queries on the test field and counts
 * the spans it produces across the searcher's top-level reader context.
 *
 * @param include term whose spans are candidates for matching
 * @param exclude term whose spans disqualify nearby include spans
 * @param pre     window passed to SpanNotQuery as its <code>pre</code> argument
 * @param post    window passed to SpanNotQuery as its <code>post</code> argument
 * @return the number of spans enumerated
 */
private int spanCount(String include, String exclude, int pre, int post) throws IOException{
  SpanTermQuery includeQuery = new SpanTermQuery(new Term(field, include));
  SpanTermQuery excludeQuery = new SpanTermQuery(new Term(field, exclude));
  SpanNotQuery notQuery = new SpanNotQuery(includeQuery, excludeQuery, pre, post);

  Spans matches = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), notQuery);

  // Advance through every span, tallying as we go.
  int count = 0;
  for (boolean more = matches.next(); more; more = matches.next()) {
    count++;
  }
  return count;
}
}