mirror of https://github.com/apache/lucene.git
LUCENE-5091: add not-near capability to SpanNotQuery
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1507396 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
92021404d4
commit
2e1b9f5f16
|
@ -67,6 +67,9 @@ New features
|
|||
* LUCENE-5118: SpatialStrategy.makeDistanceValueSource() now has an optional
|
||||
multiplier for scaling degrees to another unit. (David Smiley)
|
||||
|
||||
* LUCENE-5091: SpanNotQuery can now be configured with pre and post slop to act
|
||||
as a hypothetical SpanNotNearQuery. (Tim Allison via David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all
|
||||
|
|
|
@ -31,16 +31,36 @@ import java.util.Collection;
|
|||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/** Removes matches which overlap with another SpanQuery. */
|
||||
/** Removes matches which overlap with another SpanQuery or
|
||||
* fall within x tokens before or y tokens after another SpanQuery. */
|
||||
public class SpanNotQuery extends SpanQuery implements Cloneable {
|
||||
private SpanQuery include;
|
||||
private SpanQuery exclude;
|
||||
private final int pre;
|
||||
private final int post;
|
||||
|
||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||
* have no overlap with spans from <code>exclude</code>.*/
|
||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude) {
|
||||
this(include, exclude, 0, 0);
|
||||
}
|
||||
|
||||
|
||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||
* have no overlap with spans from <code>exclude</code> within
|
||||
* <code>dist</code> tokens of <code>include</code>. */
|
||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) {
|
||||
this(include, exclude, dist, dist);
|
||||
}
|
||||
|
||||
/** Construct a SpanNotQuery matching spans from <code>include</code> which
|
||||
* have no overlap with spans from <code>exclude</code> within
|
||||
* <code>pre</code> tokens before or <code>post</code> tokens after <code>include</code>. */
|
||||
public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) {
|
||||
this.include = include;
|
||||
this.exclude = exclude;
|
||||
this.pre = (pre >=0) ? pre : 0;
|
||||
this.post = (post >= 0) ? post : 0;
|
||||
|
||||
if (!include.getField().equals(exclude.getField()))
|
||||
throw new IllegalArgumentException("Clauses must have same field.");
|
||||
|
@ -65,6 +85,10 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
buffer.append(include.toString(field));
|
||||
buffer.append(", ");
|
||||
buffer.append(exclude.toString(field));
|
||||
buffer.append(", ");
|
||||
buffer.append(Integer.toString(pre));
|
||||
buffer.append(", ");
|
||||
buffer.append(Integer.toString(post));
|
||||
buffer.append(")");
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
|
@ -72,7 +96,8 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
@Override
|
||||
public SpanNotQuery clone() {
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),(SpanQuery) exclude.clone());
|
||||
SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(),
|
||||
(SpanQuery) exclude.clone(), pre, post);
|
||||
spanNotQuery.setBoost(getBoost());
|
||||
return spanNotQuery;
|
||||
}
|
||||
|
@ -98,13 +123,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
while (moreExclude // while exclude is before
|
||||
&& includeSpans.doc() == excludeSpans.doc()
|
||||
&& excludeSpans.end() <= includeSpans.start()) {
|
||||
&& excludeSpans.end() <= includeSpans.start() - pre) {
|
||||
moreExclude = excludeSpans.next(); // increment exclude
|
||||
}
|
||||
|
||||
if (!moreExclude // if no intersection
|
||||
|| includeSpans.doc() != excludeSpans.doc()
|
||||
|| includeSpans.end() <= excludeSpans.start())
|
||||
|| includeSpans.end()+post <= excludeSpans.start())
|
||||
break; // we found a match
|
||||
|
||||
moreInclude = includeSpans.next(); // intersected: keep scanning
|
||||
|
@ -126,13 +151,13 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
|
||||
while (moreExclude // while exclude is before
|
||||
&& includeSpans.doc() == excludeSpans.doc()
|
||||
&& excludeSpans.end() <= includeSpans.start()) {
|
||||
&& excludeSpans.end() <= includeSpans.start()-pre) {
|
||||
moreExclude = excludeSpans.next(); // increment exclude
|
||||
}
|
||||
|
||||
if (!moreExclude // if no intersection
|
||||
|| includeSpans.doc() != excludeSpans.doc()
|
||||
|| includeSpans.end() <= excludeSpans.start())
|
||||
|| includeSpans.end()+post <= excludeSpans.start())
|
||||
return true; // we found a match
|
||||
|
||||
return next(); // scan to next match
|
||||
|
@ -199,23 +224,28 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
|||
/** Returns true iff <code>o</code> is equal to this. */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof SpanNotQuery)) return false;
|
||||
if (!super.equals(o))
|
||||
return false;
|
||||
|
||||
SpanNotQuery other = (SpanNotQuery)o;
|
||||
return this.include.equals(other.include)
|
||||
&& this.exclude.equals(other.exclude)
|
||||
&& this.getBoost() == other.getBoost();
|
||||
&& this.pre == other.pre
|
||||
&& this.post == other.post;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int h = include.hashCode();
|
||||
h = (h<<1) | (h >>> 31); // rotate left
|
||||
int h = super.hashCode();
|
||||
h = Integer.rotateLeft(h, 1);
|
||||
h ^= include.hashCode();
|
||||
h = Integer.rotateLeft(h, 1);
|
||||
h ^= exclude.hashCode();
|
||||
h = (h<<1) | (h >>> 31); // rotate left
|
||||
h ^= Float.floatToRawIntBits(getBoost());
|
||||
h = Integer.rotateLeft(h, 1);
|
||||
h ^= pre;
|
||||
h = Integer.rotateLeft(h, 1);
|
||||
h ^= post;
|
||||
return h;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -38,8 +38,8 @@ and inter-phrase proximity (when constructed from other {@link org.apache.lucene
|
|||
number of other {@link org.apache.lucene.search.spans.SpanQuery}s.</li>
|
||||
|
||||
<li>A {@link org.apache.lucene.search.spans.SpanNotQuery SpanNotQuery} removes spans
|
||||
matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap
|
||||
another. This can be used, e.g., to implement within-paragraph
|
||||
matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap (or come
|
||||
near) another. This can be used, e.g., to implement within-paragraph
|
||||
search.</li>
|
||||
|
||||
<li>A {@link org.apache.lucene.search.spans.SpanFirstQuery SpanFirstQuery} matches spans
|
||||
|
|
|
@ -363,6 +363,77 @@ public class TestBasics extends LuceneTestCase {
|
|||
1847, 1848, 1849, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpanNotWindowOne() throws Exception {
|
||||
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
|
||||
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
|
||||
SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||
4, true);
|
||||
SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
|
||||
SpanNotQuery query = new SpanNotQuery(near, term3, 1, 1);
|
||||
|
||||
checkHits(query, new int[]
|
||||
{840, 842, 843, 844, 845, 846, 847, 848, 849,
|
||||
1840, 1842, 1843, 1844, 1845, 1846, 1847, 1848, 1849});
|
||||
|
||||
assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
|
||||
assertTrue(searcher.explain(query, 1842).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpanNotWindowTwoBefore() throws Exception {
|
||||
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
|
||||
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "forty"));
|
||||
SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||
4, true);
|
||||
SpanTermQuery term3 = new SpanTermQuery(new Term("field", "one"));
|
||||
SpanNotQuery query = new SpanNotQuery(near, term3, 2, 0);
|
||||
|
||||
checkHits(query, new int[]
|
||||
{840, 841, 842, 843, 844, 845, 846, 847, 848, 849});
|
||||
|
||||
assertTrue(searcher.explain(query, 840).getValue() > 0.0f);
|
||||
assertTrue(searcher.explain(query, 849).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpanNotWindowNeg() throws Exception {
|
||||
//test handling of invalid window < 0
|
||||
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "eight"));
|
||||
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "one"));
|
||||
SpanNearQuery near = new SpanNearQuery(new SpanQuery[] {term1, term2},
|
||||
4, true);
|
||||
SpanTermQuery term3 = new SpanTermQuery(new Term("field", "forty"));
|
||||
|
||||
SpanOrQuery or = new SpanOrQuery(term3);
|
||||
|
||||
SpanNotQuery query = new SpanNotQuery(near, or);
|
||||
|
||||
checkHits(query, new int[]
|
||||
{801, 821, 831, 851, 861, 871, 881, 891,
|
||||
1801, 1821, 1831, 1851, 1861, 1871, 1881, 1891});
|
||||
|
||||
assertTrue(searcher.explain(query, 801).getValue() > 0.0f);
|
||||
assertTrue(searcher.explain(query, 891).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpanNotWindowDoubleExcludesBefore() throws Exception {
|
||||
//test hitting two excludes before an include
|
||||
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "forty"));
|
||||
SpanTermQuery term2 = new SpanTermQuery(new Term("field", "two"));
|
||||
SpanNearQuery near = new SpanNearQuery(new SpanTermQuery[]{term1, term2}, 2, true);
|
||||
SpanTermQuery exclude = new SpanTermQuery(new Term("field", "one"));
|
||||
|
||||
SpanNotQuery query = new SpanNotQuery(near, exclude, 4, 1);
|
||||
|
||||
checkHits(query, new int[]
|
||||
{42, 242, 342, 442, 542, 642, 742, 842, 942});
|
||||
|
||||
assertTrue(searcher.explain(query, 242).getValue() > 0.0f);
|
||||
assertTrue(searcher.explain(query, 942).getValue() > 0.0f);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSpanFirst() throws Exception {
|
||||
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
|
||||
|
|
|
@ -84,7 +84,8 @@ public class TestSpans extends LuceneTestCase {
|
|||
"u2 xx u1 u2",
|
||||
"u2 u1 xx u2",
|
||||
"u1 u2 xx u2",
|
||||
"t1 t2 t1 t3 t2 t3"
|
||||
"t1 t2 t1 t3 t2 t3",
|
||||
"s2 s1 s1 xx xx s2 xx s2 xx s1 xx xx xx xx xx s2 xx"
|
||||
};
|
||||
|
||||
public SpanTermQuery makeSpanTermQuery(String text) {
|
||||
|
@ -502,4 +503,52 @@ public class TestSpans extends LuceneTestCase {
|
|||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
|
||||
public void testSpanNots() throws Throwable{
|
||||
assertEquals("SpanNotIncludeExcludeSame1", 0, spanCount("s2", "s2", 0, 0), 0);
|
||||
assertEquals("SpanNotIncludeExcludeSame2", 0, spanCount("s2", "s2", 10, 10), 0);
|
||||
|
||||
//focus on behind
|
||||
assertEquals("SpanNotS2NotS1_6_0", 1, spanCount("s2", "s1", 6, 0));
|
||||
assertEquals("SpanNotS2NotS1_5_0", 2, spanCount("s2", "s1", 5, 0));
|
||||
assertEquals("SpanNotS2NotS1_3_0", 3, spanCount("s2", "s1", 3, 0));
|
||||
assertEquals("SpanNotS2NotS1_2_0", 4, spanCount("s2", "s1", 2, 0));
|
||||
assertEquals("SpanNotS2NotS1_0_0", 4, spanCount("s2", "s1", 0, 0));
|
||||
|
||||
//focus on both
|
||||
assertEquals("SpanNotS2NotS1_3_1", 2, spanCount("s2", "s1", 3, 1));
|
||||
assertEquals("SpanNotS2NotS1_2_1", 3, spanCount("s2", "s1", 2, 1));
|
||||
assertEquals("SpanNotS2NotS1_1_1", 3, spanCount("s2", "s1", 1, 1));
|
||||
assertEquals("SpanNotS2NotS1_10_10", 0, spanCount("s2", "s1", 10, 10));
|
||||
|
||||
//focus on ahead
|
||||
assertEquals("SpanNotS1NotS2_10_10", 0, spanCount("s1", "s2", 10, 10));
|
||||
assertEquals("SpanNotS1NotS2_0_1", 3, spanCount("s1", "s2", 0, 1));
|
||||
assertEquals("SpanNotS1NotS2_0_2", 3, spanCount("s1", "s2", 0, 2));
|
||||
assertEquals("SpanNotS1NotS2_0_3", 2, spanCount("s1", "s2", 0, 3));
|
||||
assertEquals("SpanNotS1NotS2_0_4", 1, spanCount("s1", "s2", 0, 4));
|
||||
assertEquals("SpanNotS1NotS2_0_8", 0, spanCount("s1", "s2", 0, 8));
|
||||
|
||||
//exclude doesn't exist
|
||||
assertEquals("SpanNotS1NotS3_8_8", 3, spanCount("s1", "s3", 8, 8));
|
||||
|
||||
//include doesn't exist
|
||||
assertEquals("SpanNotS3NotS1_8_8", 0, spanCount("s3", "s1", 8, 8));
|
||||
|
||||
}
|
||||
|
||||
private int spanCount(String include, String exclude, int pre, int post) throws IOException{
|
||||
SpanTermQuery iq = new SpanTermQuery(new Term(field, include));
|
||||
SpanTermQuery eq = new SpanTermQuery(new Term(field, exclude));
|
||||
SpanNotQuery snq = new SpanNotQuery(iq, eq, pre, post);
|
||||
Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq);
|
||||
|
||||
int i = 0;
|
||||
while (spans.next()){
|
||||
i++;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue