From ddf0c1d231160f271a161c0f8a3da34ed78b6a0e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 12 Apr 2015 18:24:14 +0000 Subject: [PATCH] LUCENE-6418, LUCENE-6411: add AssertingSpans and fix the bugs it found git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1673036 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/search/spans/SpanNotQuery.java | 15 +- .../org/apache/lucene/search/spans/Spans.java | 12 + .../lucene/search/spans/TestBasics.java | 2 +- .../spans/TestSpanSearchEquivalence.java | 189 ++++++++---- .../search/spans/AssertingSpanQuery.java | 123 ++++++++ .../lucene/search/spans/AssertingSpans.java | 273 ++++++++++++++++++ .../apache/lucene/search/spans/package.html | 26 ++ 7 files changed, 573 insertions(+), 67 deletions(-) create mode 100644 lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/search/spans/package.html diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 6b5f2b4e2e5..4a33890644b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -122,25 +122,30 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { return new FilterSpans(includeSpans) { // last document we have checked matches() against for the exclusion, and failed // when using approximations, so we don't call it again, and pass thru all inclusions. - int lastNonMatchingDoc = -1; + int lastApproxDoc = -1; + boolean lastApproxResult = false; @Override protected AcceptStatus accept(Spans candidate) throws IOException { + // TODO: this logic is ugly and sneaky, can we clean it up? int doc = candidate.docID(); if (doc > excludeSpans.docID()) { // catch up 'exclude' to the current doc if (excludeTwoPhase != null) { if (excludeApproximation.advance(doc) == doc) { - if (!excludeTwoPhase.matches()) { - lastNonMatchingDoc = doc; // mark as non-match - } + lastApproxDoc = doc; + lastApproxResult = excludeTwoPhase.matches(); } } else { excludeSpans.advance(doc); } + } else if (excludeTwoPhase != null && doc == excludeSpans.docID() && doc != lastApproxDoc) { + // excludeSpans already sitting on our candidate doc, but matches not called yet. + lastApproxDoc = doc; + lastApproxResult = excludeTwoPhase.matches(); } - if (doc == lastNonMatchingDoc || doc != excludeSpans.docID()) { + if (doc != excludeSpans.docID() || (doc == lastApproxDoc && lastApproxResult == false)) { return AcceptStatus.YES; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java index ea8bf8a5583..1a799727e62 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java @@ -97,4 +97,16 @@ public abstract class Spans extends DocIdSetIterator { public TwoPhaseIterator asTwoPhaseIterator() { return null; } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + Class clazz = getClass(); + sb.append(clazz.isAnonymousClass() ? clazz.getName() : clazz.getSimpleName()); + sb.append("(doc=").append(docID()); + sb.append(",start=").append(startPosition()); + sb.append(",end=").append(endPosition()); + sb.append(")"); + return sb.toString(); + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java index 1203dff6979..f5a51e6b88c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -341,7 +341,7 @@ public class TestBasics extends LuceneTestCase { assertTrue(searcher.explain(query, 891).getValue() > 0.0f); } - @Test @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6418") + @Test public void testNpeInSpanNearInSpanFirstInSpanNot() throws Exception { int n = 5; SpanTermQuery hun = new SpanTermQuery(new Term("field", "hundred")); diff --git a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java index b3720437e7e..9834ef24fa6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java +++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java @@ -33,11 +33,20 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { // TODO: we could go a little crazy for a lot of these, // but these are just simple minimal cases in case something // goes horribly wrong. Put more intense tests elsewhere. + + /** generally wrap with asserting. but not always, so we don't hide bugs */ + private SpanQuery span(SpanQuery query) { + if (random().nextInt(100) <= 95) { + return new AssertingSpanQuery(query); + } else { + return query; + } + } /** SpanTermQuery(A) = TermQuery(A) */ public void testSpanTermVersusTerm() throws Exception { Term t1 = randomTerm(); - assertSameSet(new TermQuery(t1), new SpanTermQuery(t1)); + assertSameSet(new TermQuery(t1), span(new SpanTermQuery(t1))); } /** SpanOrQuery(A, B) = (A B) */ @@ -47,7 +56,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { BooleanQuery q1 = new BooleanQuery(); q1.add(new TermQuery(t1), Occur.SHOULD); q1.add(new TermQuery(t2), Occur.SHOULD); - SpanOrQuery q2 = new SpanOrQuery(new SpanTermQuery(t1), new SpanTermQuery(t2)); + SpanQuery q2 = span(new SpanOrQuery(span(new SpanTermQuery(t1)), span(new SpanTermQuery(t2)))); assertSameSet(q1, q2); } @@ -55,7 +64,8 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanNotVersusSpanTerm() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - assertSubsetOf(new SpanNotQuery(new SpanTermQuery(t1), new SpanTermQuery(t2)), new SpanTermQuery(t1)); + assertSubsetOf(span(new SpanNotQuery(span(new SpanTermQuery(t1)), span(new SpanTermQuery(t2)))), + span(new SpanTermQuery(t1))); } /** SpanNotQuery(A, [B C]) ⊆ SpanTermQuery(A) */ @@ -63,8 +73,11 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t1 = randomTerm(); Term t2 = randomTerm(); Term t3 = randomTerm(); - SpanQuery near = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(t2), new SpanTermQuery(t3) }, 10, random().nextBoolean()); - assertSubsetOf(new SpanNotQuery(new SpanTermQuery(t1), near), new SpanTermQuery(t1)); + SpanQuery near = span(new SpanNearQuery(new SpanQuery[] { + span(new SpanTermQuery(t2)), + span(new SpanTermQuery(t3)) + }, 10, random().nextBoolean())); + assertSubsetOf(span(new SpanNotQuery(span(new SpanTermQuery(t1)), near)), span(new SpanTermQuery(t1))); } /** SpanNotQuery([A B], C) ⊆ SpanNearQuery([A B]) */ @@ -72,8 +85,11 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t1 = randomTerm(); Term t2 = randomTerm(); Term t3 = randomTerm(); - SpanQuery near = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }, 10, random().nextBoolean()); - assertSubsetOf(new SpanNotQuery(near, new SpanTermQuery(t3)), near); + SpanQuery near = span(new SpanNearQuery(new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }, 10, random().nextBoolean())); + assertSubsetOf(span(new SpanNotQuery(near, span(new SpanTermQuery(t3)))), near); } /** SpanNotQuery([A B], [C D]) ⊆ SpanNearQuery([A B]) */ @@ -82,23 +98,33 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t2 = randomTerm(); Term t3 = randomTerm(); Term t4 = randomTerm(); - SpanQuery near1 = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }, 10, random().nextBoolean()); - SpanQuery near2 = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(t3), new SpanTermQuery(t4) }, 10, random().nextBoolean()); - assertSubsetOf(new SpanNotQuery(near1, near2), near1); + SpanQuery near1 = span(new SpanNearQuery(new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }, 10, random().nextBoolean())); + SpanQuery near2 = span(new SpanNearQuery(new SpanQuery[] { + span(new SpanTermQuery(t3)), + span(new SpanTermQuery(t4)) + }, 10, random().nextBoolean())); + assertSubsetOf(span(new SpanNotQuery(near1, near2)), near1); } /** SpanFirstQuery(A, 10) ⊆ SpanTermQuery(A) */ public void testSpanFirstVersusSpanTerm() throws Exception { Term t1 = randomTerm(); - assertSubsetOf(new SpanFirstQuery(new SpanTermQuery(t1), 10), new SpanTermQuery(t1)); + assertSubsetOf(span(new SpanFirstQuery(span(new SpanTermQuery(t1)), 10)), + span(new SpanTermQuery(t1))); } /** SpanNearQuery([A, B], 0, true) = "A B" */ public void testSpanNearVersusPhrase() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanNearQuery q1 = new SpanNearQuery(subquery, 0, true); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery q1 = span(new SpanNearQuery(subquery, 0, true)); PhraseQuery q2 = new PhraseQuery(); q2.add(t1); q2.add(t2); @@ -109,8 +135,11 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanNearVersusBooleanAnd() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanNearQuery q1 = new SpanNearQuery(subquery, Integer.MAX_VALUE, false); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery q1 = span(new SpanNearQuery(subquery, Integer.MAX_VALUE, false)); BooleanQuery q2 = new BooleanQuery(); q2.add(new TermQuery(t1), Occur.MUST); q2.add(new TermQuery(t2), Occur.MUST); @@ -121,9 +150,12 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanNearVersusSloppySpanNear() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanNearQuery q1 = new SpanNearQuery(subquery, 0, false); - SpanNearQuery q2 = new SpanNearQuery(subquery, 1, false); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery q1 = span(new SpanNearQuery(subquery, 0, false)); + SpanQuery q2 = span(new SpanNearQuery(subquery, 1, false)); assertSubsetOf(q1, q2); } @@ -131,9 +163,12 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanNearInOrderVersusOutOfOrder() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanNearQuery q1 = new SpanNearQuery(subquery, 3, true); - SpanNearQuery q2 = new SpanNearQuery(subquery, 3, false); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery q1 = span(new SpanNearQuery(subquery, 3, true)); + SpanQuery q2 = span(new SpanNearQuery(subquery, 3, false)); assertSubsetOf(q1, q2); } @@ -141,10 +176,13 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanNearIncreasingSloppiness() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; for (int i = 0; i < 10; i++) { - SpanNearQuery q1 = new SpanNearQuery(subquery, i, false); - SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false); + SpanQuery q1 = span(new SpanNearQuery(subquery, i, false)); + SpanQuery q2 = span(new SpanNearQuery(subquery, i+1, false)); assertSubsetOf(q1, q2); } } @@ -154,10 +192,14 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t1 = randomTerm(); Term t2 = randomTerm(); Term t3 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) }; + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)), + span(new SpanTermQuery(t3)) + }; for (int i = 0; i < 10; i++) { - SpanNearQuery q1 = new SpanNearQuery(subquery, i, false); - SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false); + SpanQuery q1 = span(new SpanNearQuery(subquery, i, false)); + SpanQuery q2 = span(new SpanNearQuery(subquery, i+1, false)); assertSubsetOf(q1, q2); } } @@ -166,10 +208,13 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanNearIncreasingOrderedSloppiness() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; for (int i = 0; i < 10; i++) { - SpanNearQuery q1 = new SpanNearQuery(subquery, i, false); - SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, false); + SpanQuery q1 = span(new SpanNearQuery(subquery, i, false)); + SpanQuery q2 = span(new SpanNearQuery(subquery, i+1, false)); assertSubsetOf(q1, q2); } } @@ -179,10 +224,14 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t1 = randomTerm(); Term t2 = randomTerm(); Term t3 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2), new SpanTermQuery(t3) }; + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)), + span(new SpanTermQuery(t3)) + }; for (int i = 0; i < 10; i++) { - SpanNearQuery q1 = new SpanNearQuery(subquery, i, true); - SpanNearQuery q2 = new SpanNearQuery(subquery, i+1, true); + SpanQuery q1 = span(new SpanNearQuery(subquery, i, true)); + SpanQuery q2 = span(new SpanNearQuery(subquery, i+1, true)); assertSubsetOf(q1, q2); } } @@ -192,7 +241,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t1 = randomTerm(); for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { - Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j); + Query q1 = span(new SpanPositionRangeQuery(span(new SpanTermQuery(t1)), i, i+j)); Query q2 = new TermQuery(t1); assertSubsetOf(q1, q2); } @@ -204,8 +253,8 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { Term t1 = randomTerm(); for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { - Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j); - Query q2 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j+1); + Query q1 = span(new SpanPositionRangeQuery(span(new SpanTermQuery(t1)), i, i+j)); + Query q2 = span(new SpanPositionRangeQuery(span(new SpanTermQuery(t1)), i, i+j+1)); assertSubsetOf(q1, q2); } } @@ -214,7 +263,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { /** SpanPositionRangeQuery(A, 0, ∞) = TermQuery(A) */ public void testSpanRangeTermEverything() throws Exception { Term t1 = randomTerm(); - Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), 0, Integer.MAX_VALUE); + Query q1 = span(new SpanPositionRangeQuery(span(new SpanTermQuery(t1)), 0, Integer.MAX_VALUE)); Query q2 = new TermQuery(t1); assertSameSet(q1, q2); } @@ -223,11 +272,14 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanRangeNear() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery nearQuery = span(new SpanNearQuery(subquery, 10, true)); for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { - Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j); + Query q1 = span(new SpanPositionRangeQuery(nearQuery, i, i+j)); Query q2 = nearQuery; assertSubsetOf(q1, q2); } @@ -238,12 +290,15 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanRangeNearIncreasingEnd() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery nearQuery = span(new SpanNearQuery(subquery, 10, true)); for (int i = 0; i < 5; i++) { for (int j = 0; j < 5; j++) { - Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j); - Query q2 = new SpanPositionRangeQuery(nearQuery, i, i+j+1); + Query q1 = span(new SpanPositionRangeQuery(nearQuery, i, i+j)); + Query q2 = span(new SpanPositionRangeQuery(nearQuery, i, i+j+1)); assertSubsetOf(q1, q2); } } @@ -253,9 +308,12 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanRangeNearEverything() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); - Query q1 = new SpanPositionRangeQuery(nearQuery, 0, Integer.MAX_VALUE); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery nearQuery = span(new SpanNearQuery(subquery, 10, true)); + Query q1 = span(new SpanPositionRangeQuery(nearQuery, 0, Integer.MAX_VALUE)); Query q2 = nearQuery; assertSameSet(q1, q2); } @@ -264,7 +322,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanFirstTerm() throws Exception { Term t1 = randomTerm(); for (int i = 0; i < 10; i++) { - Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i); + Query q1 = span(new SpanFirstQuery(span(new SpanTermQuery(t1)), i)); Query q2 = new TermQuery(t1); assertSubsetOf(q1, q2); } @@ -274,8 +332,8 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanFirstTermIncreasing() throws Exception { Term t1 = randomTerm(); for (int i = 0; i < 10; i++) { - Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), i); - Query q2 = new SpanFirstQuery(new SpanTermQuery(t1), i+1); + Query q1 = span(new SpanFirstQuery(span(new SpanTermQuery(t1)), i)); + Query q2 = span(new SpanFirstQuery(span(new SpanTermQuery(t1)), i+1)); assertSubsetOf(q1, q2); } } @@ -283,7 +341,7 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { /** SpanFirstQuery(A, ∞) = TermQuery(A) */ public void testSpanFirstTermEverything() throws Exception { Term t1 = randomTerm(); - Query q1 = new SpanFirstQuery(new SpanTermQuery(t1), Integer.MAX_VALUE); + Query q1 = span(new SpanFirstQuery(span(new SpanTermQuery(t1)), Integer.MAX_VALUE)); Query q2 = new TermQuery(t1); assertSameSet(q1, q2); } @@ -292,10 +350,13 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanFirstNear() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery nearQuery = span(new SpanNearQuery(subquery, 10, true)); for (int i = 0; i < 10; i++) { - Query q1 = new SpanFirstQuery(nearQuery, i); + Query q1 = span(new SpanFirstQuery(nearQuery, i)); Query q2 = nearQuery; assertSubsetOf(q1, q2); } @@ -305,11 +366,14 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanFirstNearIncreasing() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery nearQuery = span(new SpanNearQuery(subquery, 10, true)); for (int i = 0; i < 10; i++) { - Query q1 = new SpanFirstQuery(nearQuery, i); - Query q2 = new SpanFirstQuery(nearQuery, i+1); + Query q1 = span(new SpanFirstQuery(nearQuery, i)); + Query q2 = span(new SpanFirstQuery(nearQuery, i+1)); assertSubsetOf(q1, q2); } } @@ -318,9 +382,12 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase { public void testSpanFirstNearEverything() throws Exception { Term t1 = randomTerm(); Term t2 = randomTerm(); - SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) }; - SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true); - Query q1 = new SpanFirstQuery(nearQuery, Integer.MAX_VALUE); + SpanQuery subquery[] = new SpanQuery[] { + span(new SpanTermQuery(t1)), + span(new SpanTermQuery(t2)) + }; + SpanQuery nearQuery = span(new SpanNearQuery(subquery, 10, true)); + Query q1 = span(new SpanFirstQuery(nearQuery, Integer.MAX_VALUE)); Query q2 = nearQuery; assertSameSet(q1, q2); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java new file mode 100644 index 00000000000..776eb2b53f0 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanQuery.java @@ -0,0 +1,123 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; + +/** Wraps a span query with asserts */ +public class AssertingSpanQuery extends SpanQuery { + private final SpanQuery in; + + public AssertingSpanQuery(SpanQuery in) { + this.in = in; + } + + @Override + public Spans getSpans(LeafReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + Spans spans = in.getSpans(context, acceptDocs, termContexts); + if (spans == null) { + return null; + } else { + return new AssertingSpans(spans); + } + } + + @Override + public String getField() { + return in.getField(); + } + + @Override + public String toString(String field) { + return "AssertingSpanQuery(" + in.toString(field) + ")"; + } + + @Override + public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { + // TODO: we are wasteful and createWeight twice in this case... use VirtualMethod? + // we need to not wrap if the query is e.g. a Payload one that overrides this (it should really be final) + SpanWeight weight = in.createWeight(searcher, needsScores); + if (weight.getClass() == SpanWeight.class) { + return super.createWeight(searcher, needsScores); + } else { + return weight; + } + } + + @Override + public void setBoost(float b) { + in.setBoost(b); + } + + @Override + public float getBoost() { + return in.getBoost(); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + Query q = in.rewrite(reader); + if (q == in) { + return this; + } else if (q instanceof SpanQuery) { + return new AssertingSpanQuery((SpanQuery) q); + } else { + return q; + } + } + + @Override + public void extractTerms(Set terms) { + in.extractTerms(terms); + } + + @Override + public Query clone() { + return new AssertingSpanQuery((SpanQuery) in.clone()); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((in == null) ? 0 : in.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + AssertingSpanQuery other = (AssertingSpanQuery) obj; + if (in == null) { + if (other.in != null) return false; + } else if (!in.equals(other.in)) return false; + return true; + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java new file mode 100644 index 00000000000..59c69b1218f --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpans.java @@ -0,0 +1,273 @@ +package org.apache.lucene.search.spans; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.spans.Spans; + +/** + * Wraps a Spans with additional asserts + */ +class AssertingSpans extends Spans { + final Spans in; + int doc = -1; + + /** + * tracks current state of this spans + */ + static enum State { + /** + * document iteration has not yet begun ({@link #docID()} = -1) + */ + DOC_START, + + /** + * two-phase iterator has moved to a new docid, but {@link TwoPhaseIterator#matches()} has + * not been called or it returned false (so you should not do things with the enum) + */ + DOC_UNVERIFIED, + + /** + * iterator set to a valid docID, but position iteration has not yet begun ({@link #startPosition() == -1}) + */ + POS_START, + + /** + * iterator set to a valid docID, and positioned (-1 < {@link #startPosition()} < {@link #NO_MORE_POSITIONS}) + */ + ITERATING, + + /** + * positions exhausted ({@link #startPosition()} = {@link #NO_MORE_POSITIONS}) + */ + POS_FINISHED, + + /** + * documents exhausted ({@link #docID()} = {@link #NO_MORE_DOCS}) + */ + DOC_FINISHED + }; + + State state = State.DOC_START; + + AssertingSpans(Spans in) { + this.in = in; + } + + @Override + public int nextStartPosition() throws IOException { + assert state != State.DOC_START : "invalid position access, state=" + state + ": " + in; + assert state != State.DOC_FINISHED : "invalid position access, state=" + state + ": " + in; + assert state != State.DOC_UNVERIFIED : "invalid position access, state=" + state + ": " + in; + + checkCurrentPositions(); + + // move to next position + int prev = in.startPosition(); + int start = in.nextStartPosition(); + assert start >= prev : "invalid startPosition (positions went backwards, previous=" + prev + "): " + in; + + // transition state if necessary + if (start == NO_MORE_POSITIONS) { + state = State.POS_FINISHED; + } else { + state = State.ITERATING; + } + + // check new positions + checkCurrentPositions(); + return start; + } + + private void checkCurrentPositions() { + int start = in.startPosition(); + int end = in.endPosition(); + + if (state == State.DOC_START || state == State.DOC_UNVERIFIED || state == State.POS_START) { + assert start == -1 : "invalid startPosition (should be -1): " + in; + assert end == -1 : "invalid endPosition (should be -1): " + in; + } else if (state == State.POS_FINISHED) { + assert start == NO_MORE_POSITIONS : "invalid startPosition (should be NO_MORE_POSITIONS): " + in; + assert end == NO_MORE_POSITIONS : "invalid endPosition (should be NO_MORE_POSITIONS): " + in; + } else { + assert start >= 0 : "invalid startPosition (negative): " + in; + assert start <= end : "invalid startPosition (> endPosition): " + in; + } + } + + @Override + public int startPosition() { + checkCurrentPositions(); + return in.startPosition(); + } + + @Override + public int endPosition() { + checkCurrentPositions(); + return in.endPosition(); + } + + @Override + public Collection getPayload() throws IOException { + assert state == State.ITERATING : "getPayload() called in illegal state: " + state + ": " + in; + return in.getPayload(); + } + + @Override + public boolean isPayloadAvailable() throws IOException { + assert state == State.ITERATING : "isPayloadAvailable() called in illegal state: " + state + ": " + in; + return in.isPayloadAvailable(); + } + + @Override + public int docID() { + int doc = in.docID(); + assert doc == this.doc : "broken docID() impl: docID() = " + doc + ", but next/advance last returned: " + this.doc + ": " + in; + return doc; + } + + @Override + public int nextDoc() throws IOException { + assert state != State.DOC_FINISHED : "nextDoc() called after NO_MORE_DOCS: " + in; + int nextDoc = in.nextDoc(); + assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc + ": " + in; + if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) { + state = State.DOC_FINISHED; + } else { + assert in.startPosition() == -1 : "invalid initial startPosition() [should be -1]: " + in; + assert in.endPosition() == -1 : "invalid initial endPosition() [should be -1]: " + in; + state = State.POS_START; + } + doc = nextDoc; + return docID(); + } + + @Override + public int advance(int target) throws IOException { + assert state != State.DOC_FINISHED : "advance() called after NO_MORE_DOCS: " + in; + assert target > doc : "target must be > docID(), got " + target + " <= " + doc + ": " + in; + int advanced = in.advance(target); + assert advanced >= target : "backwards advance from: " + target + " to: " + advanced + ": " + in; + if (advanced == DocIdSetIterator.NO_MORE_DOCS) { + state = State.DOC_FINISHED; + } else { + assert in.startPosition() == -1 : "invalid initial startPosition() [should be -1]: " + in; + assert in.endPosition() == -1 : "invalid initial endPosition() [should be -1]: " + in; + state = State.POS_START; + } + doc = advanced; + return docID(); + } + + @Override + public String toString() { + return "Asserting(" + in + ")"; + } + + @Override + public long cost() { + return in.cost(); + } + + @Override + public TwoPhaseIterator asTwoPhaseIterator() { + final TwoPhaseIterator iterator = in.asTwoPhaseIterator(); + if (iterator == null) { + return null; + } + return new AssertingTwoPhaseView(iterator); + } + + class AssertingTwoPhaseView extends TwoPhaseIterator { + final TwoPhaseIterator in; + int lastDoc = -1; + + AssertingTwoPhaseView(TwoPhaseIterator iterator) { + super(new AssertingDISI(iterator.approximation())); + this.in = iterator; + } + + @Override + public boolean matches() throws IOException { + if (approximation.docID() == -1 || approximation.docID() == DocIdSetIterator.NO_MORE_DOCS) { + throw new AssertionError("matches() should not be called on doc ID " + approximation.docID()); + } + if (lastDoc == approximation.docID()) { + throw new AssertionError("matches() has been called twice on doc ID " + approximation.docID()); + } + lastDoc = approximation.docID(); + boolean v = in.matches(); + if (v) { + state = State.POS_START; + } + return v; + } + } + + class AssertingDISI extends DocIdSetIterator { + final DocIdSetIterator in; + + AssertingDISI(DocIdSetIterator in) { + this.in = in; + } + + @Override + public int docID() { + assert in.docID() == AssertingSpans.this.docID(); + return in.docID(); + } + + @Override + public int nextDoc() throws IOException { + assert state != State.DOC_FINISHED : "nextDoc() called after NO_MORE_DOCS: " + in; + int nextDoc = in.nextDoc(); + assert nextDoc > doc : "backwards nextDoc from " + doc + " to " + nextDoc + ": " + in; + if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) { + state = State.DOC_FINISHED; + } else { + state = State.DOC_UNVERIFIED; + } + doc = nextDoc; + return docID(); + } + + @Override + public int advance(int target) throws IOException { + assert state != State.DOC_FINISHED : "advance() called after NO_MORE_DOCS: " + in; + assert target > doc : "target must be > docID(), got " + target + " <= " + doc + ": " + in; + int advanced = in.advance(target); + assert advanced >= target : "backwards advance from: " + target + " to: " + advanced + ": " + in; + if (advanced == DocIdSetIterator.NO_MORE_DOCS) { + state = State.DOC_FINISHED; + } else { + state = State.DOC_UNVERIFIED; + } + doc = advanced; + return docID(); + } + + @Override + public long cost() { + return in.cost(); + } + } +} diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/package.html b/lucene/test-framework/src/java/org/apache/lucene/search/spans/package.html new file mode 100644 index 00000000000..276fab45b65 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/search/spans/package.html @@ -0,0 +1,26 @@ + + + + + + + + +Support for testing search spanqueries. + +