LUCENE-6393: Add two-phase support to SpanPositionCheckQuery and subclasses

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671420 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2015-04-05 19:44:31 +00:00
parent f7d633a56e
commit 7f4379762e
5 changed files with 148 additions and 38 deletions

View File

@ -45,6 +45,10 @@ New Features
code with boolean queries, and use two-phased iterators for
faster intersection by avoiding loading positions in certain cases.
(Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
* LUCENE-6393: Add two-phase support to SpanPositionCheckQuery
and its subclasses: SpanPositionRangeQuery, SpanPayloadCheckQuery,
SpanNearPayloadCheckQuery, SpanFirstQuery. (Paul Elschot, Robert Muir)
* LUCENE-6352: Added a new query time join to the join module that uses
global ordinals, which is faster for subsequent joins between reopens.

View File

@ -27,7 +27,7 @@ import org.apache.lucene.search.TwoPhaseIterator;
* A {@link Spans} implementation wrapping another spans instance,
* allowing to override selected methods in a subclass.
*/
public class FilterSpans extends Spans {
public abstract class FilterSpans extends Spans {
/** The wrapped spans instance. */
protected final Spans in;
@ -89,6 +89,31 @@ public class FilterSpans extends Spans {
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
return in.asTwoPhaseIterator();
TwoPhaseIterator inner = in.asTwoPhaseIterator();
if (inner != null) {
// wrapped instance has an approximation
return new TwoPhaseIterator(inner.approximation()) {
@Override
public boolean matches() throws IOException {
return inner.matches() && twoPhaseCurrentDocMatches();
}
};
} else {
// wrapped instance has no approximation, but
// we can still defer matching until absolutely needed.
return new TwoPhaseIterator(in) {
@Override
public boolean matches() throws IOException {
return twoPhaseCurrentDocMatches();
}
};
}
}
/**
* Returns true if the current document matches.
* <p>
* This is called during two-phase processing.
*/
public abstract boolean twoPhaseCurrentDocMatches() throws IOException;
}

View File

@ -105,6 +105,11 @@ public class NearSpansUnordered extends NearSpans {
}
}
@Override
public boolean twoPhaseCurrentDocMatches() throws IOException {
return true; // we don't modify the spans, we just capture information from it.
}
@Override
public String toString() {
return "NearSpansUnordered.SpansCell(" + in.toString() + ")";

View File

@ -127,45 +127,27 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
@Override
public int nextDoc() throws IOException {
if (in.nextDoc() == NO_MORE_DOCS)
return NO_MORE_DOCS;
return toNextDocWithAllowedPosition();
while (true) {
int doc = in.nextDoc();
if (doc == NO_MORE_DOCS) {
return NO_MORE_DOCS;
} else if (twoPhaseCurrentDocMatches()) {
return doc;
}
}
}
@Override
public int advance(int target) throws IOException {
if (in.advance(target) == NO_MORE_DOCS)
return NO_MORE_DOCS;
return toNextDocWithAllowedPosition();
}
@SuppressWarnings("fallthrough")
protected int toNextDocWithAllowedPosition() throws IOException {
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS;
for (;;) {
switch(acceptPosition(in)) {
case YES:
atFirstInCurrentDoc = true;
return in.docID();
case NO:
startPos = in.nextStartPosition();
if (startPos != NO_MORE_POSITIONS) {
break;
}
// else fallthrough
case NO_MORE_IN_CURRENT_DOC:
if (in.nextDoc() == NO_MORE_DOCS) {
startPos = -1;
return NO_MORE_DOCS;
}
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
break;
int doc = in.advance(target);
while (doc != NO_MORE_DOCS) {
if (twoPhaseCurrentDocMatches()) {
break;
}
doc = in.nextDoc();
}
return doc;
}
@Override
@ -190,6 +172,30 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
}
}
}
// return true if the current document matches
@SuppressWarnings("fallthrough")
public boolean twoPhaseCurrentDocMatches() throws IOException {
atFirstInCurrentDoc = false;
startPos = in.nextStartPosition();
assert startPos != NO_MORE_POSITIONS;
for (;;) {
switch(acceptPosition(in)) {
case YES:
atFirstInCurrentDoc = true;
return true;
case NO:
startPos = in.nextStartPosition();
if (startPos != NO_MORE_POSITIONS) {
break;
}
// else fallthrough
case NO_MORE_IN_CURRENT_DOC:
startPos = -1;
return false;
}
}
}
@Override
public int startPosition() {

View File

@ -158,6 +158,79 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
}
}
/** SpanPositionRangeQuery(A, M, N) ⊆ TermQuery(A) */
public void testSpanRangeTerm() throws Exception {
Term t1 = randomTerm();
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j);
Query q2 = new TermQuery(t1);
assertSubsetOf(q1, q2);
}
}
}
/** SpanPositionRangeQuery(A, M, N) ⊆ SpanFirstQuery(A, M, N+1) */
public void testSpanRangeTermIncreasingEnd() throws Exception {
Term t1 = randomTerm();
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j);
Query q2 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j+1);
assertSubsetOf(q1, q2);
}
}
}
/** SpanPositionRangeQuery(A, 0, ∞) = TermQuery(A) */
public void testSpanRangeTermEverything() throws Exception {
Term t1 = randomTerm();
Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), 0, Integer.MAX_VALUE);
Query q2 = new TermQuery(t1);
assertSameSet(q1, q2);
}
/** SpanPositionRangeQuery([A B], M, N) ⊆ SpanNearQuery([A B]) */
public void testSpanRangeNear() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j);
Query q2 = nearQuery;
assertSubsetOf(q1, q2);
}
}
}
/** SpanPositionRangeQuery([A B], M, N) ⊆ SpanFirstQuery([A B], M, N+1) */
public void testSpanRangeNearIncreasingEnd() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
for (int i = 0; i < 5; i++) {
for (int j = 0; j < 5; j++) {
Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j);
Query q2 = new SpanPositionRangeQuery(nearQuery, i, i+j+1);
assertSubsetOf(q1, q2);
}
}
}
/** SpanPositionRangeQuery([A B], ∞) = SpanNearQuery([A B]) */
public void testSpanRangeNearEverything() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
Query q1 = new SpanPositionRangeQuery(nearQuery, 0, Integer.MAX_VALUE);
Query q2 = nearQuery;
assertSameSet(q1, q2);
}
/** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
public void testSpanFirstTerm() throws Exception {
Term t1 = randomTerm();
@ -187,7 +260,6 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
}
/** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
public void testSpanFirstNear() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
@ -201,7 +273,6 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
}
/** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
public void testSpanFirstNearIncreasing() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();
@ -215,7 +286,6 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
}
/** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
public void testSpanFirstNearEverything() throws Exception {
Term t1 = randomTerm();
Term t2 = randomTerm();