mirror of https://github.com/apache/lucene.git
LUCENE-6393: Add two-phase support to SpanPositionCheckQuery and subclasses
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671420 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f7d633a56e
commit
7f4379762e
|
@ -45,6 +45,10 @@ New Features
|
|||
code with boolean queries, and use two-phased iterators for
|
||||
faster intersection by avoiding loading positions in certain cases.
|
||||
(Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
|
||||
|
||||
* LUCENE-6393: Add two-phase support to SpanPositionCheckQuery
|
||||
and its subclasses: SpanPositionRangeQuery, SpanPayloadCheckQuery,
|
||||
SpanNearPayloadCheckQuery, SpanFirstQuery. (Paul Elschot, Robert Muir)
|
||||
|
||||
* LUCENE-6352: Added a new query time join to the join module that uses
|
||||
global ordinals, which is faster for subsequent joins between reopens.
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.search.TwoPhaseIterator;
|
|||
* A {@link Spans} implementation wrapping another spans instance,
|
||||
* allowing selected methods to be overridden in a subclass.
|
||||
*/
|
||||
public class FilterSpans extends Spans {
|
||||
public abstract class FilterSpans extends Spans {
|
||||
|
||||
/** The wrapped spans instance. */
|
||||
protected final Spans in;
|
||||
|
@ -89,6 +89,31 @@ public class FilterSpans extends Spans {
|
|||
|
||||
@Override
|
||||
public TwoPhaseIterator asTwoPhaseIterator() {
|
||||
return in.asTwoPhaseIterator();
|
||||
TwoPhaseIterator inner = in.asTwoPhaseIterator();
|
||||
if (inner != null) {
|
||||
// wrapped instance has an approximation
|
||||
return new TwoPhaseIterator(inner.approximation()) {
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return inner.matches() && twoPhaseCurrentDocMatches();
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// wrapped instance has no approximation, but
|
||||
// we can still defer matching until absolutely needed.
|
||||
return new TwoPhaseIterator(in) {
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
return twoPhaseCurrentDocMatches();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the current document matches.
|
||||
* <p>
|
||||
* This is called during two-phase processing.
|
||||
*/
|
||||
public abstract boolean twoPhaseCurrentDocMatches() throws IOException;
|
||||
}
|
||||
|
|
|
@ -105,6 +105,11 @@ public class NearSpansUnordered extends NearSpans {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
return true; // we don't modify the spans, we just capture information from it.
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NearSpansUnordered.SpansCell(" + in.toString() + ")";
|
||||
|
|
|
@ -127,45 +127,27 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (in.nextDoc() == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
|
||||
return toNextDocWithAllowedPosition();
|
||||
while (true) {
|
||||
int doc = in.nextDoc();
|
||||
if (doc == NO_MORE_DOCS) {
|
||||
return NO_MORE_DOCS;
|
||||
} else if (twoPhaseCurrentDocMatches()) {
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (in.advance(target) == NO_MORE_DOCS)
|
||||
return NO_MORE_DOCS;
|
||||
|
||||
return toNextDocWithAllowedPosition();
|
||||
}
|
||||
|
||||
@SuppressWarnings("fallthrough")
|
||||
protected int toNextDocWithAllowedPosition() throws IOException {
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS;
|
||||
for (;;) {
|
||||
switch(acceptPosition(in)) {
|
||||
case YES:
|
||||
atFirstInCurrentDoc = true;
|
||||
return in.docID();
|
||||
case NO:
|
||||
startPos = in.nextStartPosition();
|
||||
if (startPos != NO_MORE_POSITIONS) {
|
||||
break;
|
||||
}
|
||||
// else fallthrough
|
||||
case NO_MORE_IN_CURRENT_DOC:
|
||||
if (in.nextDoc() == NO_MORE_DOCS) {
|
||||
startPos = -1;
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
|
||||
break;
|
||||
int doc = in.advance(target);
|
||||
while (doc != NO_MORE_DOCS) {
|
||||
if (twoPhaseCurrentDocMatches()) {
|
||||
break;
|
||||
}
|
||||
doc = in.nextDoc();
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -190,6 +172,30 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
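// Returns true if acceptPosition() accepts at least one position of the current document; otherwise startPos is reset to -1 and false is returned.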
|
||||
@SuppressWarnings("fallthrough")
|
||||
public boolean twoPhaseCurrentDocMatches() throws IOException {
|
||||
atFirstInCurrentDoc = false;
|
||||
startPos = in.nextStartPosition();
|
||||
assert startPos != NO_MORE_POSITIONS;
|
||||
for (;;) {
|
||||
switch(acceptPosition(in)) {
|
||||
case YES:
|
||||
atFirstInCurrentDoc = true;
|
||||
return true;
|
||||
case NO:
|
||||
startPos = in.nextStartPosition();
|
||||
if (startPos != NO_MORE_POSITIONS) {
|
||||
break;
|
||||
}
|
||||
// else fallthrough
|
||||
case NO_MORE_IN_CURRENT_DOC:
|
||||
startPos = -1;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
|
|
|
@ -158,6 +158,79 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
}
|
||||
}
|
||||
|
||||
/** SpanPositionRangeQuery(A, M, N) ⊆ TermQuery(A) */
|
||||
public void testSpanRangeTerm() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j);
|
||||
Query q2 = new TermQuery(t1);
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** SpanPositionRangeQuery(A, M, N) ⊆ SpanPositionRangeQuery(A, M, N+1) */
|
||||
public void testSpanRangeTermIncreasingEnd() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j);
|
||||
Query q2 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j+1);
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** SpanPositionRangeQuery(A, 0, ∞) = TermQuery(A) */
|
||||
public void testSpanRangeTermEverything() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), 0, Integer.MAX_VALUE);
|
||||
Query q2 = new TermQuery(t1);
|
||||
assertSameSet(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanPositionRangeQuery([A B], M, N) ⊆ SpanNearQuery([A B]) */
|
||||
public void testSpanRangeNear() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j);
|
||||
Query q2 = nearQuery;
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** SpanPositionRangeQuery([A B], M, N) ⊆ SpanPositionRangeQuery([A B], M, N+1) */
|
||||
public void testSpanRangeNearIncreasingEnd() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
|
||||
for (int i = 0; i < 5; i++) {
|
||||
for (int j = 0; j < 5; j++) {
|
||||
Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j);
|
||||
Query q2 = new SpanPositionRangeQuery(nearQuery, i, i+j+1);
|
||||
assertSubsetOf(q1, q2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** SpanPositionRangeQuery([A B], 0, ∞) = SpanNearQuery([A B]) */
|
||||
public void testSpanRangeNearEverything() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
|
||||
SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
|
||||
Query q1 = new SpanPositionRangeQuery(nearQuery, 0, Integer.MAX_VALUE);
|
||||
Query q2 = nearQuery;
|
||||
assertSameSet(q1, q2);
|
||||
}
|
||||
|
||||
/** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
|
||||
public void testSpanFirstTerm() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
|
@ -187,7 +260,6 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
}
|
||||
|
||||
/** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
|
||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
|
||||
public void testSpanFirstNear() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
|
@ -201,7 +273,6 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
}
|
||||
|
||||
/** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
|
||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
|
||||
public void testSpanFirstNearIncreasing() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
|
@ -215,7 +286,6 @@ public class TestSpanSearchEquivalence extends SearchEquivalenceTestBase {
|
|||
}
|
||||
|
||||
/** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
|
||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
|
||||
public void testSpanFirstNearEverything() throws Exception {
|
||||
Term t1 = randomTerm();
|
||||
Term t2 = randomTerm();
|
||||
|
|
Loading…
Reference in New Issue