mirror of https://github.com/apache/lucene.git
Fix ordered intervals query over interleaved terms (#12214)
Given the input text 'A B A C A B C' and the query ORDERED(A, B, C), we should retrieve the hits [0,3] and [4,6]; currently [4,6] is skipped. After finding the first interval [0,3], the subintervals are A[0,0], B[1,1], C[3,3]. The algorithm then tries to minimize the interval, and the subintervals become A[2,2], B[5,5], C[3,3] (minimization stops once it finds 5 > 3). When looking for the next interval, it calls advance(B) before checking whether B is after A (the do-while loop), so the subintervals become A[2,2], B[inf,inf], C[3,3] and NO_MORE_INTERVALS is returned. This commit instead continues advancing subintervals from where the last `nextInterval` call stopped, rather than always advancing all subintervals.
This commit is contained in:
parent
0782535017
commit
a6475cecbf
|
@ -179,6 +179,8 @@ Bug Fixes
|
||||||
|
|
||||||
* GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries. (Jasir KT)
|
* GITHUB#12202: Fix MultiFieldQueryParser to apply boosts to regexp, wildcard, prefix, range, fuzzy queries. (Jasir KT)
|
||||||
|
|
||||||
|
* GITHUB#12214: Fix ordered intervals query to avoid skipping some of the results over interleaved terms. (Hongyu Yan)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
|
|
|
@ -112,7 +112,7 @@ class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
|
||||||
|
|
||||||
private static class OrderedIntervalIterator extends ConjunctionIntervalIterator {
|
private static class OrderedIntervalIterator extends ConjunctionIntervalIterator {
|
||||||
|
|
||||||
int start = -1, end = -1, i;
|
int start = -1, end = -1, i = 1;
|
||||||
int slop;
|
int slop;
|
||||||
final MatchCallback onMatch;
|
final MatchCallback onMatch;
|
||||||
|
|
||||||
|
@ -136,7 +136,6 @@ class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
|
||||||
start = end = slop = IntervalIterator.NO_MORE_INTERVALS;
|
start = end = slop = IntervalIterator.NO_MORE_INTERVALS;
|
||||||
int lastStart = Integer.MAX_VALUE;
|
int lastStart = Integer.MAX_VALUE;
|
||||||
boolean minimizing = false;
|
boolean minimizing = false;
|
||||||
i = 1;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
while (true) {
|
while (true) {
|
||||||
if (subIterators.get(i - 1).end() >= lastStart) {
|
if (subIterators.get(i - 1).end() >= lastStart) {
|
||||||
|
|
|
@ -84,7 +84,8 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||||
"greater new york",
|
"greater new york",
|
||||||
"x x x x x intend x x x message x x x message x x x addressed x x",
|
"x x x x x intend x x x message x x x message x x x addressed x x",
|
||||||
"issue with intervals queries from search engine. So it's a big issue for us as we need to do ordered searches. Thank you to help us concerning that issue",
|
"issue with intervals queries from search engine. So it's a big issue for us as we need to do ordered searches. Thank you to help us concerning that issue",
|
||||||
"場外好朋友"
|
"場外好朋友",
|
||||||
|
"alice bob alice alice carl alice bob alice carl"
|
||||||
};
|
};
|
||||||
|
|
||||||
private void checkHits(Query query, int[] results) throws IOException {
|
private void checkHits(Query query, int[] results) throws IOException {
|
||||||
|
@ -348,6 +349,17 @@ public class TestIntervalQuery extends LuceneTestCase {
|
||||||
checkHits(q, new int[] {});
|
checkHits(q, new int[] {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testOrderedWithGaps2() throws IOException {
|
||||||
|
Query q =
|
||||||
|
new IntervalQuery(
|
||||||
|
field,
|
||||||
|
Intervals.maxgaps(
|
||||||
|
1,
|
||||||
|
Intervals.ordered(
|
||||||
|
Intervals.term("alice"), Intervals.term("bob"), Intervals.term("carl"))));
|
||||||
|
checkHits(q, new int[] {12});
|
||||||
|
}
|
||||||
|
|
||||||
public void testNestedOrInContainedBy() throws IOException {
|
public void testNestedOrInContainedBy() throws IOException {
|
||||||
Query q =
|
Query q =
|
||||||
new IntervalQuery(
|
new IntervalQuery(
|
||||||
|
|
Loading…
Reference in New Issue