Prevent flattening of ordered and unordered interval sources (#13819)

This commit removes the flattening of nested ordered and unordered interval sources, because flattening changes the gaps that are visible to parent intervals. For example, ordered("a", ordered("b", "c")) should produce different gaps than ordered("a", "b", "c").

Phrase/Block operators will continue to flatten their sub-sources since this does not affect the inner gap (which is always 0 in the case of blocks).
This commit is contained in:
Jim Ferenczi 2024-10-03 12:21:43 +01:00 committed by Jim Ferenczi
parent 3ac1db6470
commit 0f2fac699a
4 changed files with 27 additions and 26 deletions

View File

@ -38,6 +38,7 @@ class BlockIntervalsSource extends ConjunctionIntervalsSource {
List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) {
if (s instanceof BlockIntervalsSource) {
// Block sources can be flattened because they do not increase the gap (gap = 0)
flattened.addAll(((BlockIntervalsSource) s).subSources);
} else {
flattened.add(s);

View File

@ -30,25 +30,13 @@ class OrderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
if (sources.size() == 1) {
return sources.get(0);
}
List<IntervalsSource> rewritten = deduplicate(flatten(sources));
List<IntervalsSource> rewritten = deduplicate(sources);
if (rewritten.size() == 1) {
return rewritten.get(0);
}
return new OrderedIntervalsSource(rewritten);
}
/**
 * Expands nested ordered sources by one level.
 *
 * <p>Each element of {@code sources} that is an {@code OrderedIntervalsSource} is replaced by
 * its {@code subSources}; every other element is copied as-is. The input list is not modified.
 *
 * <p>NOTE: flattening changes the gaps seen by parent intervals, e.g. ordered("a",
 * ordered("b", "c")) versus ordered("a", "b", "c").
 *
 * @param sources the sources to expand
 * @return a new list containing the expanded sources in their original order
 */
private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) {
if (s instanceof OrderedIntervalsSource) {
// Inline the nested source's children in place of the nested source itself.
flattened.addAll(((OrderedIntervalsSource) s).subSources);
} else {
flattened.add(s);
}
}
return flattened;
}
private static List<IntervalsSource> deduplicate(List<IntervalsSource> sources) {
List<IntervalsSource> deduplicated = new ArrayList<>();
List<IntervalsSource> current = new ArrayList<>();

View File

@ -33,7 +33,7 @@ class UnorderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
if (sources.size() == 1) {
return sources.get(0);
}
List<IntervalsSource> rewritten = deduplicate(flatten(sources));
List<IntervalsSource> rewritten = deduplicate(sources);
if (rewritten.size() == 1) {
return rewritten.get(0);
}
@ -55,18 +55,6 @@ class UnorderedIntervalsSource extends MinimizingConjunctionIntervalsSource {
return deduplicated;
}
/**
 * Expands nested unordered sources by one level.
 *
 * <p>Each element of {@code sources} that is an {@code UnorderedIntervalsSource} is replaced by
 * its {@code subSources}; every other element is copied as-is. The input list is not modified.
 *
 * <p>NOTE: flattening changes the gaps seen by parent intervals, so a nested unordered source
 * is not equivalent to listing its children directly in the parent.
 *
 * @param sources the sources to expand
 * @return a new list containing the expanded sources in their original order
 */
private static List<IntervalsSource> flatten(List<IntervalsSource> sources) {
List<IntervalsSource> flattened = new ArrayList<>();
for (IntervalsSource s : sources) {
if (s instanceof UnorderedIntervalsSource) {
// Inline the nested source's children in place of the nested source itself.
flattened.addAll(((UnorderedIntervalsSource) s).subSources);
} else {
flattened.add(s);
}
}
return flattened;
}
/**
 * Creates an unordered source over the given sub-sources by passing them to the superclass
 * constructor. The constructor is private, so instances can only be created from within this
 * class.
 *
 * @param sources the sub-sources to combine
 */
private UnorderedIntervalsSource(List<IntervalsSource> sources) {
super(sources);
}

View File

@ -338,6 +338,18 @@ public class TestIntervalQuery extends LuceneTestCase {
checkHits(q, new int[] {6, 7});
}
// Applies maxgaps(0) to unordered(w3, unordered(w1, w5)) and expects document 0 as the hit.
// The inner unordered source is passed as a single nested sub-source, so this covers the case
// where nested unordered sources are not flattened into their parent.
public void testUnorderedWithNoGap() throws IOException {
Query q =
new IntervalQuery(
field,
Intervals.maxgaps(
0,
Intervals.unordered(
Intervals.term("w3"),
Intervals.unordered(Intervals.term("w1"), Intervals.term("w5")))));
checkHits(q, new int[] {0});
}
public void testOrderedWithGaps() throws IOException {
Query q =
new IntervalQuery(
@ -360,6 +372,18 @@ public class TestIntervalQuery extends LuceneTestCase {
checkHits(q, new int[] {12});
}
// Applies maxgaps(0) to ordered(ordered(w1, w4), w5) and expects document 0 as the hit.
// The inner ordered source is passed as a single nested sub-source, so this covers the case
// where nested ordered sources are not flattened into their parent.
public void testOrderedWithNoGap() throws IOException {
Query q =
new IntervalQuery(
field,
Intervals.maxgaps(
0,
Intervals.ordered(
Intervals.ordered(Intervals.term("w1"), Intervals.term("w4")),
Intervals.term("w5"))));
checkHits(q, new int[] {0});
}
public void testNestedOrInContainedBy() throws IOException {
Query q =
new IntervalQuery(