SqlSegmentsMetadataQuery: Fix OVERLAPS for wide target segments. (#12600)

* SqlSegmentsMetadataQuery: Fix OVERLAPS for wide target segments.

Segments with endpoints prior to year 0 or after year 9999 may overlap
the search intervals but not match the generated SQL conditions. So, we
need to add an additional OR condition to catch these.

I checked a real, live MySQL metadata store to confirm that the query
still uses metadata store indexes. It does.

* Add comments.
This commit is contained in:
Gian Merlino 2022-06-07 11:33:46 -07:00 committed by GitHub
parent 59a0c10c47
commit 8fbf92e047
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 131 additions and 8 deletions

View File

@ -233,12 +233,25 @@ public class SqlSegmentsMetadataQuery
)
);
if (i == intervals.size() - 1) {
sb.append(")");
} else {
if (i != intervals.size() - 1) {
sb.append(" OR ");
}
}
if (matchMode == IntervalMode.OVERLAPS) {
// Segments with both endpoints outside 0000/10000 may overlap the search intervals but not match the
// generated SQL conditions. We need to add one more OR condition to catch all of these.
sb.append(
StringUtils.format(
" OR start < %2$s OR %1$send%1$s >= %3$s",
connector.getQuoteString(),
":minmatch",
":maxmatch"
)
);
}
sb.append(")");
}
final Query<Map<String, Object>> sql = handle
@ -247,12 +260,17 @@ public class SqlSegmentsMetadataQuery
.bind("used", used)
.bind("dataSource", dataSource);
if (compareAsString) {
if (compareAsString && !intervals.isEmpty()) {
final Iterator<Interval> iterator = intervals.iterator();
for (int i = 0; iterator.hasNext(); i++) {
Interval interval = iterator.next();
sql.bind(StringUtils.format("start%d", i), interval.getStart().toString())
.bind(StringUtils.format("end%d", i), interval.getEnd().toString());
final Interval interval = iterator.next();
sql.bind(StringUtils.format("start%d", i), interval.getStart().toString());
sql.bind(StringUtils.format("end%d", i), interval.getEnd().toString());
}
if (matchMode == IntervalMode.OVERLAPS) {
sql.bind("minmatch", "0000-"); // '-' is lexicographically lower than '0' so this catches negative-year starts
sql.bind("maxmatch", "10000-"); // Catches end points at 10000 or after
}
}
@ -269,7 +287,8 @@ public class SqlSegmentsMetadataQuery
} else {
// Must re-check that the interval matches, even if comparing as string, because the *segment interval*
// might not be string-comparable. (Consider a query interval like "2000-01-01/3000-01-01" and a
// segment interval like "20010/20011".)
// segment interval like "20010/20011". Consider also a segment interval with endpoints prior to
// year 0000 or after year 9999.)
for (Interval interval : intervals) {
if (matchMode.apply(interval, dataSegment.getInterval())) {
return true;

View File

@ -254,6 +254,42 @@ public class IndexerSQLMetadataStorageCoordinatorTest
100
);
private final DataSegment eternityRangeSegment1 = new DataSegment(
"eternity1",
Intervals.ETERNITY,
"zversion",
ImmutableMap.of(),
ImmutableList.of("dim1"),
ImmutableList.of("m1"),
new NumberedShardSpec(0, 1),
9,
100
);
private final DataSegment halfEternityRangeSegment1 = new DataSegment(
"halfEternity",
new Interval(DateTimes.MIN, DateTimes.of("1970")),
"zversion",
ImmutableMap.of(),
ImmutableList.of("dim1"),
ImmutableList.of("m1"),
new NumberedShardSpec(0, 1),
9,
100
);
private final DataSegment halfEternityRangeSegment2 = new DataSegment(
"halfEternity",
new Interval(DateTimes.of("1970"), DateTimes.MAX),
"zversion",
ImmutableMap.of(),
ImmutableList.of("dim1"),
ImmutableList.of("m1"),
new NumberedShardSpec(0, 1),
9,
100
);
private final Set<DataSegment> SEGMENTS = ImmutableSet.of(defaultSegment, defaultSegment2);
private final AtomicLong metadataUpdateCounter = new AtomicLong();
private final AtomicLong segmentTableDropUpdateCounter = new AtomicLong();
@ -1133,6 +1169,74 @@ public class IndexerSQLMetadataStorageCoordinatorTest
);
}
@Test
public void testUsedEternitySegmentEternityFilter() throws IOException
{
coordinator.announceHistoricalSegments(ImmutableSet.of(eternityRangeSegment1));
Assert.assertEquals(
ImmutableSet.of(eternityRangeSegment1),
ImmutableSet.copyOf(
coordinator.retrieveUsedSegmentsForIntervals(
eternityRangeSegment1.getDataSource(),
Intervals.ONLY_ETERNITY,
Segments.ONLY_VISIBLE
)
)
);
}
@Test
public void testUsedEternitySegment2000Filter() throws IOException
{
coordinator.announceHistoricalSegments(ImmutableSet.of(eternityRangeSegment1));
Assert.assertEquals(
ImmutableSet.of(eternityRangeSegment1),
ImmutableSet.copyOf(
coordinator.retrieveUsedSegmentsForIntervals(
eternityRangeSegment1.getDataSource(),
Collections.singletonList(Intervals.of("2000/2030")),
Segments.ONLY_VISIBLE
)
)
);
}
@Test
public void testUsedHalfEternitySegmentEternityFilter() throws IOException
{
coordinator.announceHistoricalSegments(ImmutableSet.of(halfEternityRangeSegment1, halfEternityRangeSegment2));
Assert.assertEquals(
ImmutableSet.of(halfEternityRangeSegment1, halfEternityRangeSegment2),
ImmutableSet.copyOf(
coordinator.retrieveUsedSegmentsForIntervals(
halfEternityRangeSegment1.getDataSource(),
Intervals.ONLY_ETERNITY,
Segments.ONLY_VISIBLE
)
)
);
}
@Test
public void testUsedHalfEternitySegment2000Filter() throws IOException
{
coordinator.announceHistoricalSegments(ImmutableSet.of(halfEternityRangeSegment1, halfEternityRangeSegment2));
Assert.assertEquals(
ImmutableSet.of(halfEternityRangeSegment2),
ImmutableSet.copyOf(
coordinator.retrieveUsedSegmentsForIntervals(
halfEternityRangeSegment1.getDataSource(),
Collections.singletonList(Intervals.of("2000/2030")),
Segments.ONLY_VISIBLE
)
)
);
}
@Test
public void testUsedHugeTimeRangeEternityFilter() throws IOException
{