From 8fbf92e047f792ff1c69bf67d14784ac55eee88f Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 7 Jun 2022 11:33:46 -0700 Subject: [PATCH] SqlSegmentsMetadataQuery: Fix OVERLAPS for wide target segments. (#12600) * SqlSegmentsMetadataQuery: Fix OVERLAPS for wide target segments. Segments with endpoints prior to year 0 or after year 9999 may overlap the search intervals but not match the generated SQL conditions. So, we need to add an additional OR condition to catch these. I checked a real, live MySQL metadata store to confirm that the query still uses metadata store indexes. It does. * Add comments. --- .../metadata/SqlSegmentsMetadataQuery.java | 35 ++++-- ...exerSQLMetadataStorageCoordinatorTest.java | 104 ++++++++++++++++++ 2 files changed, 131 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java index 4ecf7bebe6d..737b5c6d360 100644 --- a/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java +++ b/server/src/main/java/org/apache/druid/metadata/SqlSegmentsMetadataQuery.java @@ -233,12 +233,25 @@ public class SqlSegmentsMetadataQuery ) ); - if (i == intervals.size() - 1) { - sb.append(")"); - } else { + if (i != intervals.size() - 1) { sb.append(" OR "); } } + + if (matchMode == IntervalMode.OVERLAPS) { + // Segments with both endpoints outside 0000/10000 may overlap the search intervals but not match the + // generated SQL conditions. We need to add one more OR condition to catch all of these. + sb.append( + StringUtils.format( + " OR start < %2$s OR %1$send%1$s >= %3$s", + connector.getQuoteString(), + ":minmatch", + ":maxmatch" + ) + ); + } + + sb.append(")"); } final Query> sql = handle @@ -247,12 +260,17 @@ public class SqlSegmentsMetadataQuery .bind("used", used) .bind("dataSource", dataSource); - if (compareAsString) { + if (compareAsString && !intervals.isEmpty()) { final Iterator iterator = intervals.iterator(); for (int i = 0; iterator.hasNext(); i++) { - Interval interval = iterator.next(); - sql.bind(StringUtils.format("start%d", i), interval.getStart().toString()) - .bind(StringUtils.format("end%d", i), interval.getEnd().toString()); + final Interval interval = iterator.next(); + sql.bind(StringUtils.format("start%d", i), interval.getStart().toString()); + sql.bind(StringUtils.format("end%d", i), interval.getEnd().toString()); + } + + if (matchMode == IntervalMode.OVERLAPS) { + sql.bind("minmatch", "0000-"); // '-' is lexicographically lower than '0' so this catches negative-year starts + sql.bind("maxmatch", "10000-"); // Catches end points at 10000 or after } } @@ -269,7 +287,8 @@ public class SqlSegmentsMetadataQuery } else { // Must re-check that the interval matches, even if comparing as string, because the *segment interval* // might not be string-comparable. (Consider a query interval like "2000-01-01/3000-01-01" and a - // segment interval like "20010/20011".) + // segment interval like "20010/20011". Consider also a segment interval with endpoints prior to + // year 0000 or after year 9999.) for (Interval interval : intervals) { if (matchMode.apply(interval, dataSegment.getInterval())) { return true; diff --git a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java index fca3366b886..e42478b933e 100644 --- a/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java +++ b/server/src/test/java/org/apache/druid/metadata/IndexerSQLMetadataStorageCoordinatorTest.java @@ -254,6 +254,42 @@ public class IndexerSQLMetadataStorageCoordinatorTest 100 ); + private final DataSegment eternityRangeSegment1 = new DataSegment( + "eternity1", + Intervals.ETERNITY, + "zversion", + ImmutableMap.of(), + ImmutableList.of("dim1"), + ImmutableList.of("m1"), + new NumberedShardSpec(0, 1), + 9, + 100 + ); + + private final DataSegment halfEternityRangeSegment1 = new DataSegment( + "halfEternity", + new Interval(DateTimes.MIN, DateTimes.of("1970")), + "zversion", + ImmutableMap.of(), + ImmutableList.of("dim1"), + ImmutableList.of("m1"), + new NumberedShardSpec(0, 1), + 9, + 100 + ); + + private final DataSegment halfEternityRangeSegment2 = new DataSegment( + "halfEternity", + new Interval(DateTimes.of("1970"), DateTimes.MAX), + "zversion", + ImmutableMap.of(), + ImmutableList.of("dim1"), + ImmutableList.of("m1"), + new NumberedShardSpec(0, 1), + 9, + 100 + ); + private final Set SEGMENTS = ImmutableSet.of(defaultSegment, defaultSegment2); private final AtomicLong metadataUpdateCounter = new AtomicLong(); private final AtomicLong segmentTableDropUpdateCounter = new AtomicLong(); @@ -1133,6 +1169,74 @@ public class IndexerSQLMetadataStorageCoordinatorTest ); } + @Test + public void testUsedEternitySegmentEternityFilter() throws IOException + { + coordinator.announceHistoricalSegments(ImmutableSet.of(eternityRangeSegment1)); + + Assert.assertEquals( + ImmutableSet.of(eternityRangeSegment1), + ImmutableSet.copyOf( + coordinator.retrieveUsedSegmentsForIntervals( + eternityRangeSegment1.getDataSource(), + Intervals.ONLY_ETERNITY, + Segments.ONLY_VISIBLE + ) + ) + ); + } + + @Test + public void testUsedEternitySegment2000Filter() throws IOException + { + coordinator.announceHistoricalSegments(ImmutableSet.of(eternityRangeSegment1)); + + Assert.assertEquals( + ImmutableSet.of(eternityRangeSegment1), + ImmutableSet.copyOf( + coordinator.retrieveUsedSegmentsForIntervals( + eternityRangeSegment1.getDataSource(), + Collections.singletonList(Intervals.of("2000/2030")), + Segments.ONLY_VISIBLE + ) + ) + ); + } + + @Test + public void testUsedHalfEternitySegmentEternityFilter() throws IOException + { + coordinator.announceHistoricalSegments(ImmutableSet.of(halfEternityRangeSegment1, halfEternityRangeSegment2)); + + Assert.assertEquals( + ImmutableSet.of(halfEternityRangeSegment1, halfEternityRangeSegment2), + ImmutableSet.copyOf( + coordinator.retrieveUsedSegmentsForIntervals( + halfEternityRangeSegment1.getDataSource(), + Intervals.ONLY_ETERNITY, + Segments.ONLY_VISIBLE + ) + ) + ); + } + + @Test + public void testUsedHalfEternitySegment2000Filter() throws IOException + { + coordinator.announceHistoricalSegments(ImmutableSet.of(halfEternityRangeSegment1, halfEternityRangeSegment2)); + + Assert.assertEquals( + ImmutableSet.of(halfEternityRangeSegment2), + ImmutableSet.copyOf( + coordinator.retrieveUsedSegmentsForIntervals( + halfEternityRangeSegment1.getDataSource(), + Collections.singletonList(Intervals.of("2000/2030")), + Segments.ONLY_VISIBLE + ) + ) + ); + } + @Test public void testUsedHugeTimeRangeEternityFilter() throws IOException {