Process retrieval of parent and child segment ids in batches (#16734)

This commit is contained in:
AmatyaAvadhanula 2024-07-15 18:24:23 +05:30 committed by GitHub
parent 78a4a09d01
commit 6891866c43
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 144 additions and 48 deletions

View File

@ -2954,18 +2954,19 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
return Collections.emptyMap();
}
final List<String> segmentIdList = ImmutableList.copyOf(segmentIds);
final Map<String, String> upgradedFromSegmentIds = new HashMap<>();
final List<List<String>> partitions = Lists.partition(ImmutableList.copyOf(segmentIds), 100);
for (List<String> partition : partitions) {
final String sql = StringUtils.format(
"SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s",
dbTables.getSegmentsTable(),
SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", segmentIdList)
SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("id", partition)
);
final Map<String, String> upgradedFromSegmentIds = new HashMap<>();
connector.retryWithHandle(
handle -> {
Query<Map<String, Object>> query = handle.createQuery(sql)
.bind("dataSource", dataSource);
SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", segmentIdList, query);
SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition("id", partition, query);
return query.map((index, r, ctx) -> {
final String id = r.getString(1);
final String upgradedFromSegmentId = r.getString(2);
@ -2976,6 +2977,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
}).list();
}
);
}
return upgradedFromSegmentIds;
}
@ -2989,28 +2991,28 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
return Collections.emptyMap();
}
final List<String> upgradedFromSegmentIdList = ImmutableList.copyOf(segmentIds);
final String sql = StringUtils.format(
"SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s",
dbTables.getSegmentsTable(),
SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn(
"upgraded_from_segment_id",
upgradedFromSegmentIdList
)
);
final Map<String, Set<String>> upgradedToSegmentIds = new HashMap<>();
retrieveSegmentsById(dataSource, segmentIds)
.stream()
.map(DataSegment::getId)
.map(SegmentId::toString)
.forEach(id -> upgradedToSegmentIds.computeIfAbsent(id, k -> new HashSet<>()).add(id));
final List<List<String>> partitions = Lists.partition(ImmutableList.copyOf(segmentIds), 100);
for (List<String> partition : partitions) {
final String sql = StringUtils.format(
"SELECT id, upgraded_from_segment_id FROM %s WHERE dataSource = :dataSource %s",
dbTables.getSegmentsTable(),
SqlSegmentsMetadataQuery.getParameterizedInConditionForColumn("upgraded_from_segment_id", partition)
);
connector.retryWithHandle(
handle -> {
Query<Map<String, Object>> query = handle.createQuery(sql)
.bind("dataSource", dataSource);
SqlSegmentsMetadataQuery.bindColumnValuesToQueryWithInCondition(
"upgraded_from_segment_id",
upgradedFromSegmentIdList,
partition,
query
);
return query.map((index, r, ctx) -> {
@ -3022,6 +3024,7 @@ public class IndexerSQLMetadataStorageCoordinator implements IndexerMetadataStor
}).list();
}
);
}
return upgradedToSegmentIds;
}

View File

@ -3452,6 +3452,48 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata
);
}
/**
 * Inserts 500 used segments (100 partition numbers, 5 versions each) where, within every
 * partition number, the four later versions are recorded as upgraded from the first one.
 * Requests the parent ids of all 500 segments in a single call and expects exactly the
 * 400 child-to-parent mappings back. The request is large enough to span several
 * lookups if the coordinator splits the ids into groups.
 */
@Test
public void testRetrieveUpgradedFromSegmentIdsInBatches()
{
  final int partitionCount = 100;
  final int versionsPerPartition = 5;

  final List<DataSegment> allSegments = new ArrayList<>();
  // child segment id -> id of the segment it was upgraded from
  final Map<String, String> childToParent = new HashMap<>();

  for (int partitionNum = 0; partitionNum < partitionCount; partitionNum++) {
    String parentId = null;
    for (int versionNum = 0; versionNum < versionsPerPartition; versionNum++) {
      final DataSegment segment = new DataSegment(
          "DS",
          Intervals.ETERNITY,
          "v " + versionNum,
          ImmutableMap.of("num", partitionNum),
          ImmutableList.of("dim"),
          ImmutableList.of("agg"),
          new NumberedShardSpec(partitionNum, 0),
          0,
          100L
      );
      allSegments.add(segment);

      final String segmentId = segment.getId().toString();
      if (versionNum == 0) {
        // The first version in each partition acts as the parent of the remaining ones.
        parentId = segmentId;
      } else {
        childToParent.put(segmentId, parentId);
      }
    }
  }

  insertUsedSegments(ImmutableSet.copyOf(allSegments), childToParent);

  final Set<String> requestedIds = allSegments.stream()
                                              .map(DataSegment::getId)
                                              .map(SegmentId::toString)
                                              .collect(Collectors.toSet());
  final Map<String, String> retrieved = coordinator.retrieveUpgradedFromSegmentIds("DS", requestedIds);

  // Parents themselves have no upgraded-from id, so only the 4 children per partition appear.
  Assert.assertEquals(400, retrieved.size());
  Assert.assertEquals(childToParent, retrieved);
}
@Test
public void testRetrieveUpgradedToSegmentIds()
{
@ -3478,6 +3520,57 @@ public class IndexerSQLMetadataStorageCoordinatorTest extends IndexerSqlMetadata
);
}
/**
 * Inserts 500 used segments (100 partition numbers, 5 versions each) where, within every
 * partition number, the four later versions are recorded as upgraded from the first one.
 * Requests the upgraded-to ids for all 500 segments in a single call and expects one
 * entry per segment: every segment maps to a set containing itself, and each parent's
 * set additionally contains its four children.
 */
@Test
public void testRetrieveUpgradedToSegmentIdsInBatches()
{
  final int partitionCount = 100;
  final int versionsPerPartition = 5;

  final List<DataSegment> allSegments = new ArrayList<>();
  // child segment id -> id of the segment it was upgraded from (what gets persisted)
  final Map<String, String> childToParent = new HashMap<>();
  // segment id -> ids expected to be reported as upgraded to from it (always includes itself)
  final Map<String, Set<String>> parentToChildren = new HashMap<>();

  for (int partitionNum = 0; partitionNum < partitionCount; partitionNum++) {
    String parentId = null;
    for (int versionNum = 0; versionNum < versionsPerPartition; versionNum++) {
      final DataSegment segment = new DataSegment(
          "DS",
          Intervals.ETERNITY,
          "v " + versionNum,
          ImmutableMap.of("num", partitionNum),
          ImmutableList.of("dim"),
          ImmutableList.of("agg"),
          new NumberedShardSpec(partitionNum, 0),
          0,
          100L
      );
      allSegments.add(segment);

      final String segmentId = segment.getId().toString();
      final Set<String> selfOnly = new HashSet<>();
      selfOnly.add(segmentId);
      parentToChildren.put(segmentId, selfOnly);

      if (versionNum == 0) {
        // The first version in each partition acts as the parent of the remaining ones.
        parentId = segmentId;
      } else {
        childToParent.put(segmentId, parentId);
        parentToChildren.get(parentId).add(segmentId);
      }
    }
  }

  insertUsedSegments(ImmutableSet.copyOf(allSegments), childToParent);

  final Set<String> requestedIds = allSegments.stream()
                                              .map(DataSegment::getId)
                                              .map(SegmentId::toString)
                                              .collect(Collectors.toSet());
  final Map<String, Set<String>> retrieved = coordinator.retrieveUpgradedToSegmentIds("DS", requestedIds);

  // Every requested segment is expected to have an entry, parents and children alike.
  Assert.assertEquals(500, retrieved.size());
  Assert.assertEquals(parentToChildren, retrieved);
}
private void insertUsedSegments(Set<DataSegment> segments, Map<String, String> upgradedFromSegmentIdMap)
{
final String table = derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();