Improve metrics for Auto Compaction (#12413)

* add impl

* add docs

* fix
This commit is contained in:
Maytas Monsereenusorn 2022-04-08 20:14:36 -07:00 committed by GitHub
parent 8edea5a82d
commit 36e17a20ea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 38 deletions

View File

@ -360,13 +360,13 @@ public class ITAutoCompactionTest extends AbstractIndexerTest
AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
0,
14906,
0,
14905,
0,
2,
0,
2,
0,
1,
0);
1);
submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET);
//...compacted into 1 new segment for the remaining one day. 2 day compacted and 0 day uncompacted. (2 total)
forceTriggerAutoCompaction(2);

View File

@ -162,7 +162,7 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
timeline = timelineWithConfiguredSegmentGranularity;
}
final List<Interval> searchIntervals =
findInitialSearchInterval(timeline, config.getSkipOffsetFromLatest(), configuredSegmentGranularity, skipIntervals.get(dataSource));
findInitialSearchInterval(dataSource, timeline, config.getSkipOffsetFromLatest(), configuredSegmentGranularity, skipIntervals.get(dataSource));
if (!searchIntervals.isEmpty()) {
timelineIterators.put(dataSource, new CompactibleTimelineObjectHolderCursor(timeline, searchIntervals, originalTimeline));
}
@ -595,7 +595,8 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
*
* @return found interval to search or null if it's not found
*/
private static List<Interval> findInitialSearchInterval(
private List<Interval> findInitialSearchInterval(
String dataSourceName,
VersionedIntervalTimeline<String, DataSegment> timeline,
Period skipOffset,
Granularity configuredSegmentGranularity,
@ -614,6 +615,12 @@ public class NewestSegmentFirstIterator implements CompactionSegmentIterator
skipIntervals
);
// Calculate stats of all skipped segments
for (Interval skipInterval : fullSkipIntervals) {
final List<DataSegment> segments = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(skipInterval, Partitions.ONLY_COMPLETE));
collectSegmentStatistics(skippedSegments, dataSourceName, new SegmentsToCompact(segments));
}
final Interval totalInterval = new Interval(first.getInterval().getStart(), last.getInterval().getEnd());
final List<Interval> filteredInterval = filterSkipIntervals(totalInterval, fullSkipIntervals);
final List<Interval> searchIntervals = new ArrayList<>();

View File

@ -367,13 +367,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
0,
TOTAL_BYTE_PER_DATASOURCE,
0,
40,
0,
TOTAL_INTERVAL_PER_DATASOURCE,
0,
1,
0,
TOTAL_SEGMENT_PER_DATASOURCE / 2,
0
4
);
}
@ -389,13 +389,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
0,
TOTAL_BYTE_PER_DATASOURCE,
0,
40,
0,
TOTAL_INTERVAL_PER_DATASOURCE,
0,
1,
0,
TOTAL_SEGMENT_PER_DATASOURCE / 2,
0
4
);
}
@ -465,16 +465,16 @@ public class CompactSegmentsTest
dataSourceName,
TOTAL_BYTE_PER_DATASOURCE - 120 - 40 * (compactionRunCount + 1),
120 + 40 * (compactionRunCount + 1),
0,
40,
TOTAL_INTERVAL_PER_DATASOURCE - 3 - (compactionRunCount + 1),
3 + (compactionRunCount + 1),
0,
1,
TOTAL_SEGMENT_PER_DATASOURCE - 12 - 4 * (compactionRunCount + 1),
// 12 segments were compressed before any auto compaction
// 4 segments were compressed in this run of auto compaction
// Each previous auto compaction run resulted in 2 compacted segments (4 segments compacted into 2 segments)
12 + 4 + 2 * (compactionRunCount),
0
4
);
}
@ -490,15 +490,15 @@ public class CompactSegmentsTest
dataSourceName,
0,
TOTAL_BYTE_PER_DATASOURCE,
0,
40,
0,
TOTAL_INTERVAL_PER_DATASOURCE,
0,
1,
0,
// 12 segments were compressed before any auto compaction
// 32 segments need compaction which are now compacted into 16 segments (4 segments compacted into 2 segments each run)
12 + 16,
0
4
);
}
@ -530,13 +530,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
0,
TOTAL_BYTE_PER_DATASOURCE,
0,
40,
0,
TOTAL_INTERVAL_PER_DATASOURCE,
0,
1,
0,
TOTAL_SEGMENT_PER_DATASOURCE / 2,
0
4
);
}
@ -553,13 +553,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
0,
TOTAL_BYTE_PER_DATASOURCE,
0,
40,
0,
TOTAL_INTERVAL_PER_DATASOURCE,
0,
1,
0,
TOTAL_SEGMENT_PER_DATASOURCE / 2,
0
4
);
}
@ -626,13 +626,13 @@ public class CompactSegmentsTest
// Minus 120 bytes accounting for the three skipped segments' original size
TOTAL_BYTE_PER_DATASOURCE - 120 - 40 * (compactionRunCount + 1),
40 * (compactionRunCount + 1),
1200,
1240,
TOTAL_INTERVAL_PER_DATASOURCE - 3 - (compactionRunCount + 1),
(compactionRunCount + 1),
3,
4,
TOTAL_SEGMENT_PER_DATASOURCE - 12 - 4 * (compactionRunCount + 1),
4 + 2 * (compactionRunCount),
12
16
);
}
@ -649,13 +649,13 @@ public class CompactSegmentsTest
0,
// Minus 120 bytes accounting for the three skipped segments' original size
TOTAL_BYTE_PER_DATASOURCE - 120,
1200,
1240,
0,
TOTAL_INTERVAL_PER_DATASOURCE - 3,
3,
4,
0,
16,
12
16
);
}
@ -1794,13 +1794,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
TOTAL_BYTE_PER_DATASOURCE - 40 * (compactionRunCount + 1),
40 * (compactionRunCount + 1),
0,
40,
TOTAL_INTERVAL_PER_DATASOURCE - (compactionRunCount + 1),
(compactionRunCount + 1),
0,
1,
TOTAL_SEGMENT_PER_DATASOURCE - 4 * (compactionRunCount + 1),
2 * (compactionRunCount + 1),
0
4
);
} else {
verifySnapshot(
@ -1809,13 +1809,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
TOTAL_BYTE_PER_DATASOURCE - 40 * (compactionRunCount + 1),
40 * (compactionRunCount + 1),
0,
40,
TOTAL_INTERVAL_PER_DATASOURCE - (compactionRunCount + 1),
(compactionRunCount + 1),
0,
1,
TOTAL_SEGMENT_PER_DATASOURCE - 4 * (compactionRunCount + 1),
2 * compactionRunCount + 4,
0
4
);
}
}
@ -1828,13 +1828,13 @@ public class CompactSegmentsTest
DATA_SOURCE_PREFIX + i,
TOTAL_BYTE_PER_DATASOURCE - 40 * compactionRunCount,
40 * compactionRunCount,
0,
40,
TOTAL_INTERVAL_PER_DATASOURCE - compactionRunCount,
compactionRunCount,
0,
1,
TOTAL_SEGMENT_PER_DATASOURCE - 4 * compactionRunCount,
2 * compactionRunCount,
0
4
);
}
}