Deduplicate Index Meta Generations when Deserializing (#65619) (#65666)

These strings are individually quite long, and each one may be repeated
up to as many times as there are snapshots in the repository.
Since these strings make up more than half of the size of the repository metadata
and are likely to be the same for all snapshots, the savings from deduplicating them
can easily amount to more than half the size of `RepositoryData` in most real-world
cases.
This commit is contained in:
Armin Braun 2020-12-01 12:34:35 +01:00 committed by GitHub
parent f8f08ba3a7
commit 745f527fea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 2 additions and 1 deletions

View File

@ -610,6 +610,7 @@ public final class RepositoryData {
Map<String, Version> snapshotVersions,
Map<SnapshotId, Map<String, String>> indexMetaLookup) throws IOException {
XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser);
final Map<String, String> stringDeduplicator = new HashMap<>();
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
String name = null;
String uuid = null;
@ -630,7 +631,7 @@ public final class RepositoryData {
state = SnapshotState.fromValue((byte) parser.intValue());
break;
case INDEX_METADATA_LOOKUP:
metaGenerations = parser.mapStrings();
metaGenerations = parser.map(HashMap::new, p -> stringDeduplicator.computeIfAbsent(p.text(), Function.identity()));
break;
case VERSION:
version = Version.fromString(parser.text());