Deduplicate Index Meta Generations when Deserializing (#65619) (#65666)

These strings are individually quite long, and each one may be repeated up to as many times as there are snapshots in the repository. Since they make up more than half of the size of the repository metadata and are likely the same for all snapshots, deduplicating them can easily save more than half the size of `RepositoryData` in most real-world cases.
2020-12-01 12:34:35 +01:00 · 2020-12-01 12:34:35 +01:00 · 745f527fea
parent f8f08ba3a7
commit 745f527fea
1 changed files with 2 additions and 1 deletions
--- a/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java
+++ b/server/src/main/java/org/elasticsearch/repositories/RepositoryData.java
@ -610,6 +610,7 @@ public final class RepositoryData {
                                       Map<String, Version> snapshotVersions,
                                       Map<SnapshotId, Map<String, String>> indexMetaLookup) throws IOException {
        XContentParserUtils.ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.nextToken(), parser);
+        final Map<String, String> stringDeduplicator = new HashMap<>();
        while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
            String name = null;
            String uuid = null;
@ -630,7 +631,7 @@ public final class RepositoryData {
                        state = SnapshotState.fromValue((byte) parser.intValue());
                        break;
                    case INDEX_METADATA_LOOKUP:
-                        metaGenerations = parser.mapStrings();
+                        metaGenerations = parser.map(HashMap::new, p -> stringDeduplicator.computeIfAbsent(p.text(), Function.identity()));
                        break;
                    case VERSION:
                        version = Version.fromString(parser.text());