Make RepositoryData Less Memory Heavy (#55293) (#55468)

We don't really need `LinkedHashSet` here. We can assume that all the
entries are unique and just use a list and use the list utilities to
create the cheapest possible version of the list.
Also, this fixes a bug in `addSnapshot` which would mutate the existing
linked hash set on the current instance (fortunately this never caused a real world bug)
and brings the collection in line with the java docs on its getter that claim immutability.
This commit is contained in:
Armin Braun 2020-04-20 18:28:06 +02:00 committed by GitHub
parent e0195fa1a6
commit a0763d958d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 32 deletions

View File

@ -36,11 +36,10 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@ -90,7 +89,7 @@ public final class RepositoryData {
/**
* The snapshots that each index belongs to.
*/
private final Map<IndexId, Set<SnapshotId>> indexSnapshots;
private final Map<IndexId, List<SnapshotId>> indexSnapshots;
private final Map<String, Version> snapshotVersions;
@ -100,7 +99,7 @@ public final class RepositoryData {
private final ShardGenerations shardGenerations;
public RepositoryData(long genId, Map<String, SnapshotId> snapshotIds, Map<String, SnapshotState> snapshotStates,
Map<String, Version> snapshotVersions, Map<IndexId, Set<SnapshotId>> indexSnapshots,
Map<String, Version> snapshotVersions, Map<IndexId, List<SnapshotId>> indexSnapshots,
ShardGenerations shardGenerations) {
this.genId = genId;
this.snapshotIds = Collections.unmodifiableMap(snapshotIds);
@ -112,6 +111,8 @@ public final class RepositoryData {
this.snapshotVersions = snapshotVersions;
assert indices.values().containsAll(shardGenerations.indices()) : "ShardGenerations contained indices "
+ shardGenerations.indices() + " but snapshots only reference indices " + indices.values();
assert indexSnapshots.values().stream().noneMatch(snapshotIdList -> new HashSet<>(snapshotIdList).size() != snapshotIdList.size()) :
"Found duplicate snapshot ids per index in [" + indexSnapshots + "]";
}
protected RepositoryData copy() {
@ -213,9 +214,17 @@ public final class RepositoryData {
newSnapshotStates.put(snapshotId.getUUID(), snapshotState);
Map<String, Version> newSnapshotVersions = new HashMap<>(snapshotVersions);
newSnapshotVersions.put(snapshotId.getUUID(), version);
Map<IndexId, Set<SnapshotId>> allIndexSnapshots = new HashMap<>(indexSnapshots);
Map<IndexId, List<SnapshotId>> allIndexSnapshots = new HashMap<>(indexSnapshots);
for (final IndexId indexId : shardGenerations.indices()) {
allIndexSnapshots.computeIfAbsent(indexId, k -> new LinkedHashSet<>()).add(snapshotId);
final List<SnapshotId> snapshotIds = allIndexSnapshots.get(indexId);
if (snapshotIds == null) {
allIndexSnapshots.put(indexId, Collections.singletonList(snapshotId));
} else {
final List<SnapshotId> copy = new ArrayList<>(snapshotIds.size() + 1);
copy.addAll(snapshotIds);
copy.add(snapshotId);
allIndexSnapshots.put(indexId, Collections.unmodifiableList(copy));
}
}
return new RepositoryData(genId, snapshots, newSnapshotStates, newSnapshotVersions, allIndexSnapshots,
ShardGenerations.builder().putAll(this.shardGenerations).putAll(shardGenerations).build());
@ -253,23 +262,25 @@ public final class RepositoryData {
newSnapshotStates.remove(snapshotId.getUUID());
final Map<String, Version> newSnapshotVersions = new HashMap<>(snapshotVersions);
newSnapshotVersions.remove(snapshotId.getUUID());
Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>();
Map<IndexId, List<SnapshotId>> indexSnapshots = new HashMap<>();
for (final IndexId indexId : indices.values()) {
Set<SnapshotId> set;
Set<SnapshotId> snapshotIds = this.indexSnapshots.get(indexId);
List<SnapshotId> remaining;
List<SnapshotId> snapshotIds = this.indexSnapshots.get(indexId);
assert snapshotIds != null;
if (snapshotIds.contains(snapshotId)) {
final int listIndex = snapshotIds.indexOf(snapshotId);
if (listIndex > -1) {
if (snapshotIds.size() == 1) {
// removing the snapshot will mean no more snapshots
// have this index, so just skip over it
continue;
}
set = new LinkedHashSet<>(snapshotIds);
set.remove(snapshotId);
remaining = new ArrayList<>(snapshotIds);
remaining.remove(listIndex);
remaining = Collections.unmodifiableList(remaining);
} else {
set = snapshotIds;
remaining = snapshotIds;
}
indexSnapshots.put(indexId, set);
indexSnapshots.put(indexId, remaining);
}
return new RepositoryData(genId, newSnapshotIds, newSnapshotStates, newSnapshotVersions, indexSnapshots,
@ -281,8 +292,8 @@ public final class RepositoryData {
/**
* Returns an immutable collection of the snapshot ids for the snapshots that contain the given index.
*/
public Set<SnapshotId> getSnapshots(final IndexId indexId) {
Set<SnapshotId> snapshotIds = indexSnapshots.get(indexId);
public List<SnapshotId> getSnapshots(final IndexId indexId) {
List<SnapshotId> snapshotIds = indexSnapshots.get(indexId);
if (snapshotIds == null) {
throw new IllegalArgumentException("unknown snapshot index " + indexId);
}
@ -384,7 +395,7 @@ public final class RepositoryData {
builder.startObject(indexId.getName());
builder.field(INDEX_ID, indexId.getId());
builder.startArray(SNAPSHOTS);
Set<SnapshotId> snapshotIds = indexSnapshots.get(indexId);
List<SnapshotId> snapshotIds = indexSnapshots.get(indexId);
assert snapshotIds != null;
for (final SnapshotId snapshotId : snapshotIds) {
builder.value(snapshotId.getUUID());
@ -415,7 +426,7 @@ public final class RepositoryData {
final Map<String, SnapshotId> snapshots = new HashMap<>();
final Map<String, SnapshotState> snapshotStates = new HashMap<>();
final Map<String, Version> snapshotVersions = new HashMap<>();
final Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>();
final Map<IndexId, List<SnapshotId>> indexSnapshots = new HashMap<>();
final ShardGenerations.Builder shardGenerations = ShardGenerations.builder();
if (parser.nextToken() == XContentParser.Token.START_OBJECT) {
@ -459,7 +470,7 @@ public final class RepositoryData {
}
while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
final String indexName = parser.currentName();
final Set<SnapshotId> snapshotIds = new LinkedHashSet<>();
final List<SnapshotId> snapshotIds = new ArrayList<>();
final List<String> gens = new ArrayList<>();
IndexId indexId = null;
@ -513,7 +524,7 @@ public final class RepositoryData {
}
}
assert indexId != null;
indexSnapshots.put(indexId, snapshotIds);
indexSnapshots.put(indexId, Collections.unmodifiableList(snapshotIds));
for (int i = 0; i < gens.size(); i++) {
shardGenerations.put(indexId, i, gens.get(i));
}

View File

@ -64,7 +64,7 @@ public class RepositoryDataTests extends ESTestCase {
final List<IndexId> indicesBefore = new ArrayList<>(repositoryData.getIndices().values());
final SnapshotId randomSnapshot = randomFrom(repositoryData.getSnapshotIds());
final IndexId[] indicesToUpdate = indicesBefore.stream().filter(index -> {
final Set<SnapshotId> snapshotIds = repositoryData.getSnapshots(index);
final List<SnapshotId> snapshotIds = repositoryData.getSnapshots(index);
return snapshotIds.contains(randomSnapshot) && snapshotIds.size() > 1;
}).toArray(IndexId[]::new);
assertThat(repositoryData.indicesToUpdateAfterRemovingSnapshot(randomSnapshot), containsInAnyOrder(indicesToUpdate));
@ -111,7 +111,7 @@ public class RepositoryDataTests extends ESTestCase {
// verify that the new repository data has the new snapshot and its indices
assertTrue(newRepoData.getSnapshotIds().contains(newSnapshot));
for (IndexId indexId : indices) {
Set<SnapshotId> snapshotIds = newRepoData.getSnapshots(indexId);
List<SnapshotId> snapshotIds = newRepoData.getSnapshots(indexId);
assertTrue(snapshotIds.contains(newSnapshot));
if (newIndices.contains(indexId)) {
assertEquals(snapshotIds.size(), 1); // if it was a new index, only the new snapshot should be in its set
@ -134,7 +134,7 @@ public class RepositoryDataTests extends ESTestCase {
RepositoryData repositoryData = new RepositoryData(EMPTY_REPO_GEN, snapshotIds,
Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), ShardGenerations.EMPTY);
// test that initializing indices works
Map<IndexId, Set<SnapshotId>> indices = randomIndices(snapshotIds);
Map<IndexId, List<SnapshotId>> indices = randomIndices(snapshotIds);
RepositoryData newRepoData =
new RepositoryData(repositoryData.getGenId(), snapshotIds, snapshotStates, snapshotVersions, indices, ShardGenerations.EMPTY);
List<SnapshotId> expected = new ArrayList<>(repositoryData.getSnapshotIds());
@ -201,11 +201,11 @@ public class RepositoryDataTests extends ESTestCase {
final IndexId corruptedIndexId = randomFrom(parsedRepositoryData.getIndices().values());
Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>();
Map<IndexId, List<SnapshotId>> indexSnapshots = new HashMap<>();
final ShardGenerations.Builder shardGenBuilder = ShardGenerations.builder();
for (Map.Entry<String, IndexId> snapshottedIndex : parsedRepositoryData.getIndices().entrySet()) {
IndexId indexId = snapshottedIndex.getValue();
Set<SnapshotId> snapshotsIds = new LinkedHashSet<>(parsedRepositoryData.getSnapshots(indexId));
List<SnapshotId> snapshotsIds = new ArrayList<>(parsedRepositoryData.getSnapshots(indexId));
if (corruptedIndexId.equals(indexId)) {
snapshotsIds.add(new SnapshotId("_uuid", "_does_not_exist"));
}
@ -293,11 +293,11 @@ public class RepositoryDataTests extends ESTestCase {
return repositoryData;
}
private static Map<IndexId, Set<SnapshotId>> randomIndices(final Map<String, SnapshotId> snapshotIdsMap) {
private static Map<IndexId, List<SnapshotId>> randomIndices(final Map<String, SnapshotId> snapshotIdsMap) {
final List<SnapshotId> snapshotIds = new ArrayList<>(snapshotIdsMap.values());
final int totalSnapshots = snapshotIds.size();
final int numIndices = randomIntBetween(1, 30);
final Map<IndexId, Set<SnapshotId>> indices = new HashMap<>(numIndices);
final Map<IndexId, List<SnapshotId>> indices = new HashMap<>(numIndices);
for (int i = 0; i < numIndices; i++) {
final IndexId indexId = new IndexId(randomAlphaOfLength(8), UUIDs.randomBase64UUID());
final Set<SnapshotId> indexSnapshots = new LinkedHashSet<>();
@ -305,7 +305,7 @@ public class RepositoryDataTests extends ESTestCase {
for (int j = 0; j < numIndicesForSnapshot; j++) {
indexSnapshots.add(snapshotIds.get(randomIntBetween(0, totalSnapshots - 1)));
}
indices.put(indexId, indexSnapshots);
indices.put(indexId, Collections.unmodifiableList(new ArrayList<>(indexSnapshots)));
}
return indices;
}

View File

@ -44,7 +44,7 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import static java.util.Collections.emptySet;
import static java.util.Collections.emptyList;
import static org.elasticsearch.repositories.RepositoryData.EMPTY_REPO_GEN;
/** A dummy repository for testing which just needs restore overridden */
@ -91,7 +91,7 @@ public abstract class RestoreOnlyRepository extends AbstractLifecycleComponent i
public void getRepositoryData(ActionListener<RepositoryData> listener) {
final IndexId indexId = new IndexId(indexName, "blah");
listener.onResponse(new RepositoryData(EMPTY_REPO_GEN, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(),
Collections.singletonMap(indexId, emptySet()), ShardGenerations.EMPTY));
Collections.singletonMap(indexId, emptyList()), ShardGenerations.EMPTY));
}
@Override

View File

@ -238,7 +238,7 @@ public class CcrRepository extends AbstractLifecycleComponent implements Reposit
Map<String, SnapshotId> copiedSnapshotIds = new HashMap<>();
Map<String, SnapshotState> snapshotStates = new HashMap<>(copiedSnapshotIds.size());
Map<String, Version> snapshotVersions = new HashMap<>(copiedSnapshotIds.size());
Map<IndexId, Set<SnapshotId>> indexSnapshots = new HashMap<>(copiedSnapshotIds.size());
Map<IndexId, List<SnapshotId>> indexSnapshots = new HashMap<>(copiedSnapshotIds.size());
ImmutableOpenMap<String, IndexMetadata> remoteIndices = remoteMetadata.getIndices();
for (String indexName : remoteMetadata.getConcreteAllIndices()) {
@ -248,7 +248,7 @@ public class CcrRepository extends AbstractLifecycleComponent implements Reposit
snapshotStates.put(indexName, SnapshotState.SUCCESS);
snapshotVersions.put(indexName, Version.CURRENT);
Index index = remoteIndices.get(indexName).getIndex();
indexSnapshots.put(new IndexId(indexName, index.getUUID()), Collections.singleton(snapshotId));
indexSnapshots.put(new IndexId(indexName, index.getUUID()), Collections.singletonList(snapshotId));
}
return new RepositoryData(1, copiedSnapshotIds, snapshotStates, snapshotVersions, indexSnapshots, ShardGenerations.EMPTY);
});