Snapshot/Restore: fix snapshot of a single closed index

Taking a snapshot of a single closed index can leave the snapshot hanging in the initializing state.

Fixes #8046
This commit is contained in:
Igor Motov 2014-10-09 20:37:08 -04:00
parent 249a145a5c
commit e3d379fb08
2 changed files with 54 additions and 43 deletions

View File

@ -323,6 +323,12 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
@Override @Override
public void onFailure(String source, Throwable t) { public void onFailure(String source, Throwable t) {
logger.warn("[{}] failed to create snapshot", t, snapshot.snapshotId()); logger.warn("[{}] failed to create snapshot", t, snapshot.snapshotId());
removeSnapshotFromClusterState(snapshot.snapshotId(), null, t);
try {
repositoriesService.repository(snapshot.snapshotId().getRepository()).finalizeSnapshot(snapshot.snapshotId(), ExceptionsHelper.detailedMessage(t), 0, ImmutableList.<SnapshotShardFailure>of());
} catch (Throwable t2) {
logger.warn("[{}] failed to close snapshot in repository", snapshot.snapshotId());
}
userCreateSnapshotListener.onFailure(t); userCreateSnapshotListener.onFailure(t);
} }
@ -345,28 +351,7 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
}); });
} catch (Throwable t) { } catch (Throwable t) {
logger.warn("failed to create snapshot [{}]", t, snapshot.snapshotId()); logger.warn("failed to create snapshot [{}]", t, snapshot.snapshotId());
clusterService.submitStateUpdateTask("fail_snapshot [" + snapshot.snapshotId() + "]", new ClusterStateUpdateTask() { removeSnapshotFromClusterState(snapshot.snapshotId(), null, t);
@Override
public ClusterState execute(ClusterState currentState) {
MetaData metaData = currentState.metaData();
MetaData.Builder mdBuilder = MetaData.builder(currentState.metaData());
SnapshotMetaData snapshots = metaData.custom(SnapshotMetaData.TYPE);
ImmutableList.Builder<SnapshotMetaData.Entry> entries = ImmutableList.builder();
for (SnapshotMetaData.Entry entry : snapshots.entries()) {
if (!entry.snapshotId().equals(snapshot.snapshotId())) {
entries.add(entry);
}
}
mdBuilder.putCustom(SnapshotMetaData.TYPE, new SnapshotMetaData(entries.build()));
return ClusterState.builder(currentState).metaData(mdBuilder).build();
}
@Override
public void onFailure(String source, Throwable t) {
logger.warn("[{}] failed to delete snapshot", t, snapshot.snapshotId());
}
});
if (snapshotCreated) { if (snapshotCreated) {
try { try {
repositoriesService.repository(snapshot.snapshotId().getRepository()).finalizeSnapshot(snapshot.snapshotId(), ExceptionsHelper.detailedMessage(t), 0, ImmutableList.<SnapshotShardFailure>of()); repositoriesService.repository(snapshot.snapshotId().getRepository()).finalizeSnapshot(snapshot.snapshotId(), ExceptionsHelper.detailedMessage(t), 0, ImmutableList.<SnapshotShardFailure>of());
@ -1046,7 +1031,7 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
listener.onSnapshotFailure(snapshotId, t); listener.onSnapshotFailure(snapshotId, t);
} }
} catch (Throwable t) { } catch (Throwable t) {
logger.warn("failed to refresh settings for [{}]", t, listener); logger.warn("failed to notify listener [{}]", t, listener);
} }
} }
@ -1127,17 +1112,21 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish"); logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
addListener(new SnapshotCompletionListener() {
    // Both callbacks ignore events that belong to other snapshots. For the snapshot being
    // deleted, the listener unregisters itself and then deletes the snapshot from the
    // repository, whether the snapshot completed or failed.

    @Override
    public void onSnapshotCompletion(SnapshotId completedSnapshotId, SnapshotInfo snapshot) {
        if (!completedSnapshotId.equals(snapshotId)) {
            return;
        }
        logger.trace("deleted snapshot completed - deleting files");
        removeListener(this);
        deleteSnapshotFromRepository(snapshotId, listener);
    }

    @Override
    public void onSnapshotFailure(SnapshotId failedSnapshotId, Throwable t) {
        if (!failedSnapshotId.equals(snapshotId)) {
            return;
        }
        logger.trace("deleted snapshot failed - deleting files", t);
        removeListener(this);
        deleteSnapshotFromRepository(snapshotId, listener);
    }
});
} else { } else {
@ -1203,21 +1192,22 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
for (String index : indices) { for (String index : indices) {
IndexMetaData indexMetaData = metaData.index(index); IndexMetaData indexMetaData = metaData.index(index);
IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(index); IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(index);
if (indexRoutingTable == null) {
throw new SnapshotCreationException(snapshotId, "Missing routing table for index [" + index + "]");
}
for (int i = 0; i < indexMetaData.numberOfShards(); i++) { for (int i = 0; i < indexMetaData.numberOfShards(); i++) {
ShardId shardId = new ShardId(index, i); ShardId shardId = new ShardId(index, i);
ShardRouting primary = indexRoutingTable.shard(i).primaryShard(); if (indexRoutingTable != null) {
if (primary == null || !primary.assignedToNode()) { ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated")); if (primary == null || !primary.assignedToNode()) {
} else if (clusterState.getNodes().smallestVersion().onOrAfter(Version.V_1_2_0) && (primary.relocating() || primary.initializing())) { builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated"));
// The WAITING state was introduced in V1.2.0 - don't use it if there are nodes with older version in the cluster } else if (clusterState.getNodes().smallestVersion().onOrAfter(Version.V_1_2_0) && (primary.relocating() || primary.initializing())) {
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING)); // The WAITING state was introduced in V1.2.0 - don't use it if there are nodes with older version in the cluster
} else if (!primary.started()) { builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING));
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.MISSING, "primary shard hasn't been started yet")); } else if (!primary.started()) {
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.MISSING, "primary shard hasn't been started yet"));
} else {
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId()));
}
} else { } else {
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId())); builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(null, State.MISSING, "missing routing table"));
} }
} }
} }

View File

@ -804,6 +804,27 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
client.admin().cluster().prepareDeleteSnapshot("test-repo", "test-snap").get(); client.admin().cluster().prepareDeleteSnapshot("test-repo", "test-snap").get();
} }
/**
 * Snapshots a single closed index and checks that the create-snapshot call returns
 * (instead of hanging) with one index listed and the snapshot in the FAILED state.
 */
@Test
public void snapshotSingleClosedIndexTest() throws Exception {
    final Client client = client();
    final String repoName = "test-repo";
    final String indexName = "test-idx";

    logger.info("--> creating repository");
    assertAcked(client.admin().cluster()
            .preparePutRepository(repoName)
            .setType("fs")
            .setSettings(ImmutableSettings.settingsBuilder().put("location", newTempDir(LifecycleScope.SUITE))));

    // The index has to exist and be fully allocated before it is closed.
    createIndex(indexName);
    ensureGreen();

    logger.info("--> closing index test-idx");
    assertAcked(client.admin().indices().prepareClose(indexName));

    logger.info("--> snapshot");
    CreateSnapshotResponse response = client.admin().cluster()
            .prepareCreateSnapshot(repoName, "test-snap-1")
            .setIndices(indexName)
            .setWaitForCompletion(true)
            .get();

    assertThat(response.getSnapshotInfo().indices().size(), equalTo(1));
    assertThat(response.getSnapshotInfo().state(), equalTo(SnapshotState.FAILED));
}
@Test @Test
public void renameOnRestoreTest() throws Exception { public void renameOnRestoreTest() throws Exception {
Client client = client(); Client client = client();