Snapshot/Restore: fix snapshot of a single closed index
A snapshot of a closed index can leave the snapshot hanging in the initializing state. Fixes #8046
This commit is contained in:
parent
249a145a5c
commit
e3d379fb08
|
@ -323,6 +323,12 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
|
||||||
@Override
|
@Override
|
||||||
public void onFailure(String source, Throwable t) {
|
public void onFailure(String source, Throwable t) {
|
||||||
logger.warn("[{}] failed to create snapshot", t, snapshot.snapshotId());
|
logger.warn("[{}] failed to create snapshot", t, snapshot.snapshotId());
|
||||||
|
removeSnapshotFromClusterState(snapshot.snapshotId(), null, t);
|
||||||
|
try {
|
||||||
|
repositoriesService.repository(snapshot.snapshotId().getRepository()).finalizeSnapshot(snapshot.snapshotId(), ExceptionsHelper.detailedMessage(t), 0, ImmutableList.<SnapshotShardFailure>of());
|
||||||
|
} catch (Throwable t2) {
|
||||||
|
logger.warn("[{}] failed to close snapshot in repository", snapshot.snapshotId());
|
||||||
|
}
|
||||||
userCreateSnapshotListener.onFailure(t);
|
userCreateSnapshotListener.onFailure(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,28 +351,7 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
|
||||||
});
|
});
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
logger.warn("failed to create snapshot [{}]", t, snapshot.snapshotId());
|
logger.warn("failed to create snapshot [{}]", t, snapshot.snapshotId());
|
||||||
clusterService.submitStateUpdateTask("fail_snapshot [" + snapshot.snapshotId() + "]", new ClusterStateUpdateTask() {
|
removeSnapshotFromClusterState(snapshot.snapshotId(), null, t);
|
||||||
|
|
||||||
@Override
|
|
||||||
public ClusterState execute(ClusterState currentState) {
|
|
||||||
MetaData metaData = currentState.metaData();
|
|
||||||
MetaData.Builder mdBuilder = MetaData.builder(currentState.metaData());
|
|
||||||
SnapshotMetaData snapshots = metaData.custom(SnapshotMetaData.TYPE);
|
|
||||||
ImmutableList.Builder<SnapshotMetaData.Entry> entries = ImmutableList.builder();
|
|
||||||
for (SnapshotMetaData.Entry entry : snapshots.entries()) {
|
|
||||||
if (!entry.snapshotId().equals(snapshot.snapshotId())) {
|
|
||||||
entries.add(entry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mdBuilder.putCustom(SnapshotMetaData.TYPE, new SnapshotMetaData(entries.build()));
|
|
||||||
return ClusterState.builder(currentState).metaData(mdBuilder).build();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void onFailure(String source, Throwable t) {
|
|
||||||
logger.warn("[{}] failed to delete snapshot", t, snapshot.snapshotId());
|
|
||||||
}
|
|
||||||
});
|
|
||||||
if (snapshotCreated) {
|
if (snapshotCreated) {
|
||||||
try {
|
try {
|
||||||
repositoriesService.repository(snapshot.snapshotId().getRepository()).finalizeSnapshot(snapshot.snapshotId(), ExceptionsHelper.detailedMessage(t), 0, ImmutableList.<SnapshotShardFailure>of());
|
repositoriesService.repository(snapshot.snapshotId().getRepository()).finalizeSnapshot(snapshot.snapshotId(), ExceptionsHelper.detailedMessage(t), 0, ImmutableList.<SnapshotShardFailure>of());
|
||||||
|
@ -1046,7 +1031,7 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
|
||||||
listener.onSnapshotFailure(snapshotId, t);
|
listener.onSnapshotFailure(snapshotId, t);
|
||||||
}
|
}
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
logger.warn("failed to refresh settings for [{}]", t, listener);
|
logger.warn("failed to notify listener [{}]", t, listener);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1127,17 +1112,21 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
|
||||||
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
|
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
|
||||||
addListener(new SnapshotCompletionListener() {
|
addListener(new SnapshotCompletionListener() {
|
||||||
@Override
|
@Override
|
||||||
public void onSnapshotCompletion(SnapshotId snapshotId, SnapshotInfo snapshot) {
|
public void onSnapshotCompletion(SnapshotId completedSnapshotId, SnapshotInfo snapshot) {
|
||||||
logger.trace("deleted snapshot completed - deleting files");
|
if (completedSnapshotId.equals(snapshotId)) {
|
||||||
removeListener(this);
|
logger.trace("deleted snapshot completed - deleting files");
|
||||||
deleteSnapshotFromRepository(snapshotId, listener);
|
removeListener(this);
|
||||||
|
deleteSnapshotFromRepository(snapshotId, listener);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onSnapshotFailure(SnapshotId snapshotId, Throwable t) {
|
public void onSnapshotFailure(SnapshotId failedSnapshotId, Throwable t) {
|
||||||
logger.trace("deleted snapshot failed - deleting files", t);
|
if (failedSnapshotId.equals(snapshotId)) {
|
||||||
removeListener(this);
|
logger.trace("deleted snapshot failed - deleting files", t);
|
||||||
deleteSnapshotFromRepository(snapshotId, listener);
|
removeListener(this);
|
||||||
|
deleteSnapshotFromRepository(snapshotId, listener);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
|
@ -1203,21 +1192,22 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
|
||||||
for (String index : indices) {
|
for (String index : indices) {
|
||||||
IndexMetaData indexMetaData = metaData.index(index);
|
IndexMetaData indexMetaData = metaData.index(index);
|
||||||
IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(index);
|
IndexRoutingTable indexRoutingTable = clusterState.getRoutingTable().index(index);
|
||||||
if (indexRoutingTable == null) {
|
|
||||||
throw new SnapshotCreationException(snapshotId, "Missing routing table for index [" + index + "]");
|
|
||||||
}
|
|
||||||
for (int i = 0; i < indexMetaData.numberOfShards(); i++) {
|
for (int i = 0; i < indexMetaData.numberOfShards(); i++) {
|
||||||
ShardId shardId = new ShardId(index, i);
|
ShardId shardId = new ShardId(index, i);
|
||||||
ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
|
if (indexRoutingTable != null) {
|
||||||
if (primary == null || !primary.assignedToNode()) {
|
ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
|
||||||
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated"));
|
if (primary == null || !primary.assignedToNode()) {
|
||||||
} else if (clusterState.getNodes().smallestVersion().onOrAfter(Version.V_1_2_0) && (primary.relocating() || primary.initializing())) {
|
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(null, State.MISSING, "primary shard is not allocated"));
|
||||||
// The WAITING state was introduced in V1.2.0 - don't use it if there are nodes with older version in the cluster
|
} else if (clusterState.getNodes().smallestVersion().onOrAfter(Version.V_1_2_0) && (primary.relocating() || primary.initializing())) {
|
||||||
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING));
|
// The WAITING state was introduced in V1.2.0 - don't use it if there are nodes with older version in the cluster
|
||||||
} else if (!primary.started()) {
|
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.WAITING));
|
||||||
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.MISSING, "primary shard hasn't been started yet"));
|
} else if (!primary.started()) {
|
||||||
|
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId(), State.MISSING, "primary shard hasn't been started yet"));
|
||||||
|
} else {
|
||||||
|
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId()));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(primary.currentNodeId()));
|
builder.put(shardId, new SnapshotMetaData.ShardSnapshotStatus(null, State.MISSING, "missing routing table"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -804,6 +804,27 @@ public class SharedClusterSnapshotRestoreTests extends AbstractSnapshotTests {
|
||||||
client.admin().cluster().prepareDeleteSnapshot("test-repo", "test-snap").get();
|
client.admin().cluster().prepareDeleteSnapshot("test-repo", "test-snap").get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void snapshotSingleClosedIndexTest() throws Exception {
|
||||||
|
Client client = client();
|
||||||
|
|
||||||
|
logger.info("--> creating repository");
|
||||||
|
assertAcked(client.admin().cluster().preparePutRepository("test-repo")
|
||||||
|
.setType("fs").setSettings(ImmutableSettings.settingsBuilder()
|
||||||
|
.put("location", newTempDir(LifecycleScope.SUITE))));
|
||||||
|
|
||||||
|
createIndex("test-idx");
|
||||||
|
ensureGreen();
|
||||||
|
logger.info("--> closing index test-idx");
|
||||||
|
assertAcked(client.admin().indices().prepareClose("test-idx"));
|
||||||
|
|
||||||
|
logger.info("--> snapshot");
|
||||||
|
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1")
|
||||||
|
.setWaitForCompletion(true).setIndices("test-idx").get();
|
||||||
|
assertThat(createSnapshotResponse.getSnapshotInfo().indices().size(), equalTo(1));
|
||||||
|
assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.FAILED));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void renameOnRestoreTest() throws Exception {
|
public void renameOnRestoreTest() throws Exception {
|
||||||
Client client = client();
|
Client client = client();
|
||||||
|
|
Loading…
Reference in New Issue