Fix possible race condition during snapshot deletion

This commit is contained in:
Igor Motov 2013-11-13 23:20:58 -05:00
parent 0a7c6c9288
commit 3c0cc22d36
1 changed files with 35 additions and 22 deletions

View File

@ -74,7 +74,7 @@ import static com.google.common.collect.Maps.newHashMapWithExpectedSize;
* <li>Once shard snapshot is created data node updates state of the shard in the cluster state using the {@link #updateIndexShardSnapshotStatus(UpdateIndexShardSnapshotStatusRequest)} method</li> * <li>Once shard snapshot is created data node updates state of the shard in the cluster state using the {@link #updateIndexShardSnapshotStatus(UpdateIndexShardSnapshotStatusRequest)} method</li>
* <li>When last shard is completed master node in {@link #innerUpdateSnapshotState} method marks the snapshot as completed</li> * <li>When last shard is completed master node in {@link #innerUpdateSnapshotState} method marks the snapshot as completed</li>
* <li>After cluster state is updated, the {@link #endSnapshot(SnapshotMetaData.Entry)} finalizes snapshot in the repository, * <li>After cluster state is updated, the {@link #endSnapshot(SnapshotMetaData.Entry)} finalizes snapshot in the repository,
* notifies all {@link #snapshotCompletionListeners} that snapshot is completed, and finally calls {@link #removeSnapshotFromClusterState(SnapshotId)} to remove snapshot from cluster state</li> * notifies all {@link #snapshotCompletionListeners} that snapshot is completed, and finally calls {@link #removeSnapshotFromClusterState(SnapshotId, SnapshotInfo, Throwable)} to remove snapshot from cluster state</li>
* </ul> * </ul>
*/ */
public class SnapshotsService extends AbstractComponent implements ClusterStateListener { public class SnapshotsService extends AbstractComponent implements ClusterStateListener {
@ -681,25 +681,12 @@ public class SnapshotsService extends AbstractComponent implements ClusterStateL
} }
} }
Snapshot snapshot = repository.finalizeSnapshot(snapshotId, null, entry.shards().size(), ImmutableList.copyOf(shardFailures)); Snapshot snapshot = repository.finalizeSnapshot(snapshotId, null, entry.shards().size(), ImmutableList.copyOf(shardFailures));
for (SnapshotCompletionListener listener : snapshotCompletionListeners) { removeSnapshotFromClusterState(snapshotId, new SnapshotInfo(snapshot), null);
try {
listener.onSnapshotCompletion(snapshotId, new SnapshotInfo(snapshot));
} catch (Throwable t) {
logger.warn("failed to refresh settings for [{}]", t, listener);
}
}
} catch (Throwable t) { } catch (Throwable t) {
logger.warn("[{}] failed to finalize snapshot", t, snapshotId); logger.warn("[{}] failed to finalize snapshot", t, snapshotId);
for (SnapshotCompletionListener listener : snapshotCompletionListeners) { removeSnapshotFromClusterState(snapshotId, null, t);
try {
listener.onSnapshotFailure(snapshotId, t);
} catch (Throwable t2) {
logger.warn("failed to update snapshot status for [{}]", t2, listener);
} }
} }
}
removeSnapshotFromClusterState(snapshotId);
}
}); });
} }
@ -707,9 +694,11 @@ public class SnapshotsService extends AbstractComponent implements ClusterStateL
* Removes record of running snapshot from cluster state * Removes record of running snapshot from cluster state
* *
* @param snapshotId snapshot id * @param snapshotId snapshot id
* @param snapshot snapshot info if snapshot was successful
* @param t exception if snapshot failed
*/ */
private void removeSnapshotFromClusterState(final SnapshotId snapshotId) { private void removeSnapshotFromClusterState(final SnapshotId snapshotId, final SnapshotInfo snapshot, final Throwable t) {
clusterService.submitStateUpdateTask("remove snapshot metadata", new ClusterStateUpdateTask() { clusterService.submitStateUpdateTask("remove snapshot metadata", new ProcessedClusterStateUpdateTask() {
@Override @Override
public ClusterState execute(ClusterState currentState) { public ClusterState execute(ClusterState currentState) {
MetaData metaData = currentState.metaData(); MetaData metaData = currentState.metaData();
@ -738,6 +727,22 @@ public class SnapshotsService extends AbstractComponent implements ClusterStateL
public void onFailure(String source, Throwable t) { public void onFailure(String source, Throwable t) {
logger.warn("[{}][{}] failed to remove snapshot metadata", t, snapshotId); logger.warn("[{}][{}] failed to remove snapshot metadata", t, snapshotId);
} }
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
for (SnapshotCompletionListener listener : snapshotCompletionListeners) {
try {
if (snapshot != null) {
listener.onSnapshotCompletion(snapshotId, snapshot);
} else {
listener.onSnapshotFailure(snapshotId, t);
}
} catch (Throwable t) {
logger.warn("failed to refresh settings for [{}]", t, listener);
}
}
}
}); });
} }
@ -787,10 +792,14 @@ public class SnapshotsService extends AbstractComponent implements ClusterStateL
} }
} }
shards = shardsBuilder.build(); shards = shardsBuilder.build();
} else { } else if (snapshot.state() == State.INIT) {
// snapshot hasn't started yet or already finished - just end it // snapshot hasn't started yet - end it
shards = snapshot.shards(); shards = snapshot.shards();
endSnapshot(snapshot); endSnapshot(snapshot);
} else {
// snapshot is being finalized - wait for it
logger.trace("trying to delete completed snapshot - save to delete");
return currentState;
} }
SnapshotMetaData.Entry newSnapshot = new SnapshotMetaData.Entry(snapshotId, snapshot.includeGlobalState(), State.ABORTED, snapshot.indices(), shards); SnapshotMetaData.Entry newSnapshot = new SnapshotMetaData.Entry(snapshotId, snapshot.includeGlobalState(), State.ABORTED, snapshot.indices(), shards);
snapshots = new SnapshotMetaData(newSnapshot); snapshots = new SnapshotMetaData(newSnapshot);
@ -807,20 +816,24 @@ public class SnapshotsService extends AbstractComponent implements ClusterStateL
@Override @Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (waitForSnapshot) { if (waitForSnapshot) {
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
addListener(new SnapshotCompletionListener() { addListener(new SnapshotCompletionListener() {
@Override @Override
public void onSnapshotCompletion(SnapshotId snapshotId, SnapshotInfo snapshot) { public void onSnapshotCompletion(SnapshotId snapshotId, SnapshotInfo snapshot) {
logger.trace("deleted snapshot completed - deleting files");
removeListener(this); removeListener(this);
deleteSnapshotFromRepository(snapshotId, listener); deleteSnapshotFromRepository(snapshotId, listener);
} }
@Override @Override
public void onSnapshotFailure(SnapshotId snapshotId, Throwable t) { public void onSnapshotFailure(SnapshotId snapshotId, Throwable t) {
logger.trace("deleted snapshot failed - deleting files", t);
removeListener(this); removeListener(this);
deleteSnapshotFromRepository(snapshotId, listener); deleteSnapshotFromRepository(snapshotId, listener);
} }
}); });
} else { } else {
logger.trace("deleted snapshot is not running - deleting files");
deleteSnapshotFromRepository(snapshotId, listener); deleteSnapshotFromRepository(snapshotId, listener);
} }
} }