skip everything between file checks and translog prepare if sync ids coincide

also throw an exception instead of asserting if the number of docs does not coincide
This commit is contained in:
Britta Weber 2015-05-18 22:30:57 +02:00
parent 8ef734908c
commit 8a3d504efb
1 changed file with 211 additions and 212 deletions

View File

@ -50,6 +50,7 @@ import org.elasticsearch.common.util.CancellableThreads;
import org.elasticsearch.common.util.CancellableThreads.Interruptable; import org.elasticsearch.common.util.CancellableThreads.Interruptable;
import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.gateway.CorruptStateException;
import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit; import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.Engine;
@ -131,7 +132,9 @@ public class RecoverySourceHandler {
this.response = new RecoveryResponse(); this.response = new RecoveryResponse();
} }
/** performs the recovery from the local engine to the target */ /**
* performs the recovery from the local engine to the target
*/
public RecoveryResponse recoverToTarget() { public RecoveryResponse recoverToTarget() {
final Engine engine = shard.engine(); final Engine engine = shard.engine();
assert engine.getTranslog() != null : "translog must not be null"; assert engine.getTranslog() != null : "translog must not be null";
@ -207,17 +210,13 @@ public class RecoverySourceHandler {
final boolean recoverWithSyncId = recoverySourceSyncId != null && final boolean recoverWithSyncId = recoverySourceSyncId != null &&
recoverySourceSyncId.equals(recoveryTargetSyncId); recoverySourceSyncId.equals(recoveryTargetSyncId);
if (recoverWithSyncId) { if (recoverWithSyncId) {
assert request.metadataSnapshot().getNumDocs() == recoverySourceMetadata.getNumDocs(); long numDocsTarget = request.metadataSnapshot().getNumDocs();
for (StoreFileMetaData md : request.metadataSnapshot()) { long numDocsSource = recoverySourceMetadata.getNumDocs();
response.phase1ExistingFileNames.add(md.name()); if (numDocsTarget != numDocsSource) {
response.phase1ExistingFileSizes.add(md.length()); throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number of docs differ: " + numDocsTarget + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsSource + "(" + request.targetNode().getName() + ")");
existingTotalSize += md.length();
if (logger.isTraceEnabled()) {
logger.trace("[{}][{}] recovery [phase1] to {}: not recovering [{}], checksum [{}], size [{}], sync ids {} coincide, will skip file copy",
indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length(), recoverySourceMetadata.getCommitUserData().get(Engine.SYNC_COMMIT_ID));
}
totalSize += md.length();
} }
// we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
// so we don't return here
} else { } else {
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot()); final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
for (StoreFileMetaData md : diff.identical) { for (StoreFileMetaData md : diff.identical) {
@ -242,7 +241,7 @@ public class RecoverySourceHandler {
response.phase1FileSizes.add(md.length()); response.phase1FileSizes.add(md.length());
totalSize += md.length(); totalSize += md.length();
} }
}
response.phase1TotalSize = totalSize; response.phase1TotalSize = totalSize;
response.phase1ExistingTotalSize = existingTotalSize; response.phase1ExistingTotalSize = existingTotalSize;
@ -416,9 +415,8 @@ public class RecoverySourceHandler {
// related to this recovery (out of date segments, for example) // related to this recovery (out of date segments, for example)
// are deleted // are deleted
try { try {
final Store.MetadataSnapshot remainingFilesAfterCleanup = recoverWithSyncId ? request.metadataSnapshot() : recoverySourceMetadata;
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES, transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES,
new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), remainingFilesAfterCleanup, translogView.totalOperations()), new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()),
TransportRequestOptions.options().withTimeout(recoverySettings.internalActionTimeout()), TransportRequestOptions.options().withTimeout(recoverySettings.internalActionTimeout()),
EmptyTransportResponseHandler.INSTANCE_SAME).txGet(); EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
} catch (RemoteTransportException remoteException) { } catch (RemoteTransportException remoteException) {
@ -459,6 +457,7 @@ public class RecoverySourceHandler {
} }
} }
}); });
}
prepareTargetForTranslog(translogView); prepareTargetForTranslog(translogView);