skip everything between file checks and translog prepare if sync ids coincide
also throw an exception instead of asserting if the number of docs does not coincide
This commit is contained in:
parent
8ef734908c
commit
8a3d504efb
|
@ -50,6 +50,7 @@ import org.elasticsearch.common.util.CancellableThreads;
|
||||||
import org.elasticsearch.common.util.CancellableThreads.Interruptable;
|
import org.elasticsearch.common.util.CancellableThreads.Interruptable;
|
||||||
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
|
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
|
||||||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||||
|
import org.elasticsearch.gateway.CorruptStateException;
|
||||||
import org.elasticsearch.index.IndexService;
|
import org.elasticsearch.index.IndexService;
|
||||||
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
|
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
|
||||||
import org.elasticsearch.index.engine.Engine;
|
import org.elasticsearch.index.engine.Engine;
|
||||||
|
@ -131,7 +132,9 @@ public class RecoverySourceHandler {
|
||||||
this.response = new RecoveryResponse();
|
this.response = new RecoveryResponse();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** performs the recovery from the local engine to the target */
|
/**
|
||||||
|
* performs the recovery from the local engine to the target
|
||||||
|
*/
|
||||||
public RecoveryResponse recoverToTarget() {
|
public RecoveryResponse recoverToTarget() {
|
||||||
final Engine engine = shard.engine();
|
final Engine engine = shard.engine();
|
||||||
assert engine.getTranslog() != null : "translog must not be null";
|
assert engine.getTranslog() != null : "translog must not be null";
|
||||||
|
@ -207,17 +210,13 @@ public class RecoverySourceHandler {
|
||||||
final boolean recoverWithSyncId = recoverySourceSyncId != null &&
|
final boolean recoverWithSyncId = recoverySourceSyncId != null &&
|
||||||
recoverySourceSyncId.equals(recoveryTargetSyncId);
|
recoverySourceSyncId.equals(recoveryTargetSyncId);
|
||||||
if (recoverWithSyncId) {
|
if (recoverWithSyncId) {
|
||||||
assert request.metadataSnapshot().getNumDocs() == recoverySourceMetadata.getNumDocs();
|
long numDocsTarget = request.metadataSnapshot().getNumDocs();
|
||||||
for (StoreFileMetaData md : request.metadataSnapshot()) {
|
long numDocsSource = recoverySourceMetadata.getNumDocs();
|
||||||
response.phase1ExistingFileNames.add(md.name());
|
if (numDocsTarget != numDocsSource) {
|
||||||
response.phase1ExistingFileSizes.add(md.length());
|
throw new IllegalStateException("try to recover " + request.shardId() + " from primary shard with sync id but number of docs differ: " + numDocsTarget + " (" + request.sourceNode().getName() + ", primary) vs " + numDocsSource + "(" + request.targetNode().getName() + ")");
|
||||||
existingTotalSize += md.length();
|
|
||||||
if (logger.isTraceEnabled()) {
|
|
||||||
logger.trace("[{}][{}] recovery [phase1] to {}: not recovering [{}], checksum [{}], size [{}], sync ids {} coincide, will skip file copy",
|
|
||||||
indexName, shardId, request.targetNode(), md.name(), md.checksum(), md.length(), recoverySourceMetadata.getCommitUserData().get(Engine.SYNC_COMMIT_ID));
|
|
||||||
}
|
|
||||||
totalSize += md.length();
|
|
||||||
}
|
}
|
||||||
|
// we shortcut recovery here because we have nothing to copy. but we must still start the engine on the target.
|
||||||
|
// so we don't return here
|
||||||
} else {
|
} else {
|
||||||
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
|
final Store.RecoveryDiff diff = recoverySourceMetadata.recoveryDiff(request.metadataSnapshot());
|
||||||
for (StoreFileMetaData md : diff.identical) {
|
for (StoreFileMetaData md : diff.identical) {
|
||||||
|
@ -242,7 +241,7 @@ public class RecoverySourceHandler {
|
||||||
response.phase1FileSizes.add(md.length());
|
response.phase1FileSizes.add(md.length());
|
||||||
totalSize += md.length();
|
totalSize += md.length();
|
||||||
}
|
}
|
||||||
}
|
|
||||||
response.phase1TotalSize = totalSize;
|
response.phase1TotalSize = totalSize;
|
||||||
response.phase1ExistingTotalSize = existingTotalSize;
|
response.phase1ExistingTotalSize = existingTotalSize;
|
||||||
|
|
||||||
|
@ -416,9 +415,8 @@ public class RecoverySourceHandler {
|
||||||
// related to this recovery (out of date segments, for example)
|
// related to this recovery (out of date segments, for example)
|
||||||
// are deleted
|
// are deleted
|
||||||
try {
|
try {
|
||||||
final Store.MetadataSnapshot remainingFilesAfterCleanup = recoverWithSyncId ? request.metadataSnapshot() : recoverySourceMetadata;
|
|
||||||
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES,
|
transportService.submitRequest(request.targetNode(), RecoveryTarget.Actions.CLEAN_FILES,
|
||||||
new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), remainingFilesAfterCleanup, translogView.totalOperations()),
|
new RecoveryCleanFilesRequest(request.recoveryId(), shard.shardId(), recoverySourceMetadata, translogView.totalOperations()),
|
||||||
TransportRequestOptions.options().withTimeout(recoverySettings.internalActionTimeout()),
|
TransportRequestOptions.options().withTimeout(recoverySettings.internalActionTimeout()),
|
||||||
EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
|
EmptyTransportResponseHandler.INSTANCE_SAME).txGet();
|
||||||
} catch (RemoteTransportException remoteException) {
|
} catch (RemoteTransportException remoteException) {
|
||||||
|
@ -459,6 +457,7 @@ public class RecoverySourceHandler {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
prepareTargetForTranslog(translogView);
|
prepareTargetForTranslog(translogView);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue