Failure to recover a shard might cause losing translog data (especially with no replicas), closes #869.
This commit is contained in:
parent
3c233347b8
commit
f5dbcb2fa4
|
@ -144,15 +144,24 @@ public class LocalIndexShardGateway extends AbstractIndexShardComponent implemen
|
|||
try {
|
||||
InputStreamStreamInput si = new InputStreamStreamInput(new FileInputStream(recoveringTranslogFile));
|
||||
while (true) {
|
||||
int opSize = si.readInt();
|
||||
Translog.Operation operation = TranslogStreams.readTranslogOperation(si);
|
||||
Translog.Operation operation;
|
||||
try {
|
||||
int opSize = si.readInt();
|
||||
operation = TranslogStreams.readTranslogOperation(si);
|
||||
} catch (EOFException e) {
|
||||
// ignore, not properly written the last op
|
||||
break;
|
||||
} catch (IOException e) {
|
||||
// ignore, not properly written last op
|
||||
break;
|
||||
}
|
||||
recoveryStatus.translog().addTranslogOperations(1);
|
||||
indexShard.performRecoveryOperation(operation);
|
||||
}
|
||||
} catch (EOFException e) {
|
||||
// ignore this exception, its fine
|
||||
} catch (IOException e) {
|
||||
// ignore this as well
|
||||
} catch (Throwable e) {
|
||||
// we failed to recover, make sure to delete the translog file (and keep the recovering one)
|
||||
indexShard.translog().close(true);
|
||||
throw new IndexShardGatewayRecoveryException(shardId, "failed to recover shard", e);
|
||||
}
|
||||
indexShard.performRecoveryFinalization(true);
|
||||
|
||||
|
|
|
@ -539,7 +539,7 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
|
|||
synchronized (mutex) {
|
||||
if (indexService.hasShard(shardRouting.shardId().id())) {
|
||||
try {
|
||||
indexService.cleanShard(shardRouting.shardId().id(), "recovery failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
|
||||
indexService.removeShard(shardRouting.shardId().id(), "recovery failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
|
||||
} catch (IndexShardMissingException e) {
|
||||
// the node got closed on us, ignore it
|
||||
} catch (Exception e1) {
|
||||
|
@ -576,7 +576,7 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
|
|||
synchronized (mutex) {
|
||||
if (indexService.hasShard(shardId.id())) {
|
||||
try {
|
||||
indexService.cleanShard(shardId.id(), "engine failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
|
||||
indexService.removeShard(shardId.id(), "engine failure [" + ExceptionsHelper.detailedMessage(failure) + "]");
|
||||
} catch (IndexShardMissingException e) {
|
||||
// the node got closed on us, ignore it
|
||||
} catch (Exception e1) {
|
||||
|
|
Loading…
Reference in New Issue