Assert that NOOPs must succeed (#43483)

We currently assert that adding deletion tombstones to Lucene must always succeed if it's not a
tragic exception, and the same should also hold true for NOOP tombstones. We rely on this
assumption, as without this, we have the risk of creating gaps in the history, which will break
operation-based recoveries and CCR.
This commit is contained in:
Yannick Welsch 2019-06-24 11:38:13 +02:00
parent ccaa8c33ba
commit 127a608147
1 changed files with 6 additions and 13 deletions

View File

@ -1496,7 +1496,6 @@ public class InternalEngine extends Engine {
try (Releasable ignored = noOpKeyedLock.acquire(seqNo)) {
final NoOpResult noOpResult;
final Optional<Exception> preFlightError = preFlightCheckForNoOp(noOp);
Exception failure = null;
if (preFlightError.isPresent()) {
noOpResult = new NoOpResult(getPrimaryTerm(), SequenceNumbers.UNASSIGNED_SEQ_NO, preFlightError.get());
} else {
@ -1516,17 +1515,13 @@ public class InternalEngine extends Engine {
doc.add(softDeletesField);
indexWriter.addDocument(doc);
} catch (Exception ex) {
if (maybeFailEngine("noop", ex)) {
if (indexWriter.getTragicException() == null) {
throw new AssertionError("noop operation should never fail at document level", ex);
}
throw ex;
}
failure = ex;
}
}
if (failure == null) {
noOpResult = new NoOpResult(getPrimaryTerm(), noOp.seqNo());
} else {
noOpResult = new NoOpResult(getPrimaryTerm(), noOp.seqNo(), failure);
}
if (noOp.origin().isFromTranslog() == false && noOpResult.getResultType() == Result.Type.SUCCESS) {
final Translog.Location location = translog.add(new Translog.NoOp(noOp.seqNo(), noOp.primaryTerm(), noOp.reason()));
noOpResult.setTranslogLocation(location);
@ -1534,10 +1529,8 @@ public class InternalEngine extends Engine {
}
localCheckpointTracker.markSeqNoAsProcessed(noOpResult.getSeqNo());
if (noOpResult.getTranslogLocation() == null) {
// the op is coming from the translog (and is hence persisted already) or it does not have a sequence number, or we failed
// to add a tombstone doc to Lucene with a non-fatal error, which would be very surprising
// TODO: always fail the engine in the last case, as this creates gaps in the history
assert noOp.origin().isFromTranslog() || noOpResult.getSeqNo() == SequenceNumbers.UNASSIGNED_SEQ_NO || failure != null;
// the op is coming from the translog (and is hence persisted already) or it does not have a sequence number
assert noOp.origin().isFromTranslog() || noOpResult.getSeqNo() == SequenceNumbers.UNASSIGNED_SEQ_NO;
localCheckpointTracker.markSeqNoAsPersisted(noOpResult.getSeqNo());
}
noOpResult.setTook(System.nanoTime() - noOp.startTime());