HDFS-13145. SBN crashes when transitioning to ANN with in-progress edit tailing enabled. Contributed by Chao Sun.

(cherry picked from commit ae290a4bb4)
This commit is contained in:
Chao Sun 2018-02-26 15:37:27 -08:00 committed by Konstantin V Shvachko
parent 1fb87df87e
commit 26395aef67
2 changed files with 28 additions and 2 deletions

View File

@ -496,7 +496,9 @@ public class QuorumJournalManager implements JournalManager {
// If it's bounded by durable Txns, endTxId could not be larger
// than committedTxnId. This ensures the consistency.
if (onlyDurableTxns && inProgressOk) {
// We don't do the following for finalized log segments, since all
// edits in those are guaranteed to be committed.
if (onlyDurableTxns && inProgressOk && remoteLog.isInProgress()) {
endTxId = Math.min(endTxId, committedTxnId);
if (endTxId < remoteLog.getStartTxId()) {
LOG.warn("Found endTxId (" + endTxId + ") that is less than " +

View File

@ -934,6 +934,30 @@ public class TestQuorumJournalManager {
verifyEdits(streams, 25, 50);
}
@Test
public void testInProgressRecovery() throws Exception {
  // Scenario (see HDFS-13145): in-progress edit log tailing is enabled and
  // the old active crashes without closing its last log segment, so the
  // new active has to run recovery on that segment.
  final int segmentStartTxId = 1;
  final int firstBatchSize = 5;
  final int secondBatchSize = 3;
  final int expectedLastTxId = 8;

  // Write two batches into a single segment. Afterwards the commitId on
  // the journals is expected to be 5 and the endTxnId to be 8. The stream
  // is left unclosed here, matching the crash scenario described above.
  EditLogOutputStream openSegment = qjm.startLogSegment(
      segmentStartTxId, NameNodeLayoutVersion.CURRENT_LAYOUT_VERSION);
  writeTxns(openSegment, segmentStartTxId, firstBatchSize);
  writeTxns(openSegment, segmentStartTxId + firstBatchSize, secondBatchSize);

  // Run recovery through a second, independent QJM, as a failover would.
  QuorumJournalManager recoveringQjm = createSpyingQJM();
  recoveringQjm.recoverUnfinalizedSegments();
  checkRecovery(cluster, segmentStartTxId, expectedLastTxId);

  // Selecting input streams (inProgressOk = true, onlyDurableTxns = true)
  // must expose every transaction up to and including txn 8.
  List<EditLogInputStream> recoveredStreams = new ArrayList<>();
  recoveringQjm.selectInputStreams(
      recoveredStreams, segmentStartTxId, true, true);
  verifyEdits(recoveredStreams, segmentStartTxId, expectedLastTxId);
}
private QuorumJournalManager createSpyingQJM()
throws IOException, URISyntaxException {