diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f5e28b3cd86..70d4954bcf9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -561,6 +561,9 @@ Release 2.0.3-alpha - Unreleased children of the child have to be updated to the new child. (Jing Zhao via szetszwo) + HDFS-4238. Standby namenode should not do purging of shared + storage edits. (todd) + BREAKDOWN OF HDFS-3077 SUBTASKS HDFS-3077. Quorum-based protocol for reading and writing edit logs. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 129ae1592bf..8c3fb70ea40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -878,6 +878,11 @@ public class FSEditLog implements LogsPurgeable { return journalSet; } + @VisibleForTesting + synchronized void setJournalSetForTesting(JournalSet js) { + this.journalSet = js; + } + /** * Used only by tests. */ @@ -1031,9 +1036,18 @@ public class FSEditLog implements LogsPurgeable { /** * Archive any log files that are older than the given txid. + * + * If the edit log is not open for write, then this call returns with no + * effect. */ @Override public synchronized void purgeLogsOlderThan(final long minTxIdToKeep) { + // Should not purge logs unless they are open for write. + // This prevents the SBN from purging logs on shared storage, for example. + if (!isOpenForWrite()) { + return; + } + assert curSegmentTxId == HdfsConstants.INVALID_TXID || // on format this is no-op minTxIdToKeep <= curSegmentTxId : "cannot purge logs older than txid " + minTxIdToKeep + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index f310959d9a1..cf64c335bac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -179,6 +179,13 @@ public class NameNodeAdapter { return spy; } + public static JournalSet spyOnJournalSet(NameNode nn) { + FSEditLog editLog = nn.getFSImage().getEditLog(); + JournalSet js = Mockito.spy(editLog.getJournalSet()); + editLog.setJournalSetForTesting(js); + return js; + } + public static String getMkdirOpPath(FSEditLogOp op) { if (op.opCode == FSEditLogOpCodes.OP_MKDIR) { return ((MkdirOp) op).path; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 61016c9540e..c449acae564 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.JournalSet; import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; @@ -66,6 +67,12 @@ public class TestStandbyCheckpoints { conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + + // Dial down the retention of extra edits and checkpoints. This is to + // help catch regressions of HDFS-4238 (SBN should not purge shared edits) + conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0); + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, SlowCodec.class.getCanonicalName()); @@ -99,15 +106,20 @@ public class TestStandbyCheckpoints { @Test public void testSBNCheckpoints() throws Exception { - doEdits(0, 10); + JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nn1); + doEdits(0, 10); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); // Once the standby catches up, it should notice that it needs to // do a checkpoint and save one to its local directories. - HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12)); // It should also upload it back to the active. - HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12)); + + // The standby should never try to purge edit logs on shared storage. + Mockito.verify(standbyJournalSet, Mockito.never()). + purgeLogsOlderThan(Mockito.anyLong()); } /** @@ -129,8 +141,8 @@ public class TestStandbyCheckpoints { // so the standby will catch up. Then, both will be in standby mode // with enough uncheckpointed txns to cause a checkpoint, and they // will each try to take a checkpoint and upload to each other. - HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12)); - HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12)); + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12)); assertEquals(12, nn0.getNamesystem().getFSImage() .getMostRecentCheckpointTxId());