diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
index 10706d0abda..baeffd7bf34 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt
@@ -176,3 +176,5 @@ HDFS-2752. HA: exit if multiple shared dirs are configured. (eli)
 HDFS-2894. HA: automatically determine the nameservice Id if only one nameservice is configured. (eli)
 
 HDFS-2733. Document HA configuration and CLI. (atm)
+
+HDFS-2794. Active NN may purge edit log files before standby NN has a chance to read them (todd)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index b655bbddaad..2843d8d4074 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -144,6 +144,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final boolean DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT = true;
   public static final String  DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY = "dfs.namenode.num.checkpoints.retained";
   public static final int     DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT = 2;
+  public static final String  DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY = "dfs.namenode.num.extra.edits.retained";
+  public static final int     DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT = 1000000; //1M
+
   public static final String  DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
   public static final int     DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;
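For orientation: the two keys above are read and validated by the NNStorageRetentionManager change that follows. A minimal standalone sketch of that read-and-validate pattern, assuming only the Hadoop Configuration and Guava Preconditions APIs (the class and method names here are illustrative, not part of the patch):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    import com.google.common.base.Preconditions;

    // Illustrative sketch; mirrors the constructor change in the next file.
    public class RetentionConfSketch {
      public static long readExtraEditsToRetain(Configuration conf) {
        // Fall back to the compiled-in default (1M edits) when unset.
        long extraEdits = conf.getLong(
            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT);
        // Fail fast on a nonsensical value, as the patch does.
        Preconditions.checkArgument(extraEdits >= 0,
            DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY
            + " must not be negative");
        return extraEdits;
      }
    }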
not be negative"); + this.storage = storage; this.editLog = editLog; this.purger = purger; @@ -79,8 +90,12 @@ public class NNStorageRetentionManager { purgeCheckpointsOlderThan(inspector, minImageTxId); // If fsimage_N is the image we want to keep, then we need to keep // all txns > N. We can remove anything < N+1, since fsimage_N - // reflects the state up to and including N. - editLog.purgeLogsOlderThan(minImageTxId + 1); + // reflects the state up to and including N. However, we also + // provide a "cushion" of older txns that we keep, which is + // handy for HA, where a remote node may not have as many + // new images. + long purgeLogsFrom = Math.max(0, minImageTxId + 1 - numExtraEditsToRetain); + editLog.purgeLogsOlderThan(purgeLogsFrom); } private void purgeCheckpointsOlderThan( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 9fa8e26d4d2..d84f5da65e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -637,6 +637,19 @@ + + dfs.namenode.num.extra.edits.retained + 1000000 + The number of extra transactions which should be retained + beyond what is minimally necessary for a NN restart. This can be useful for + audit purposes or for an HA setup where a remote Standby Node may have + been offline for some time and need to have a longer backlog of retained + edits in order to start again. + Typically each edit is on the order of a few hundred bytes, so the default + of 1 million edits should be on the order of hundreds of MBs or low GBs. + + + dfs.namenode.delegation.key.update-interval 86400000 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java index aad8d7dc0a2..e7a9cc1d49a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java @@ -61,6 +61,7 @@ public class TestNNStorageRetentionFunctional { throws IOException { MiniDFSCluster cluster = null; Configuration conf = new HdfsConfiguration(); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0); File sd0 = new File(TEST_ROOT_DIR, "nn0"); File sd1 = new File(TEST_ROOT_DIR, "nn1"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java index aadca5cc20d..6ff91f41a28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile; import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile; @@ 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java
index aad8d7dc0a2..e7a9cc1d49a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java
@@ -61,6 +61,7 @@ public class TestNNStorageRetentionFunctional {
       throws IOException {
     MiniDFSCluster cluster = null;
     Configuration conf = new HdfsConfiguration();
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0);
 
     File sd0 = new File(TEST_ROOT_DIR, "nn0");
     File sd1 = new File(TEST_ROOT_DIR, "nn1");
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java
index aadca5cc20d..6ff91f41a28 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile;
 import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile;
@@ -33,6 +34,7 @@ import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getImageFileName;
 import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger;
 
 import org.junit.Assert;
+import org.junit.Before;
 import org.junit.Test;
 import org.mockito.ArgumentCaptor;
 import org.mockito.Mockito;
@@ -46,6 +48,17 @@ import com.google.common.collect.Sets;
 
 public class TestNNStorageRetentionManager {
+  Configuration conf = new Configuration();
+
+  /**
+   * For the purpose of this test, purge as many edits as we can
+   * with no extra "safety cushion"
+   */
+  @Before
+  public void setNoExtraEditRetention() {
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0);
+  }
+
   /**
    * Test the "easy case" where we have more images in the
    * directory than we need to keep. Should purge the
@@ -163,9 +176,27 @@ public class TestNNStorageRetentionManager {
     runTest(tc);
   }
 
-  private void runTest(TestCaseDescription tc) throws IOException {
-    Configuration conf = new Configuration();
+  @Test
+  public void testRetainExtraLogs() throws IOException {
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY,
+        50);
+    TestCaseDescription tc = new TestCaseDescription();
+    tc.addRoot("/foo1", NameNodeDirType.IMAGE);
+    tc.addRoot("/foo2", NameNodeDirType.EDITS);
+    tc.addImage("/foo1/current/" + getImageFileName(100), true);
+    tc.addImage("/foo1/current/" + getImageFileName(200), true);
+    tc.addImage("/foo1/current/" + getImageFileName(300), false);
+    tc.addImage("/foo1/current/" + getImageFileName(400), false);
+    tc.addLog("/foo2/current/" + getFinalizedEditsFileName(101, 200), true);
+    // Since we need 50 extra edits, *do* retain the 201-300 segment
+    tc.addLog("/foo2/current/" + getFinalizedEditsFileName(201, 300), false);
+    tc.addLog("/foo2/current/" + getFinalizedEditsFileName(301, 400), false);
+    tc.addLog("/foo2/current/" + getInProgressEditsFileName(401), false);
+    runTest(tc);
+  }
+
+  private void runTest(TestCaseDescription tc) throws IOException {
     StoragePurger mockPurger =
         Mockito.mock(NNStorageRetentionManager.StoragePurger.class);
     ArgumentCaptor<FSImageFile> imagesPurgedCaptor =
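The expectations in testRetainExtraLogs above follow from the purge-floor arithmetic introduced in NNStorageRetentionManager: with the default of two retained checkpoints the oldest kept image is fsimage_300, and a cushion of 50 puts the floor at max(0, 300 + 1 - 50) = 251, so only segments ending before txid 251 are purged. A small self-checking sketch of that arithmetic (purgeFloor and segmentPurged are illustrative helpers, not patch API):

    // Illustrative check of the retention arithmetic exercised by the test.
    public class PurgeFloorSketch {
      // Same expression the patch uses to keep a cushion of older txns.
      static long purgeFloor(long minImageTxId, long numExtraEditsToRetain) {
        return Math.max(0, minImageTxId + 1 - numExtraEditsToRetain);
      }

      // A finalized segment is dropped only if it ends before the floor.
      static boolean segmentPurged(long endTxId, long floor) {
        return endTxId < floor;
      }

      public static void main(String[] args) {
        long floor = purgeFloor(300, 50);              // oldest kept image, cushion
        System.out.println(floor);                     // 251
        System.out.println(segmentPurged(200, floor)); // true: 101-200 is purged
        System.out.println(segmentPurged(300, floor)); // false: 201-300 is retained
      }
    }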