diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f5a4fd39cef..898089a81f5 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -217,6 +217,8 @@ Release 2.2.1 - UNRELEASED
HDFS-4633 TestDFSClientExcludedNodes fails sporadically if excluded nodes
cache expires too quickly (Chris Nauroth via Sanjay)
+ HDFS-5037. Active NN should trigger its own edit log rolls. (wang)
+
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index e195a071f81..2f4b7776c9d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -182,6 +182,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;
+ // Multiplier applied to dfs.namenode.checkpoint.txns to obtain the number of
+ // edits at which an active namenode rolls its own edit log.
+ public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD = "dfs.namenode.edit.log.autoroll.multiplier.threshold";
+ public static final float DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT = 2.0f;
+ // How often, in milliseconds, an active namenode checks whether it needs to
+ // roll its edit log.
+ public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS = "dfs.namenode.edit.log.autoroll.check.interval.ms";
+ public static final int DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT = 5*60*1000; // 5 minutes
+
public static final String DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH = "dfs.namenode.edits.noeditlogchannelflush";
public static final boolean DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH_DEFAULT = false;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 9da125c4c27..49e6f217194 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -38,6 +38,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECI
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
@@ -49,6 +51,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
@@ -375,6 +381,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
Daemon nnrmthread = null; // NamenodeResourceMonitor thread
+ // Daemon wrapping the NameNodeEditLogRoller runnable; null until one is created.
+ Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
+ /**
+ * Number of edits in the open edit log segment above which an active
+ * namenode rolls its own edit log. Computed as the autoroll multiplier
+ * times the checkpoint transaction threshold.
+ */
+ private final long editLogRollerThreshold;
+ /**
+ * Check interval of an active namenode's edit log roller thread, in
+ * milliseconds.
+ */
+ private final int editLogRollerInterval;
+
private volatile boolean hasResourcesAvailable = false;
private volatile boolean fsRunning = true;
@@ -688,7 +704,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
this.standbyShouldCheckpoint = conf.getBoolean(
DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
-
+ // # edit autoroll threshold is a multiple of the checkpoint threshold
+ this.editLogRollerThreshold = (long)
+ (conf.getFloat(
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
+ conf.getLong(
+ DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+ DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
+ this.editLogRollerInterval = conf.getInt(
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
+ DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
this.inodeId = new INodeId();
// For testing purposes, allow the DT secret manager to be started regardless
@@ -956,6 +982,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
//ResourceMonitor required only at ActiveNN. See HDFS-2914
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
nnrmthread.start();
+
+ nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
+ editLogRollerThreshold, editLogRollerInterval));
+ nnEditLogRoller.start();
+
} finally {
writeUnlock();
startingActiveService = false;
@@ -993,6 +1024,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
nnrmthread.interrupt();
}
+ if (nnEditLogRoller != null) {
+ ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
+ nnEditLogRoller.interrupt();
+ }
if (dir != null && dir.fsImage != null) {
if (dir.fsImage.editLog != null) {
dir.fsImage.editLog.close();
@@ -4124,7 +4159,48 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
shouldNNRmRun = false;
}
}
-
+
+  /**
+   * Periodically checks how many edits the currently open edit log segment
+   * holds and rolls the edit log when that count exceeds a threshold.
+   * The loop ends when {@link #stop()} has been called, when the thread is
+   * interrupted while sleeping, or when fsRunning becomes false.
+   */
+  class NameNodeEditLogRoller implements Runnable {
+
+    /**
+     * Loop flag cleared by {@link #stop()}. It is written by the thread that
+     * stops the roller and read by the roller thread, hence volatile.
+     */
+    private volatile boolean shouldRun = true;
+    /** The log is rolled when the open segment holds more edits than this. */
+    private final long rollThreshold;
+    /** Pause between two consecutive checks, in milliseconds. */
+    private final long sleepIntervalMs;
+
+    /**
+     * @param rollThreshold number of edits in the open segment above which
+     *          the edit log is rolled
+     * @param sleepIntervalMs time to sleep between checks, in milliseconds
+     */
+    public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
+      this.rollThreshold = rollThreshold;
+      this.sleepIntervalMs = sleepIntervalMs;
+    }
+
+    @Override
+    public void run() {
+      while (fsRunning && shouldRun) {
+        try {
+          FSEditLog editLog = getFSImage().getEditLog();
+          long numEdits =
+              editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
+          if (numEdits > rollThreshold) {
+            FSNamesystem.LOG.info("NameNode rolling its own edit log because"
+                + " number of edits in open segment exceeds threshold of "
+                + rollThreshold);
+            rollEditLog();
+          }
+        } catch (Exception e) {
+          // Best effort: a failed check or roll is logged and retried on the
+          // next iteration.
+          FSNamesystem.LOG.error("Swallowing exception in "
+              + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
+        }
+        // Sleep in a separate try block so that a failure above still waits
+        // for the full interval instead of retrying in a tight loop.
+        try {
+          Thread.sleep(sleepIntervalMs);
+        } catch (InterruptedException e) {
+          FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
+              + " was interrupted, exiting");
+          break;
+        }
+      }
+    }
+
+    /** Asks the loop in {@link #run()} to exit at its next condition check. */
+    public void stop() {
+      shouldRun = false;
+    }
+  }
+
public FSImage getFSImage() {
return dir.fsImage;
}
@@ -5141,7 +5217,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
checkOperation(OperationCategory.JOURNAL);
checkNameNodeSafeMode("Log not rolled");
- LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
+ if (Server.isRpcInvocation()) {
+ LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
+ }
return getFSImage().rollEditLog();
} finally {
writeUnlock();
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java
index a61e134cc72..f05bf8e7861 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java
@@ -38,7 +38,7 @@ public class ActiveState extends HAState {
@Override
public void checkOperation(HAContext context, OperationCategory op) {
- return; // Other than journal all operations are allowed in active state
+ return; // All operations are allowed in active state
}
@Override
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 14580952390..7685f094338 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -1459,4 +1459,29 @@
+
+<property>
+  <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name>
+  <value>2.0</value>
+  <description>
+    Determines when an active namenode will roll its own edit log.
+    The actual threshold (in number of edits) is determined by multiplying
+    this value by dfs.namenode.checkpoint.txns.
+
+    This prevents extremely large edit files from accumulating on the active
+    namenode, which can cause timeouts during namenode startup and pose an
+    administrative hassle. This behavior is intended as a failsafe for when
+    the standby or secondary namenode fails to roll the edit log by the normal
+    checkpoint threshold.
+  </description>
+</property>
+
+
+<property>
+  <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name>
+  <value>300000</value>
+  <description>
+    How often an active namenode will check if it needs to roll its edit log,
+    in milliseconds.
+  </description>
+</property>
+
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogAutoroll.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogAutoroll.java
new file mode 100644
index 00000000000..bea5ee4e567
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogAutoroll.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.NameNodeEditLogRoller;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.base.Supplier;
+
+/**
+ * Verifies that an active NameNode rolls its own edit log once the open
+ * segment grows past the configured autoroll threshold, and that no roller
+ * thread remains after the NameNode transitions to standby.
+ */
+public class TestEditLogAutoroll {
+
+  private Configuration conf;
+  private MiniDFSCluster cluster;
+  private NameNode nn0;
+  private FileSystem fs;
+  private FSEditLog editLog;
+
+  /**
+   * Builds a cluster with two NameNodes and no DataNodes, makes the first
+   * NameNode active, and configures a low autoroll threshold with a short
+   * check interval.
+   */
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    // Stall the standby checkpointer in two ways
+    conf.setLong(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, Long.MAX_VALUE);
+    conf.setLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 20);
+    // Make it autoroll after 10 edits (0.5 * 20 checkpoint txns)
+    conf.setFloat(DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, 0.5f);
+    conf.setInt(DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, 100);
+
+    MiniDFSNNTopology topology = new MiniDFSNNTopology()
+        .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10061))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10062)));
+
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(topology)
+        .numDataNodes(0)
+        .build();
+    cluster.waitActive();
+
+    nn0 = cluster.getNameNode(0);
+    // NOTE(review): this FileSystem reference is overwritten below and is
+    // never closed by tearDown; presumably the call is kept for its effect
+    // on conf -- confirm whether it is still needed.
+    fs = HATestUtil.configureFailoverFs(cluster, conf);
+
+    cluster.transitionToActive(0);
+
+    fs = cluster.getFileSystem(0);
+    editLog = nn0.getNamesystem().getEditLog();
+  }
+
+  /** Closes the file system and shuts the cluster down, if they were created. */
+  @After
+  public void tearDown() throws Exception {
+    if (fs != null) {
+      fs.close();
+    }
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  /**
+   * Issues 11 mkdirs, waits until the current segment's first transaction id
+   * has advanced past its starting value, then transitions to standby and
+   * asserts that no thread matching the roller's class name is left.
+   */
+  @Test(timeout=60000)
+  public void testEditLogAutoroll() throws Exception {
+    // Make some edits
+    final long startTxId = editLog.getCurSegmentTxId();
+    for (int i=0; i<11; i++) {
+      fs.mkdirs(new Path("testEditLogAutoroll-" + i));
+    }
+    // Wait for the NN to autoroll
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return editLog.getCurSegmentTxId() > startTxId;
+      }
+    }, 1000, 5000);
+    // Transition to standby and make sure the roller stopped
+    nn0.transitionToStandby();
+    GenericTestUtils.assertNoThreadsMatching(
+        ".*" + NameNodeEditLogRoller.class.getSimpleName() + ".*");
+  }
+}