diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f5a4fd39cef..898089a81f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -217,6 +217,8 @@ Release 2.2.1 - UNRELEASED HDFS-4633 TestDFSClientExcludedNodes fails sporadically if excluded nodes cache expires too quickly (Chris Nauroth via Sanjay) + HDFS-5037. Active NN should trigger its own edit log rolls. (wang) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index e195a071f81..2f4b7776c9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -182,6 +182,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum"; public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1; + public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD = "dfs.namenode.edit.log.autoroll.multiplier.threshold"; + public static final float DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT = 2.0f; + public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS = "dfs.namenode.edit.log.autoroll.check.interval.ms"; + public static final int DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT = 5*60*1000; + public static final String DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH = "dfs.namenode.edits.noeditlogchannelflush"; public static final boolean DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH_DEFAULT = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9da125c4c27..49e6f217194 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -38,6 +38,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECI import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; @@ -49,6 +51,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; @@ -375,6 +381,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, Daemon nnrmthread = null; // NamenodeResourceMonitor thread + Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread + /** + * When an active namenode will roll its own edit log, in # edits + */ + private final long editLogRollerThreshold; + /** + * Check interval of an active namenode's edit log roller thread + */ + private final int editLogRollerInterval; + private volatile boolean hasResourcesAvailable = false; private volatile boolean fsRunning = true; @@ -688,7 +704,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.standbyShouldCheckpoint = conf.getBoolean( DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT); - + // # edit autoroll threshold is a multiple of the checkpoint threshold + this.editLogRollerThreshold = (long) + (conf.getFloat( + DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, + DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) * + conf.getLong( + DFS_NAMENODE_CHECKPOINT_TXNS_KEY, + DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT)); + this.editLogRollerInterval = conf.getInt( + DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, + DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT); this.inodeId = new INodeId(); // For testing purposes, allow the DT secret manager to be started regardless @@ -956,6 +982,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, //ResourceMonitor required only at ActiveNN. 
See HDFS-2914
       this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
       nnrmthread.start();
+
+      nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
+          editLogRollerThreshold, editLogRollerInterval));
+      nnEditLogRoller.start();
+
     } finally {
       writeUnlock();
       startingActiveService = false;
@@ -993,6 +1024,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
         ((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
         nnrmthread.interrupt();
       }
+      if (nnEditLogRoller != null) {
+        ((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
+        nnEditLogRoller.interrupt();
+      }
       if (dir != null && dir.fsImage != null) {
         if (dir.fsImage.editLog != null) {
           dir.fsImage.editLog.close();
@@ -4124,7 +4159,67 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
       shouldNNRmRun = false;
     }
   }
-  
+
+  /**
+   * Background task for the active NameNode that rolls the edit log once
+   * the open segment holds more than <code>rollThreshold</code> edits.
+   * The check repeats every <code>sleepIntervalMs</code> milliseconds until
+   * {@link #stop()} is called, the thread is interrupted while sleeping,
+   * or the namesystem stops running.
+   */
+  class NameNodeEditLogRoller implements Runnable {
+
+    // Set by stop() from another thread and read by run(), so it has to
+    // be volatile for the stop request to be reliably visible.
+    private volatile boolean shouldRun = true;
+    private final long rollThreshold;
+    private final long sleepIntervalMs;
+
+    /**
+     * @param rollThreshold number of edits in the open segment above which
+     *          the edit log is rolled
+     * @param sleepIntervalMs time to sleep between checks, in milliseconds
+     */
+    public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
+      this.rollThreshold = rollThreshold;
+      this.sleepIntervalMs = sleepIntervalMs;
+    }
+
+    @Override
+    public void run() {
+      while (fsRunning && shouldRun) {
+        try {
+          FSEditLog editLog = getFSImage().getEditLog();
+          long numEdits =
+              editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
+          if (numEdits > rollThreshold) {
+            FSNamesystem.LOG.info("NameNode rolling its own edit log because"
+                + " number of edits in open segment exceeds threshold of "
+                + rollThreshold);
+            rollEditLog();
+          }
+        } catch (Exception e) {
+          FSNamesystem.LOG.error("Swallowing exception in "
+              + NameNodeEditLogRoller.class.getSimpleName() + ":", e);
+        }
+        // Sleep outside the try above so that a check or roll that keeps
+        // failing is retried once per interval rather than in a tight loop.
+        try {
+          Thread.sleep(sleepIntervalMs);
+        } catch (InterruptedException e) {
+          FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
+              + " was interrupted, exiting");
+          break;
+        }
+      }
+    }
+
+    /** Requests that run() exit; the flag is checked on each iteration. */
+    public void stop() {
+      shouldRun = false;
+    }
+  }
+
   public FSImage getFSImage() {
     return
dir.fsImage; } @@ -5141,7 +5217,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { checkOperation(OperationCategory.JOURNAL); checkNameNodeSafeMode("Log not rolled"); - LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); + if (Server.isRpcInvocation()) { + LOG.info("Roll Edit Log from " + Server.getRemoteAddress()); + } return getFSImage().rollEditLog(); } finally { writeUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java index a61e134cc72..f05bf8e7861 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -38,7 +38,7 @@ public class ActiveState extends HAState { @Override public void checkOperation(HAContext context, OperationCategory op) { - return; // Other than journal all operations are allowed in active state + return; // All operations are allowed in active state } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 14580952390..7685f094338 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1459,4 +1459,29 @@ + + dfs.namenode.edit.log.autoroll.multiplier.threshold + 2.0 + + Determines when an active namenode will roll its own edit log. + The actual threshold (in number of edits) is determined by multiplying + this value by dfs.namenode.checkpoint.txns. + + This prevents extremely large edit files from accumulating on the active + namenode, which can cause timeouts during namenode startup and pose an + administrative hassle. 
This behavior is intended as a failsafe for when + the standby or secondary namenode fails to roll the edit log by the normal + checkpoint threshold. + + + + + dfs.namenode.edit.log.autoroll.check.interval.ms + 300000 + + How often an active namenode will check if it needs to roll its edit log, + in milliseconds. + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogAutoroll.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogAutoroll.java new file mode 100644 index 00000000000..bea5ee4e567 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogAutoroll.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.NameNodeEditLogRoller;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.base.Supplier;
+
+/** Checks active-NN edit log autoroll and roller shutdown on standby. */
+public class TestEditLogAutoroll {
+  private Configuration conf;
+  private MiniDFSCluster cluster;
+  private NameNode nn0;
+  private FileSystem fs;
+  private FSEditLog editLog;
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    // Stall the standby checkpointer in two ways
+    conf.setLong(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, Long.MAX_VALUE);
+    conf.setLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 20);
+    // Make it autoroll after 10 edits (multiplier 0.5 * 20 checkpoint txns)
+    conf.setFloat(DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, 0.5f);
+    conf.setInt(DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, 100);
+
+    MiniDFSNNTopology topology = new MiniDFSNNTopology()
+        .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10061))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10062)));
+
+    cluster = new MiniDFSCluster.Builder(conf)
+        .nnTopology(topology)
+        .numDataNodes(0)
+        .build();
+    cluster.waitActive();
+
+    nn0 = cluster.getNameNode(0);
+    // NOTE(review): this handle is replaced below without being closed
+    fs = HATestUtil.configureFailoverFs(cluster, conf);
+    cluster.transitionToActive(0);
+
+    fs = cluster.getFileSystem(0);
+    editLog = nn0.getNamesystem().getEditLog();
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (fs != null) {
+      fs.close();
+    }
+    if (cluster != null) {
+      cluster.shutdown();
+    }
+  }
+
+  @Test(timeout=60000)
+  public void testEditLogAutoroll() throws Exception {
+    // Make enough edits (11 mkdirs) to exceed the autoroll threshold
+    final long startTxId = editLog.getCurSegmentTxId();
+    for (int i=0; i<11; i++) {
+      fs.mkdirs(new Path("testEditLogAutoroll-" + i));
+    }
+    // Wait for the NN to autoroll, i.e. to start a new edit log segment
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return editLog.getCurSegmentTxId() > startTxId;
+      }
+    }, 1000, 5000);
+    // Transition to standby and make sure the roller stopped
+    nn0.transitionToStandby();
+    GenericTestUtils.assertNoThreadsMatching(
+        ".*" + NameNodeEditLogRoller.class.getSimpleName() + ".*");
+  }
+}