HDFS-5037. Active NN should trigger its own edit log rolls. Contributed by Andrew Wang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1538059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2013-11-01 20:49:54 +00:00
parent 1606a76a54
commit 3b91b7dece
6 changed files with 220 additions and 4 deletions

View File

@ -574,6 +574,8 @@ Release 2.2.1 - UNRELEASED
HDFS-4633 TestDFSClientExcludedNodes fails sporadically if excluded nodes
cache expires too quickly (Chris Nauroth via Sanjay)
HDFS-5037. Active NN should trigger its own edit log rolls (wang)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File

@ -188,6 +188,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum";
public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1;
public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD = "dfs.namenode.edit.log.autoroll.multiplier.threshold";
public static final float DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT = 2.0f;
public static final String DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS = "dfs.namenode.edit.log.autoroll.check.interval.ms";
public static final int DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT = 5*60*1000;
public static final String DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH = "dfs.namenode.edits.noeditlogchannelflush";
public static final boolean DFS_NAMENODE_EDITS_NOEDITLOGCHANNELFLUSH_DEFAULT = false;

View File

@ -38,6 +38,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECI
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
@ -49,6 +51,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT;
@ -395,6 +401,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
Daemon nnrmthread = null; // NamenodeResourceMonitor thread
Daemon nnEditLogRoller = null; // NameNodeEditLogRoller thread
/**
* When an active namenode will roll its own edit log, in # edits
*/
private final long editLogRollerThreshold;
/**
* Check interval of an active namenode's edit log roller thread
*/
private final int editLogRollerInterval;
private volatile boolean hasResourcesAvailable = false;
private volatile boolean fsRunning = true;
@ -708,7 +724,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
this.standbyShouldCheckpoint = conf.getBoolean(
DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT);
// # edit autoroll threshold is a multiple of the checkpoint threshold
this.editLogRollerThreshold = (long)
(conf.getFloat(
DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD,
DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD_DEFAULT) *
conf.getLong(
DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT));
this.editLogRollerInterval = conf.getInt(
DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS,
DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS_DEFAULT);
this.inodeId = new INodeId();
// For testing purposes, allow the DT secret manager to be started regardless
@ -983,6 +1009,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
//ResourceMonitor required only at ActiveNN. See HDFS-2914
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
nnrmthread.start();
nnEditLogRoller = new Daemon(new NameNodeEditLogRoller(
editLogRollerThreshold, editLogRollerInterval));
nnEditLogRoller.start();
cacheManager.activate();
blockManager.getDatanodeManager().setSendCachingCommands(true);
} finally {
@ -1022,6 +1053,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
((NameNodeResourceMonitor) nnrmthread.getRunnable()).stopMonitor();
nnrmthread.interrupt();
}
if (nnEditLogRoller != null) {
((NameNodeEditLogRoller)nnEditLogRoller.getRunnable()).stop();
nnEditLogRoller.interrupt();
}
if (dir != null && dir.fsImage != null) {
if (dir.fsImage.editLog != null) {
dir.fsImage.editLog.close();
@ -4164,6 +4199,47 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
}
class NameNodeEditLogRoller implements Runnable {
private boolean shouldRun = true;
private final long rollThreshold;
private final long sleepIntervalMs;
public NameNodeEditLogRoller(long rollThreshold, int sleepIntervalMs) {
this.rollThreshold = rollThreshold;
this.sleepIntervalMs = sleepIntervalMs;
}
@Override
public void run() {
while (fsRunning && shouldRun) {
try {
FSEditLog editLog = getFSImage().getEditLog();
long numEdits =
editLog.getLastWrittenTxId() - editLog.getCurSegmentTxId();
if (numEdits > rollThreshold) {
FSNamesystem.LOG.info("NameNode rolling its own edit log because"
+ " number of edits in open segment exceeds threshold of "
+ rollThreshold);
rollEditLog();
}
Thread.sleep(sleepIntervalMs);
} catch (InterruptedException e) {
FSNamesystem.LOG.info(NameNodeEditLogRoller.class.getSimpleName()
+ " was interrupted, exiting");
break;
} catch (Exception e) {
FSNamesystem.LOG.error("Swallowing exception in "
+ NameNodeEditLogRoller.class.getSimpleName() + ":", e);
}
}
}
public void stop() {
shouldRun = false;
}
}
public FSImage getFSImage() {
return dir.fsImage;
}
@ -5180,7 +5256,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
checkOperation(OperationCategory.JOURNAL);
checkNameNodeSafeMode("Log not rolled");
if (Server.isRpcInvocation()) {
LOG.info("Roll Edit Log from " + Server.getRemoteAddress());
}
return getFSImage().rollEditLog();
} finally {
writeUnlock();

View File

@ -38,7 +38,7 @@ public class ActiveState extends HAState {
@Override
public void checkOperation(HAContext context, OperationCategory op) {
return; // Other than journal all operations are allowed in active state
return; // All operations are allowed in active state
}
@Override

View File

@ -1543,4 +1543,29 @@
</description>
</property>
<property>
<name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name>
<value>2.0</value>
<description>
Determines when an active namenode will roll its own edit log.
The actual threshold (in number of edits) is determined by multiplying
this value by dfs.namenode.checkpoint.txns.
This prevents extremely large edit files from accumulating on the active
namenode, which can cause timeouts during namenode startup and pose an
administrative hassle. This behavior is intended as a failsafe for when
the standby or secondary namenode fail to roll the edit log by the normal
checkpoint threshold.
</description>
</property>
<property>
<name>dfs.namenode.edit.log.autoroll.check.interval.ms</name>
<value>300000</value>
<description>
How often an active namenode will check if it needs to roll its edit log,
in milliseconds.
</description>
</property>
</configuration>

View File

@ -0,0 +1,106 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.NameNodeEditLogRoller;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import com.google.common.base.Supplier;
public class TestEditLogAutoroll {
private Configuration conf;
private MiniDFSCluster cluster;
private NameNode nn0;
private FileSystem fs;
private FSEditLog editLog;
@Before
public void setUp() throws Exception {
conf = new Configuration();
// Stall the standby checkpointer in two ways
conf.setLong(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, Long.MAX_VALUE);
conf.setLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 20);
// Make it autoroll after 10 edits
conf.setFloat(DFS_NAMENODE_EDIT_LOG_AUTOROLL_MULTIPLIER_THRESHOLD, 0.5f);
conf.setInt(DFS_NAMENODE_EDIT_LOG_AUTOROLL_CHECK_INTERVAL_MS, 100);
MiniDFSNNTopology topology = new MiniDFSNNTopology()
.addNameservice(new MiniDFSNNTopology.NSConf("ns1")
.addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10061))
.addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10062)));
cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(topology)
.numDataNodes(0)
.build();
cluster.waitActive();
nn0 = cluster.getNameNode(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
cluster.transitionToActive(0);
fs = cluster.getFileSystem(0);
editLog = nn0.getNamesystem().getEditLog();
}
@After
public void tearDown() throws Exception {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
@Test(timeout=60000)
public void testEditLogAutoroll() throws Exception {
// Make some edits
final long startTxId = editLog.getCurSegmentTxId();
for (int i=0; i<11; i++) {
fs.mkdirs(new Path("testEditLogAutoroll-" + i));
}
// Wait for the NN to autoroll
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
return editLog.getCurSegmentTxId() > startTxId;
}
}, 1000, 5000);
// Transition to standby and make sure the roller stopped
nn0.transitionToStandby();
GenericTestUtils.assertNoThreadsMatching(
".*" + NameNodeEditLogRoller.class.getSimpleName() + ".*");
}
}