HDFS-4059. Merging change r1398949 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1399391 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Suresh Srinivas 2012-10-17 18:54:59 +00:00
parent 070ae0d73e
commit 32b27ebec1
6 changed files with 64 additions and 2 deletions

View File

@ -12,6 +12,8 @@ Release 2.0.3-alpha - Unreleased
HDFS-3912. Detect and avoid stale datanodes for writes. HDFS-3912. Detect and avoid stale datanodes for writes.
(Jing Zhao via suresh) (Jing Zhao via suresh)
HDFS-4059. Add number of stale DataNodes to metrics. (Jing Zhao via suresh)
IMPROVEMENTS IMPROVEMENTS
HDFS-3925. Prettify PipelineAck#toString() for printing to a log HDFS-3925. Prettify PipelineAck#toString() for printing to a log

View File

@ -868,7 +868,7 @@ public class DatanodeManager {
* @return Return the current number of stale DataNodes (detected by * @return Return the current number of stale DataNodes (detected by
* HeartbeatManager). * HeartbeatManager).
*/ */
int getNumStaleNodes() { public int getNumStaleNodes() {
return this.numStaleNodes; return this.numStaleNodes;
} }

View File

@ -4663,6 +4663,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return getBlockManager().getDatanodeManager().getNumDeadDataNodes(); return getBlockManager().getDatanodeManager().getNumDeadDataNodes();
} }
/**
 * Reports the number of datanodes currently marked stale due to delayed
 * heartbeat, as tracked by the DatanodeManager. Published under the
 * "StaleDataNodes" metric name.
 *
 * @return the stale datanode count read from the DatanodeManager
 */
@Override // FSNamesystemMBean
@Metric({"StaleDataNodes",
    "Number of datanodes marked stale due to delayed heartbeat"})
public int getNumStaleDataNodes() {
  final int staleCount =
      getBlockManager().getDatanodeManager().getNumStaleNodes();
  return staleCount;
}
/** /**
* Sets the generation stamp for this filesystem * Sets the generation stamp for this filesystem
*/ */

View File

@ -112,4 +112,10 @@ public interface FSNamesystemMBean {
* @return number of dead data nodes * @return number of dead data nodes
*/ */
public int getNumDeadDataNodes(); public int getNumDeadDataNodes();
/**
* Number of stale data nodes, i.e. data nodes that have been marked
* stale because their heartbeat is delayed.
* @return number of stale data nodes
*/
public int getNumStaleDataNodes();
} }

View File

@ -191,4 +191,12 @@ public class BlockManagerTestUtil {
"Must use default policy, got %s", bpp.getClass()); "Must use default policy, got %s", bpp.getClass());
((BlockPlacementPolicyDefault)bpp).setPreferLocalNode(prefer); ((BlockPlacementPolicyDefault)bpp).setPreferLocalNode(prefer);
} }
/**
* Test helper that invokes the heartbeat check function of the
* HeartbeatManager reached through the given BlockManager's
* DatanodeManager.
* NOTE(review): presumably lets tests refresh heartbeat-derived state
* (e.g. the stale DataNode count) on demand - confirm against
* HeartbeatManager#heartbeatCheck.
* @param bm the BlockManager to manipulate
*/
public static void checkHeartbeat(BlockManager bm) {
bm.getDatanodeManager().getHeartbeatManager().heartbeatCheck();
}
} }

View File

@ -41,10 +41,14 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.test.MetricsAsserts; import org.apache.hadoop.test.MetricsAsserts;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level; import org.apache.log4j.Level;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -77,7 +81,8 @@ public class TestNameNodeMetrics {
DFS_REPLICATION_INTERVAL); DFS_REPLICATION_INTERVAL);
CONF.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY, CONF.set(DFSConfigKeys.DFS_METRICS_PERCENTILES_INTERVALS_KEY,
"" + PERCENTILES_INTERVAL); "" + PERCENTILES_INTERVAL);
// Enable stale DataNodes checking
CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_CHECK_STALE_DATANODE_KEY, true);
((Log4JLogger)LogFactory.getLog(MetricsAsserts.class)) ((Log4JLogger)LogFactory.getLog(MetricsAsserts.class))
.getLogger().setLevel(Level.DEBUG); .getLogger().setLevel(Level.DEBUG);
} }
@ -125,6 +130,40 @@ public class TestNameNodeMetrics {
stm.close(); stm.close();
} }
/**
 * Test the metric indicating the number of stale DataNodes: the
 * "StaleDataNodes" gauge must read 2 after two DataNodes are made stale
 * and 0 again once both have been refreshed.
 */
@Test
public void testStaleNodes() throws Exception {
  final BlockManager blockManager =
      cluster.getNameNode().getNamesystem().getBlockManager();
  final long staleInterval = CONF.getLong(
      DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
      DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_DEFAULT);

  // Mark the first two DataNodes as stale: disable their heartbeats and
  // move their last update to just beyond the stale interval.
  for (int idx = 0; idx < 2; idx++) {
    DataNode staleNode = cluster.getDataNodes().get(idx);
    DataNodeTestUtils.setHeartbeatsDisabledForTests(staleNode, true);
    blockManager.getDatanodeManager()
        .getDatanode(staleNode.getDatanodeId())
        .setLastUpdate(Time.now() - staleInterval - 1);
  }
  // Have the HeartbeatManager run its heartbeat check
  BlockManagerTestUtil.checkHeartbeat(blockManager);
  assertGauge("StaleDataNodes", 2, getMetrics(NS_METRICS));

  // Undo the staleness: re-enable heartbeats and set the last update to now.
  for (int idx = 0; idx < 2; idx++) {
    DataNode freshNode = cluster.getDataNodes().get(idx);
    DataNodeTestUtils.setHeartbeatsDisabledForTests(freshNode, false);
    blockManager.getDatanodeManager()
        .getDatanode(freshNode.getDatanodeId())
        .setLastUpdate(Time.now());
  }
  // Have the HeartbeatManager run its heartbeat check again
  BlockManagerTestUtil.checkHeartbeat(blockManager);
  assertGauge("StaleDataNodes", 0, getMetrics(NS_METRICS));
}
/** Test metrics associated with addition of a file */ /** Test metrics associated with addition of a file */
@Test @Test
public void testFileAdd() throws Exception { public void testFileAdd() throws Exception {