HDFS-5342. Provide more information in the FSNamesystem JMX interfaces. Contributed by Haohui Mai.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1532090 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jing Zhao 2013-10-14 20:56:37 +00:00
parent ed45d97ed7
commit cf0cf0a691
6 changed files with 102 additions and 33 deletions

View File

@ -253,6 +253,9 @@ Release 2.3.0 - UNRELEASED
HDFS-4953. Enable HDFS local reads via mmap.
(Colin Patrick McCabe via wang).
HDFS-5342. Provide more information in the FSNamesystem JMX interfaces.
(Haohui Mai via jing9)
IMPROVEMENTS
HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)

View File

@ -165,7 +165,13 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.*;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
import org.apache.hadoop.hdfs.server.common.GenerationStamp;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
@ -178,12 +184,6 @@ import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
@ -195,6 +195,12 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottab
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@ -207,10 +213,10 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.util.ChunkedArrayList;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RetriableException;
import org.apache.hadoop.ipc.RetryCache;
import org.apache.hadoop.ipc.RetryCache.CacheEntry;
import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
import org.apache.hadoop.ipc.RetriableException;
import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.metrics2.annotation.Metric;
@ -236,6 +242,7 @@ import org.mortbay.util.ajax.JSON;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
/**
@ -4225,7 +4232,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return JSON.toString(info);
}
int getNumberOfDatanodes(DatanodeReportType type) {
readLock();
try {
@ -5263,7 +5269,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
/**
* Get the maximum number of objects in the system.
*/
long getMaxObjects() {
@Override // FSNamesystemMBean
public long getMaxObjects() {
return maxFsObjects;
}
@ -5408,7 +5415,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
@Override // FSNamesystemMBean
public int getNumDecomDeadDataNodes() {
final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
getBlockManager().getDatanodeManager().fetchDatanodes(dead, null, true);
getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
int deadDecommissioned = 0;
for (DatanodeDescriptor node : dead) {
deadDecommissioned += node.isDecommissioned() ? 1 : 0;
@ -5416,6 +5423,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return deadDecommissioned;
}
/**
* Reports how many datanodes are in the decommissioning state, measured as
* the size of the decommissioning-node list held by the datanode manager.
*
* @return the number of decommissioning datanodes
*/
@Override // FSNamesystemMBean
public int getNumDecommissioningDataNodes() {
  // Count the entries of the datanode manager's decommissioning list.
  final int decommissioningCount = getBlockManager()
      .getDatanodeManager()
      .getDecommissioningNodes()
      .size();
  return decommissioningCount;
}
@Override // FSNamesystemMBean
@Metric({"StaleDataNodes",
"Number of datanodes marked stale due to delayed heartbeat"})
@ -6255,14 +6268,25 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
for (DatanodeDescriptor node : live) {
final Map<String, Object> innerinfo = new HashMap<String, Object>();
innerinfo.put("lastContact", getLastContact(node));
innerinfo.put("usedSpace", getDfsUsed(node));
innerinfo.put("adminState", node.getAdminState().toString());
innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed());
innerinfo.put("capacity", node.getCapacity());
innerinfo.put("numBlocks", node.numBlocks());
innerinfo.put("version", node.getSoftwareVersion());
Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
.put("infoAddr", node.getInfoAddr())
.put("infoSecureAddr", node.getInfoSecureAddr())
.put("xferaddr", node.getXferAddr())
.put("lastContact", getLastContact(node))
.put("usedSpace", getDfsUsed(node))
.put("adminState", node.getAdminState().toString())
.put("nonDfsUsedSpace", node.getNonDfsUsed())
.put("capacity", node.getCapacity())
.put("numBlocks", node.numBlocks())
.put("version", node.getSoftwareVersion())
.put("used", node.getDfsUsed())
.put("remaining", node.getRemaining())
.put("blockScheduled", node.getBlocksScheduled())
.put("blockPoolUsed", node.getBlockPoolUsed())
.put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
.put("volfails", node.getVolumeFailures())
.build();
info.put(node.getHostName(), innerinfo);
}
return JSON.toString(info);
@ -6279,9 +6303,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
for (DatanodeDescriptor node : dead) {
final Map<String, Object> innerinfo = new HashMap<String, Object>();
innerinfo.put("lastContact", getLastContact(node));
innerinfo.put("decommissioned", node.isDecommissioned());
Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
.put("lastContact", getLastContact(node))
.put("decommissioned", node.isDecommissioned())
.put("xferaddr", node.getXferAddr())
.build();
info.put(node.getHostName(), innerinfo);
}
return JSON.toString(info);
@ -6298,13 +6324,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
).getDecommissioningNodes();
for (DatanodeDescriptor node : decomNodeList) {
final Map<String, Object> innerinfo = new HashMap<String, Object>();
innerinfo.put("underReplicatedBlocks", node.decommissioningStatus
.getUnderReplicatedBlocks());
innerinfo.put("decommissionOnlyReplicas", node.decommissioningStatus
.getDecommissionOnlyReplicas());
innerinfo.put("underReplicateInOpenFiles", node.decommissioningStatus
.getUnderReplicatedInOpenFiles());
Map<String, Object> innerinfo = ImmutableMap
.<String, Object> builder()
.put("xferaddr", node.getXferAddr())
.put("underReplicatedBlocks",
node.decommissioningStatus.getUnderReplicatedBlocks())
.put("decommissionOnlyReplicas",
node.decommissioningStatus.getDecommissionOnlyReplicas())
.put("underReplicateInOpenFiles",
node.decommissioningStatus.getUnderReplicatedInOpenFiles())
.build();
info.put(node.getHostName(), innerinfo);
}
return JSON.toString(info);

View File

@ -44,6 +44,7 @@ public class StartupProgressServlet extends DfsServlet {
private static final String ELAPSED_TIME = "elapsedTime";
private static final String FILE = "file";
private static final String NAME = "name";
private static final String DESC = "desc";
private static final String PERCENT_COMPLETE = "percentComplete";
private static final String PHASES = "phases";
private static final String SIZE = "size";
@ -70,6 +71,7 @@ public class StartupProgressServlet extends DfsServlet {
for (Phase phase: view.getPhases()) {
json.writeStartObject();
json.writeStringField(NAME, phase.getName());
json.writeStringField(DESC, phase.getDescription());
json.writeStringField(STATUS, view.getStatus(phase).toString());
json.writeNumberField(PERCENT_COMPLETE, view.getPercentComplete(phase));
json.writeNumberField(ELAPSED_TIME, view.getElapsedTime(phase));
@ -80,8 +82,10 @@ public class StartupProgressServlet extends DfsServlet {
for (Step step: view.getSteps(phase)) {
json.writeStartObject();
StepType type = step.getType();
String name = type != null ? type.getName() : null;
writeStringFieldIfNotNull(json, NAME, name);
if (type != null) {
json.writeStringField(NAME, type.getName());
json.writeStringField(DESC, type.getDescription());
}
json.writeNumberField(COUNT, view.getCount(phase, step));
writeStringFieldIfNotNull(json, FILE, step.getFile());
writeNumberFieldIfDefined(json, SIZE, step.getSize());

View File

@ -131,8 +131,18 @@ public interface FSNamesystemMBean {
*/
public int getNumDecomDeadDataNodes();
/**
* Number of data nodes that are in the decommissioning state.
*
* @return the count of data nodes currently in the decommissioning state
*/
public int getNumDecommissioningDataNodes();
/**
* The statistics of snapshots.
*
* @return the snapshot statistics rendered as a string
*         (NOTE(review): the exact format is not visible here; confirm
*         against the implementation)
*/
public String getSnapshotStats();
/**
* Return the maximum number of inodes in the file system.
*
* @return the maximum number of file system objects
*/
public long getMaxObjects();
}

View File

@ -370,13 +370,20 @@ public class TestDecommission {
for (int i = 0; i < numNamenodes; i++) {
ArrayList<DatanodeInfo> decommissionedNodes = namenodeDecomList.get(i);
FileSystem fileSys = cluster.getFileSystem(i);
FSNamesystem ns = cluster.getNamesystem(i);
writeFile(fileSys, file1, replicas);
int deadDecomissioned = ns.getNumDecomDeadDataNodes();
int liveDecomissioned = ns.getNumDecomLiveDataNodes();
// Decommission one node. Verify that node is decommissioned.
DatanodeInfo decomNode = decommissionNode(i, decommissionedNodes,
AdminStates.DECOMMISSIONED);
decommissionedNodes.add(decomNode);
assertEquals(deadDecomissioned, ns.getNumDecomDeadDataNodes());
assertEquals(liveDecomissioned + 1, ns.getNumDecomLiveDataNodes());
// Ensure decommissioned datanode is not automatically shutdown
DFSClient client = getDfsClient(cluster.getNameNode(i), conf);
assertEquals("All datanodes must be alive", numDatanodes,

View File

@ -73,24 +73,28 @@ public class TestStartupProgressServlet {
.put("phases", Arrays.<Object>asList(
ImmutableMap.<String, Object>builder()
.put("name", "LoadingFsImage")
.put("desc", "Loading fsimage")
.put("status", "PENDING")
.put("percentComplete", 0.0f)
.put("steps", Collections.emptyList())
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "LoadingEdits")
.put("desc", "Loading edits")
.put("status", "PENDING")
.put("percentComplete", 0.0f)
.put("steps", Collections.emptyList())
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "SavingCheckpoint")
.put("desc", "Saving checkpoint")
.put("status", "PENDING")
.put("percentComplete", 0.0f)
.put("steps", Collections.emptyList())
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "SafeMode")
.put("desc", "Safe mode")
.put("status", "PENDING")
.put("percentComplete", 0.0f)
.put("steps", Collections.emptyList())
@ -111,11 +115,13 @@ public class TestStartupProgressServlet {
.put("phases", Arrays.<Object>asList(
ImmutableMap.<String, Object>builder()
.put("name", "LoadingFsImage")
.put("desc", "Loading fsimage")
.put("status", "COMPLETE")
.put("percentComplete", 1.0f)
.put("steps", Collections.<Object>singletonList(
ImmutableMap.<String, Object>builder()
.put("name", "Inodes")
.put("desc", "inodes")
.put("count", 100L)
.put("total", 100L)
.put("percentComplete", 1.0f)
@ -124,6 +130,7 @@ public class TestStartupProgressServlet {
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "LoadingEdits")
.put("desc", "Loading edits")
.put("status", "RUNNING")
.put("percentComplete", 0.5f)
.put("steps", Collections.<Object>singletonList(
@ -138,12 +145,14 @@ public class TestStartupProgressServlet {
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "SavingCheckpoint")
.put("desc", "Saving checkpoint")
.put("status", "PENDING")
.put("percentComplete", 0.0f)
.put("steps", Collections.emptyList())
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "SafeMode")
.put("desc", "Safe mode")
.put("status", "PENDING")
.put("percentComplete", 0.0f)
.put("steps", Collections.emptyList())
@ -164,11 +173,13 @@ public class TestStartupProgressServlet {
.put("phases", Arrays.<Object>asList(
ImmutableMap.<String, Object>builder()
.put("name", "LoadingFsImage")
.put("desc", "Loading fsimage")
.put("status", "COMPLETE")
.put("percentComplete", 1.0f)
.put("steps", Collections.<Object>singletonList(
ImmutableMap.<String, Object>builder()
.put("name", "Inodes")
.put("desc", "inodes")
.put("count", 100L)
.put("total", 100L)
.put("percentComplete", 1.0f)
@ -177,6 +188,7 @@ public class TestStartupProgressServlet {
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "LoadingEdits")
.put("desc", "Loading edits")
.put("status", "COMPLETE")
.put("percentComplete", 1.0f)
.put("steps", Collections.<Object>singletonList(
@ -191,11 +203,13 @@ public class TestStartupProgressServlet {
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "SavingCheckpoint")
.put("desc", "Saving checkpoint")
.put("status", "COMPLETE")
.put("percentComplete", 1.0f)
.put("steps", Collections.<Object>singletonList(
ImmutableMap.<String, Object>builder()
.put("name", "Inodes")
.put("desc", "inodes")
.put("count", 300L)
.put("total", 300L)
.put("percentComplete", 1.0f)
@ -204,11 +218,13 @@ public class TestStartupProgressServlet {
.build(),
ImmutableMap.<String, Object>builder()
.put("name", "SafeMode")
.put("desc", "Safe mode")
.put("status", "COMPLETE")
.put("percentComplete", 1.0f)
.put("steps", Collections.<Object>singletonList(
ImmutableMap.<String, Object>builder()
.put("name", "AwaitingReportedBlocks")
.put("desc", "awaiting reported blocks")
.put("count", 400L)
.put("total", 400L)
.put("percentComplete", 1.0f)