From cf0cf0a6910244d929f40842223e7d0b2c9445e8 Mon Sep 17 00:00:00 2001
From: Jing Zhao
Date: Mon, 14 Oct 2013 20:56:37 +0000
Subject: [PATCH] HDFS-5342. Provide more information in the FSNamesystem JMX
 interfaces. Contributed by Haohui Mai.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1532090 13f79535-47bb-0310-9956-ffa450edef68
---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt      |  3 +
 .../hdfs/server/namenode/FSNamesystem.java       | 87 ++++++++++++-------
 .../namenode/StartupProgressServlet.java         |  8 +-
 .../namenode/metrics/FSNamesystemMBean.java      | 10 +++
 .../apache/hadoop/hdfs/TestDecommission.java     | 11 ++-
 .../namenode/TestStartupProgressServlet.java     | 16 ++++
 6 files changed, 102 insertions(+), 33 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index f2924e201b3..586c5a88e00 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -253,6 +253,9 @@ Release 2.3.0 - UNRELEASED
     HDFS-4953. Enable HDFS local reads via mmap. (Colin Patrick McCabe via
     wang).
 
+    HDFS-5342. Provide more information in the FSNamesystem JMX interfaces.
+    (Haohui Mai via jing9)
+
   IMPROVEMENTS
 
     HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 3a911e8f56a..d4d3ae5c5ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -165,7 +165,13 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
 import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
-import org.apache.hadoop.hdfs.server.blockmanagement.*;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStatistics;
+import org.apache.hadoop.hdfs.server.blockmanagement.OutOfV1GenerationStampsException;
 import org.apache.hadoop.hdfs.server.common.GenerationStamp;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
@@ -178,12 +184,6 @@ import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
 import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
 import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
-import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
-import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
-import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
-import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
-import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
-import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
 import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
 import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
@@ -195,6 +195,12 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottab
 import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithSnapshot;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
 import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotManager;
+import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase;
+import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress;
+import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
+import org.apache.hadoop.hdfs.server.namenode.startupprogress.Status;
+import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
+import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
 import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
@@ -207,10 +213,10 @@ import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
 import org.apache.hadoop.hdfs.util.ChunkedArrayList;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.ipc.RetryCache;
 import org.apache.hadoop.ipc.RetryCache.CacheEntry;
 import org.apache.hadoop.ipc.RetryCache.CacheEntryWithPayload;
-import org.apache.hadoop.ipc.RetriableException;
 import org.apache.hadoop.ipc.Server;
 import org.apache.hadoop.ipc.StandbyException;
 import org.apache.hadoop.metrics2.annotation.Metric;
@@ -236,6 +242,7 @@ import org.mortbay.util.ajax.JSON;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Charsets;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 
 /**
@@ -4225,7 +4232,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     return JSON.toString(info);
   }
 
-
   int getNumberOfDatanodes(DatanodeReportType type) {
     readLock();
     try {
@@ -5263,7 +5269,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   /**
    * Get the total number of objects in the system.
    */
-  long getMaxObjects() {
+  @Override // FSNamesystemMBean
+  public long getMaxObjects() {
     return maxFsObjects;
   }
 
@@ -5408,7 +5415,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
   @Override // FSNamesystemMBean
   public int getNumDecomDeadDataNodes() {
     final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
-    getBlockManager().getDatanodeManager().fetchDatanodes(dead, null, true);
+    getBlockManager().getDatanodeManager().fetchDatanodes(null, dead, true);
     int deadDecommissioned = 0;
     for (DatanodeDescriptor node : dead) {
       deadDecommissioned += node.isDecommissioned() ? 1 : 0;
@@ -5416,6 +5423,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     return deadDecommissioned;
   }
 
+  @Override // FSNamesystemMBean
+  public int getNumDecommissioningDataNodes() {
+    return getBlockManager().getDatanodeManager().getDecommissioningNodes()
+        .size();
+  }
+
   @Override // FSNamesystemMBean
   @Metric({"StaleDataNodes",
     "Number of datanodes marked stale due to delayed heartbeat"})
@@ -6255,14 +6268,25 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     final List<DatanodeDescriptor> live = new ArrayList<DatanodeDescriptor>();
     blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
     for (DatanodeDescriptor node : live) {
-      final Map<String, Object> innerinfo = new HashMap<String, Object>();
-      innerinfo.put("lastContact", getLastContact(node));
-      innerinfo.put("usedSpace", getDfsUsed(node));
-      innerinfo.put("adminState", node.getAdminState().toString());
-      innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed());
-      innerinfo.put("capacity", node.getCapacity());
-      innerinfo.put("numBlocks", node.numBlocks());
-      innerinfo.put("version", node.getSoftwareVersion());
+      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
+          .put("infoAddr", node.getInfoAddr())
+          .put("infoSecureAddr", node.getInfoSecureAddr())
+          .put("xferaddr", node.getXferAddr())
+          .put("lastContact", getLastContact(node))
+          .put("usedSpace", getDfsUsed(node))
+          .put("adminState", node.getAdminState().toString())
+          .put("nonDfsUsedSpace", node.getNonDfsUsed())
+          .put("capacity", node.getCapacity())
+          .put("numBlocks", node.numBlocks())
+          .put("version", node.getSoftwareVersion())
+          .put("used", node.getDfsUsed())
+          .put("remaining", node.getRemaining())
+          .put("blockScheduled", node.getBlocksScheduled())
+          .put("blockPoolUsed", node.getBlockPoolUsed())
+          .put("blockPoolUsedPercent", node.getBlockPoolUsedPercent())
+          .put("volfails", node.getVolumeFailures())
+          .build();
+
       info.put(node.getHostName(), innerinfo);
     }
     return JSON.toString(info);
@@ -6279,9 +6303,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
     blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
     for (DatanodeDescriptor node : dead) {
-      final Map<String, Object> innerinfo = new HashMap<String, Object>();
-      innerinfo.put("lastContact", getLastContact(node));
-      innerinfo.put("decommissioned", node.isDecommissioned());
+      Map<String, Object> innerinfo = ImmutableMap.<String, Object>builder()
+          .put("lastContact", getLastContact(node))
+          .put("decommissioned", node.isDecommissioned())
+          .put("xferaddr", node.getXferAddr())
+          .build();
       info.put(node.getHostName(), innerinfo);
     }
     return JSON.toString(info);
@@ -6298,13 +6324,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
     final List<DatanodeDescriptor> decomNodeList = blockManager.getDatanodeManager(
         ).getDecommissioningNodes();
     for (DatanodeDescriptor node : decomNodeList) {
-      final Map<String, Object> innerinfo = new HashMap<String, Object>();
-      innerinfo.put("underReplicatedBlocks", node.decommissioningStatus
-          .getUnderReplicatedBlocks());
-      innerinfo.put("decommissionOnlyReplicas", node.decommissioningStatus
-          .getDecommissionOnlyReplicas());
-      innerinfo.put("underReplicateInOpenFiles", node.decommissioningStatus
-          .getUnderReplicatedInOpenFiles());
+      Map<String, Object> innerinfo = ImmutableMap
+          .<String, Object> builder()
+          .put("xferaddr", node.getXferAddr())
+          .put("underReplicatedBlocks",
+              node.decommissioningStatus.getUnderReplicatedBlocks())
+          .put("decommissionOnlyReplicas",
+              node.decommissioningStatus.getDecommissionOnlyReplicas())
+          .put("underReplicateInOpenFiles",
+              node.decommissioningStatus.getUnderReplicatedInOpenFiles())
+          .build();
       info.put(node.getHostName(), innerinfo);
     }
     return JSON.toString(info);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java
index a6b9afbd1f1..f18fd920c92 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java
@@ -44,6 +44,7 @@ public class StartupProgressServlet extends DfsServlet {
   private static final String ELAPSED_TIME = "elapsedTime";
   private static final String FILE = "file";
   private static final String NAME = "name";
+  private static final String DESC = "desc";
   private static final String PERCENT_COMPLETE = "percentComplete";
   private static final String PHASES = "phases";
   private static final String SIZE = "size";
@@ -70,6 +71,7 @@ public class StartupProgressServlet extends DfsServlet {
       for (Phase phase: view.getPhases()) {
         json.writeStartObject();
         json.writeStringField(NAME, phase.getName());
+        json.writeStringField(DESC, phase.getDescription());
         json.writeStringField(STATUS, view.getStatus(phase).toString());
         json.writeNumberField(PERCENT_COMPLETE, view.getPercentComplete(phase));
         json.writeNumberField(ELAPSED_TIME, view.getElapsedTime(phase));
@@ -80,8 +82,10 @@ public class StartupProgressServlet extends DfsServlet {
         for (Step step: view.getSteps(phase)) {
           json.writeStartObject();
           StepType type = step.getType();
-          String name = type != null ? type.getName() : null;
-          writeStringFieldIfNotNull(json, NAME, name);
+          if (type != null) {
+            json.writeStringField(NAME, type.getName());
+            json.writeStringField(DESC, type.getDescription());
+          }
           json.writeNumberField(COUNT, view.getCount(phase, step));
           writeStringFieldIfNotNull(json, FILE, step.getFile());
           writeNumberFieldIfDefined(json, SIZE, step.getSize());
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
index 4835fd86be3..340d9a3fce4 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
@@ -131,8 +131,18 @@ public interface FSNamesystemMBean {
    */
   public int getNumDecomDeadDataNodes();
 
+  /**
+   * Number of data nodes that are in the decommissioning state
+   */
+  public int getNumDecommissioningDataNodes();
+
   /**
    * The statistics of snapshots
    */
   public String getSnapshotStats();
+
+  /**
+   * Return the maximum number of inodes in the file system
+   */
+  public long getMaxObjects();
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 9c2e038f3f1..39088620c84 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -370,13 +370,20 @@ public class TestDecommission {
     for (int i = 0; i < numNamenodes; i++) {
       ArrayList<DatanodeInfo> decommissionedNodes = namenodeDecomList.get(i);
       FileSystem fileSys = cluster.getFileSystem(i);
+      FSNamesystem ns = cluster.getNamesystem(i);
+
       writeFile(fileSys, file1, replicas);
-      
+
+      int deadDecommissioned = ns.getNumDecomDeadDataNodes();
+      int liveDecommissioned = ns.getNumDecomLiveDataNodes();
+
       // Decommission one node. Verify that node is decommissioned.
       DatanodeInfo decomNode = decommissionNode(i, decommissionedNodes,
           AdminStates.DECOMMISSIONED);
       decommissionedNodes.add(decomNode);
-      
+      assertEquals(deadDecommissioned, ns.getNumDecomDeadDataNodes());
+      assertEquals(liveDecommissioned + 1, ns.getNumDecomLiveDataNodes());
+
       // Ensure decommissioned datanode is not automatically shutdown
       DFSClient client = getDfsClient(cluster.getNameNode(i), conf);
       assertEquals("All datanodes must be alive", numDatanodes,
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartupProgressServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartupProgressServlet.java
index 544f44f12f4..0f22e9a4bca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartupProgressServlet.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartupProgressServlet.java
@@ -73,24 +73,28 @@ public class TestStartupProgressServlet {
       .put("phases", Arrays.<Object>asList(
         ImmutableMap.<String, Object>builder()
           .put("name", "LoadingFsImage")
+          .put("desc", "Loading fsimage")
           .put("status", "PENDING")
           .put("percentComplete", 0.0f)
           .put("steps", Collections.emptyList())
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "LoadingEdits")
+          .put("desc", "Loading edits")
           .put("status", "PENDING")
           .put("percentComplete", 0.0f)
           .put("steps", Collections.emptyList())
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "SavingCheckpoint")
+          .put("desc", "Saving checkpoint")
           .put("status", "PENDING")
           .put("percentComplete", 0.0f)
           .put("steps", Collections.emptyList())
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "SafeMode")
+          .put("desc", "Safe mode")
           .put("status", "PENDING")
           .put("percentComplete", 0.0f)
           .put("steps", Collections.emptyList())
@@ -111,11 +115,13 @@ public class TestStartupProgressServlet {
       .put("phases", Arrays.<Object>asList(
         ImmutableMap.<String, Object>builder()
           .put("name", "LoadingFsImage")
+          .put("desc", "Loading fsimage")
           .put("status", "COMPLETE")
           .put("percentComplete", 1.0f)
           .put("steps", Collections.singletonList(
             ImmutableMap.<String, Object>builder()
               .put("name", "Inodes")
+              .put("desc", "inodes")
               .put("count", 100L)
               .put("total", 100L)
               .put("percentComplete", 1.0f)
@@ -124,6 +130,7 @@ public class TestStartupProgressServlet {
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "LoadingEdits")
+          .put("desc", "Loading edits")
           .put("status", "RUNNING")
           .put("percentComplete", 0.5f)
           .put("steps", Collections.singletonList(
@@ -138,12 +145,14 @@ public class TestStartupProgressServlet {
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "SavingCheckpoint")
+          .put("desc", "Saving checkpoint")
           .put("status", "PENDING")
           .put("percentComplete", 0.0f)
           .put("steps", Collections.emptyList())
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "SafeMode")
+          .put("desc", "Safe mode")
           .put("status", "PENDING")
           .put("percentComplete", 0.0f)
           .put("steps", Collections.emptyList())
@@ -164,11 +173,13 @@ public class TestStartupProgressServlet {
       .put("phases", Arrays.<Object>asList(
         ImmutableMap.<String, Object>builder()
           .put("name", "LoadingFsImage")
+          .put("desc", "Loading fsimage")
           .put("status", "COMPLETE")
           .put("percentComplete", 1.0f)
           .put("steps", Collections.singletonList(
             ImmutableMap.<String, Object>builder()
               .put("name", "Inodes")
+              .put("desc", "inodes")
               .put("count", 100L)
               .put("total", 100L)
               .put("percentComplete", 1.0f)
@@ -177,6 +188,7 @@ public class TestStartupProgressServlet {
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "LoadingEdits")
+          .put("desc", "Loading edits")
           .put("status", "COMPLETE")
           .put("percentComplete", 1.0f)
           .put("steps", Collections.singletonList(
@@ -191,11 +203,13 @@ public class TestStartupProgressServlet {
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "SavingCheckpoint")
+          .put("desc", "Saving checkpoint")
           .put("status", "COMPLETE")
           .put("percentComplete", 1.0f)
           .put("steps", Collections.singletonList(
             ImmutableMap.<String, Object>builder()
               .put("name", "Inodes")
+              .put("desc", "inodes")
               .put("count", 300L)
               .put("total", 300L)
               .put("percentComplete", 1.0f)
@@ -204,11 +218,13 @@ public class TestStartupProgressServlet {
           .build(),
         ImmutableMap.<String, Object>builder()
           .put("name", "SafeMode")
+          .put("desc", "Safe mode")
           .put("status", "COMPLETE")
           .put("percentComplete", 1.0f)
           .put("steps", Collections.singletonList(
             ImmutableMap.<String, Object>builder()
               .put("name", "AwaitingReportedBlocks")
+              .put("desc", "awaiting reported blocks")
               .put("count", 400L)
               .put("total", 400L)
               .put("percentComplete", 1.0f)