HDFS-3245. Add metrics and web UI for cluster version summary. Contributed by Ravi Prakash.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1517942 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ba89f8efc7
commit
7246828377
|
@ -107,6 +107,9 @@ Release 2.1.1-beta - UNRELEASED
|
|||
HDFS-5045. Add more unit tests for retry cache to cover all AtMostOnce
|
||||
methods. (jing9)
|
||||
|
||||
HDFS-3245. Add metrics and web UI for cluster version summary. (Ravi
|
||||
Prakash via kihwal)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -47,6 +47,7 @@ public class DatanodeInfo extends DatanodeID implements Node {
|
|||
private long lastUpdate;
|
||||
private int xceiverCount;
|
||||
private String location = NetworkTopology.DEFAULT_RACK;
|
||||
private String softwareVersion;
|
||||
|
||||
// Datanode administrative states
|
||||
public enum AdminStates {
|
||||
|
@ -381,4 +382,12 @@ public class DatanodeInfo extends DatanodeID implements Node {
|
|||
// by DatanodeID
|
||||
return (this == obj) || super.equals(obj);
|
||||
}
|
||||
|
||||
public String getSoftwareVersion() {
|
||||
return softwareVersion;
|
||||
}
|
||||
|
||||
public void setSoftwareVersion(String softwareVersion) {
|
||||
this.softwareVersion = softwareVersion;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.net.UnknownHostException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.NavigableMap;
|
||||
|
@ -55,13 +56,6 @@ import org.apache.hadoop.hdfs.server.namenode.HostFileManager.MutableEntrySet;
|
|||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.Namesystem;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand;
|
||||
import org.apache.hadoop.hdfs.server.namenode.HostFileManager;
|
||||
import org.apache.hadoop.hdfs.server.namenode.HostFileManager.Entry;
|
||||
import org.apache.hadoop.hdfs.server.namenode.HostFileManager.EntrySet;
|
||||
import org.apache.hadoop.hdfs.server.namenode.HostFileManager.MutableEntrySet;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.Namesystem;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand;
|
||||
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
|
||||
|
@ -71,7 +65,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
|||
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
|
||||
import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
|
||||
import org.apache.hadoop.hdfs.util.CyclicIteration;
|
||||
import org.apache.hadoop.io.IOUtils;
|
||||
import org.apache.hadoop.ipc.Server;
|
||||
import org.apache.hadoop.net.CachedDNSToSwitchMapping;
|
||||
import org.apache.hadoop.net.DNSToSwitchMapping;
|
||||
|
@ -172,6 +165,14 @@ public class DatanodeManager {
|
|||
* according to the NetworkTopology.
|
||||
*/
|
||||
private boolean hasClusterEverBeenMultiRack = false;
|
||||
|
||||
/**
|
||||
* The number of datanodes for each software version. This map should change
|
||||
* during rolling upgrades.
|
||||
* Software version -> Number of datanodes with this version
|
||||
*/
|
||||
private HashMap<String, Integer> datanodesSoftwareVersions =
|
||||
new HashMap<String, Integer>(4, 0.75f);
|
||||
|
||||
DatanodeManager(final BlockManager blockManager, final Namesystem namesystem,
|
||||
final Configuration conf) throws IOException {
|
||||
|
@ -463,6 +464,7 @@ public class DatanodeManager {
|
|||
heartbeatManager.removeDatanode(nodeInfo);
|
||||
blockManager.removeBlocksAssociatedTo(nodeInfo);
|
||||
networktopology.remove(nodeInfo);
|
||||
decrementVersionCount(nodeInfo.getSoftwareVersion());
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("remove datanode " + nodeInfo);
|
||||
|
@ -545,6 +547,61 @@ public class DatanodeManager {
|
|||
}
|
||||
}
|
||||
|
||||
private void incrementVersionCount(String version) {
|
||||
if (version == null) {
|
||||
return;
|
||||
}
|
||||
synchronized(datanodeMap) {
|
||||
Integer count = this.datanodesSoftwareVersions.get(version);
|
||||
count = count == null ? 1 : count + 1;
|
||||
this.datanodesSoftwareVersions.put(version, count);
|
||||
}
|
||||
}
|
||||
|
||||
private void decrementVersionCount(String version) {
|
||||
if (version == null) {
|
||||
return;
|
||||
}
|
||||
synchronized(datanodeMap) {
|
||||
Integer count = this.datanodesSoftwareVersions.get(version);
|
||||
if(count != null) {
|
||||
if(count > 1) {
|
||||
this.datanodesSoftwareVersions.put(version, count-1);
|
||||
} else {
|
||||
this.datanodesSoftwareVersions.remove(version);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean shouldCountVersion(DatanodeDescriptor node) {
|
||||
return node.getSoftwareVersion() != null && node.isAlive &&
|
||||
!isDatanodeDead(node);
|
||||
}
|
||||
|
||||
private void countSoftwareVersions() {
|
||||
synchronized(datanodeMap) {
|
||||
HashMap<String, Integer> versionCount = new HashMap<String, Integer>();
|
||||
for(DatanodeDescriptor dn: datanodeMap.values()) {
|
||||
// Check isAlive too because right after removeDatanode(),
|
||||
// isDatanodeDead() is still true
|
||||
if(shouldCountVersion(dn))
|
||||
{
|
||||
Integer num = versionCount.get(dn.getSoftwareVersion());
|
||||
num = num == null ? 1 : num+1;
|
||||
versionCount.put(dn.getSoftwareVersion(), num);
|
||||
}
|
||||
}
|
||||
this.datanodesSoftwareVersions = versionCount;
|
||||
}
|
||||
}
|
||||
|
||||
public HashMap<String, Integer> getDatanodesSoftwareVersions() {
|
||||
synchronized(datanodeMap) {
|
||||
return new HashMap<String, Integer> (this.datanodesSoftwareVersions);
|
||||
}
|
||||
}
|
||||
|
||||
/* Resolve a node's network location */
|
||||
private String resolveNetworkLocation (DatanodeID node) {
|
||||
List<String> names = new ArrayList<String>(1);
|
||||
|
@ -761,21 +818,28 @@ public class DatanodeManager {
|
|||
try {
|
||||
// update cluster map
|
||||
getNetworkTopology().remove(nodeS);
|
||||
if(shouldCountVersion(nodeS)) {
|
||||
decrementVersionCount(nodeS.getSoftwareVersion());
|
||||
}
|
||||
nodeS.updateRegInfo(nodeReg);
|
||||
|
||||
nodeS.setSoftwareVersion(nodeReg.getSoftwareVersion());
|
||||
nodeS.setDisallowed(false); // Node is in the include list
|
||||
|
||||
|
||||
// resolve network location
|
||||
nodeS.setNetworkLocation(resolveNetworkLocation(nodeS));
|
||||
getNetworkTopology().add(nodeS);
|
||||
|
||||
// also treat the registration message as a heartbeat
|
||||
heartbeatManager.register(nodeS);
|
||||
incrementVersionCount(nodeS.getSoftwareVersion());
|
||||
checkDecommissioning(nodeS);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
removeDatanode(nodeS);
|
||||
wipeDatanode(nodeS);
|
||||
countSoftwareVersions();
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
@ -799,6 +863,7 @@ public class DatanodeManager {
|
|||
try {
|
||||
nodeDescr.setNetworkLocation(resolveNetworkLocation(nodeDescr));
|
||||
networktopology.add(nodeDescr);
|
||||
nodeDescr.setSoftwareVersion(nodeReg.getSoftwareVersion());
|
||||
|
||||
// register new datanode
|
||||
addDatanode(nodeDescr);
|
||||
|
@ -809,10 +874,12 @@ public class DatanodeManager {
|
|||
// because it is done when the descriptor is created
|
||||
heartbeatManager.addDatanode(nodeDescr);
|
||||
success = true;
|
||||
incrementVersionCount(nodeReg.getSoftwareVersion());
|
||||
} finally {
|
||||
if (!success) {
|
||||
removeDatanode(nodeDescr);
|
||||
wipeDatanode(nodeDescr);
|
||||
countSoftwareVersions();
|
||||
}
|
||||
}
|
||||
} catch (InvalidTopologyException e) {
|
||||
|
@ -834,6 +901,7 @@ public class DatanodeManager {
|
|||
namesystem.writeLock();
|
||||
try {
|
||||
refreshDatanodes();
|
||||
countSoftwareVersions();
|
||||
} finally {
|
||||
namesystem.writeUnlock();
|
||||
}
|
||||
|
|
|
@ -359,6 +359,7 @@ class ClusterJspHelper {
|
|||
nn.httpAddress = httpAddress;
|
||||
getLiveNodeCount(getProperty(props, "LiveNodes").getValueAsText(), nn);
|
||||
getDeadNodeCount(getProperty(props, "DeadNodes").getValueAsText(), nn);
|
||||
nn.softwareVersion = getProperty(props, "SoftwareVersion").getTextValue();
|
||||
return nn;
|
||||
}
|
||||
|
||||
|
@ -596,6 +597,7 @@ class ClusterJspHelper {
|
|||
toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " (" +
|
||||
nn.deadDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=DEAD"
|
||||
, "Dead Datanode (Decommissioned)");
|
||||
toXmlItemBlock(doc, "Software Version", nn.softwareVersion);
|
||||
doc.endTag(); // node
|
||||
}
|
||||
doc.endTag(); // namenodes
|
||||
|
@ -624,6 +626,7 @@ class ClusterJspHelper {
|
|||
int deadDatanodeCount = 0;
|
||||
int deadDecomCount = 0;
|
||||
String httpAddress = null;
|
||||
String softwareVersion = "";
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -6208,6 +6208,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed());
|
||||
innerinfo.put("capacity", node.getCapacity());
|
||||
innerinfo.put("numBlocks", node.numBlocks());
|
||||
innerinfo.put("version", node.getSoftwareVersion());
|
||||
info.put(node.getHostName(), innerinfo);
|
||||
}
|
||||
return JSON.toString(info);
|
||||
|
@ -6419,6 +6420,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
|||
return JSON.toString(list);
|
||||
}
|
||||
|
||||
@Override //NameNodeMXBean
|
||||
public int getDistinctVersionCount() {
|
||||
return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
|
||||
.size();
|
||||
}
|
||||
|
||||
@Override //NameNodeMXBean
|
||||
public Map<String, Integer> getDistinctVersions() {
|
||||
return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
|
||||
}
|
||||
|
||||
@Override //NameNodeMXBean
|
||||
public String getSoftwareVersion() {
|
||||
return VersionInfo.getVersion();
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that the given identifier and password are valid and match.
|
||||
* @param identifier Token identifier.
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
|
||||
|
@ -33,7 +35,13 @@ public interface NameNodeMXBean {
|
|||
* @return the version
|
||||
*/
|
||||
public String getVersion();
|
||||
|
||||
|
||||
/**
|
||||
* Get the version of software running on the Namenode
|
||||
* @return a string representing the version
|
||||
*/
|
||||
public String getSoftwareVersion();
|
||||
|
||||
/**
|
||||
* Gets the used space by data nodes.
|
||||
*
|
||||
|
@ -215,4 +223,19 @@ public interface NameNodeMXBean {
|
|||
* @return the list of corrupt files, as a JSON string.
|
||||
*/
|
||||
public String getCorruptFiles();
|
||||
|
||||
/**
|
||||
* Get the number of distinct versions of live datanodes
|
||||
*
|
||||
* @return the number of distinct versions of live datanodes
|
||||
*/
|
||||
public int getDistinctVersionCount();
|
||||
|
||||
/**
|
||||
* Get the number of live datanodes for each distinct version
|
||||
*
|
||||
* @return the number of live datanodes for each distinct version
|
||||
*/
|
||||
public Map<String, Integer> getDistinctVersions();
|
||||
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
@ -99,6 +100,20 @@ class NamenodeJspHelper {
|
|||
}
|
||||
}
|
||||
|
||||
static String getRollingUpgradeText(FSNamesystem fsn) {
|
||||
DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
|
||||
Map<String, Integer> list = dm.getDatanodesSoftwareVersions();
|
||||
if(list.size() > 1) {
|
||||
StringBuffer status = new StringBuffer("Rolling upgrades in progress. " +
|
||||
"There are " + list.size() + " versions of datanodes currently live: ");
|
||||
for(Map.Entry<String, Integer> ver: list.entrySet()) {
|
||||
status.append(ver.getKey() + "(" + ver.getValue() + "), ");
|
||||
}
|
||||
return status.substring(0, status.length()-2);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
static String getInodeLimitText(FSNamesystem fsn) {
|
||||
if (fsn == null) {
|
||||
return "";
|
||||
|
@ -802,7 +817,9 @@ class NamenodeJspHelper {
|
|||
+ "<td align=\"right\" class=\"pcbpused\">"
|
||||
+ percentBpUsed
|
||||
+ "<td align=\"right\" class=\"volfails\">"
|
||||
+ d.getVolumeFailures() + "\n");
|
||||
+ d.getVolumeFailures()
|
||||
+ "<td align=\"right\" class=\"version\">"
|
||||
+ d.getSoftwareVersion() + "\n");
|
||||
}
|
||||
|
||||
void generateNodesList(ServletContext context, JspWriter out,
|
||||
|
@ -900,7 +917,9 @@ class NamenodeJspHelper {
|
|||
+ nodeHeaderStr("pcbpused")
|
||||
+ "> Block Pool<br>Used (%)" + " <th "
|
||||
+ nodeHeaderStr("volfails")
|
||||
+"> Failed Volumes\n");
|
||||
+"> Failed Volumes <th "
|
||||
+ nodeHeaderStr("versionString")
|
||||
+"> Version\n");
|
||||
|
||||
JspHelper.sortNodeList(live, sorterField, sorterOrder);
|
||||
for (int i = 0; i < live.size(); i++) {
|
||||
|
|
|
@ -65,6 +65,7 @@
|
|||
<h3>Cluster Summary</h3>
|
||||
<b> <%= NamenodeJspHelper.getSecurityModeText()%> </b>
|
||||
<b> <%= NamenodeJspHelper.getSafeModeText(fsn)%> </b>
|
||||
<b> <%= NamenodeJspHelper.getRollingUpgradeText(fsn)%> </b>
|
||||
<b> <%= NamenodeJspHelper.getInodeLimitText(fsn)%> </b>
|
||||
<%= NamenodeJspHelper.getCorruptFilesWarning(fsn)%>
|
||||
|
||||
|
|
Loading…
Reference in New Issue