HDFS-3245. Add metrics and web UI for cluster version summary. Contributed by Ravi Prakash.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1517937 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5adba5597c
commit
39252995c4
|
@ -333,6 +333,9 @@ Release 2.1.1-beta - UNRELEASED
|
||||||
HDFS-5045. Add more unit tests for retry cache to cover all AtMostOnce
|
HDFS-5045. Add more unit tests for retry cache to cover all AtMostOnce
|
||||||
methods. (jing9)
|
methods. (jing9)
|
||||||
|
|
||||||
|
HDFS-3245. Add metrics and web UI for cluster version summary. (Ravi
|
||||||
|
Prakash via kihwal)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -47,6 +47,7 @@ public class DatanodeInfo extends DatanodeID implements Node {
|
||||||
private long lastUpdate;
|
private long lastUpdate;
|
||||||
private int xceiverCount;
|
private int xceiverCount;
|
||||||
private String location = NetworkTopology.DEFAULT_RACK;
|
private String location = NetworkTopology.DEFAULT_RACK;
|
||||||
|
private String softwareVersion;
|
||||||
|
|
||||||
// Datanode administrative states
|
// Datanode administrative states
|
||||||
public enum AdminStates {
|
public enum AdminStates {
|
||||||
|
@ -383,4 +384,12 @@ public class DatanodeInfo extends DatanodeID implements Node {
|
||||||
// by DatanodeID
|
// by DatanodeID
|
||||||
return (this == obj) || super.equals(obj);
|
return (this == obj) || super.equals(obj);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getSoftwareVersion() {
|
||||||
|
return softwareVersion;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSoftwareVersion(String softwareVersion) {
|
||||||
|
this.softwareVersion = softwareVersion;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.net.UnknownHostException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.NavigableMap;
|
import java.util.NavigableMap;
|
||||||
|
@ -64,7 +65,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
|
import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
|
import org.apache.hadoop.hdfs.server.protocol.RegisterCommand;
|
||||||
import org.apache.hadoop.hdfs.util.CyclicIteration;
|
import org.apache.hadoop.hdfs.util.CyclicIteration;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
import org.apache.hadoop.net.CachedDNSToSwitchMapping;
|
import org.apache.hadoop.net.CachedDNSToSwitchMapping;
|
||||||
import org.apache.hadoop.net.DNSToSwitchMapping;
|
import org.apache.hadoop.net.DNSToSwitchMapping;
|
||||||
|
@ -165,6 +165,14 @@ public class DatanodeManager {
|
||||||
* according to the NetworkTopology.
|
* according to the NetworkTopology.
|
||||||
*/
|
*/
|
||||||
private boolean hasClusterEverBeenMultiRack = false;
|
private boolean hasClusterEverBeenMultiRack = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The number of datanodes for each software version. This map should change
|
||||||
|
* during rolling upgrades.
|
||||||
|
* Software version -> Number of datanodes with this version
|
||||||
|
*/
|
||||||
|
private HashMap<String, Integer> datanodesSoftwareVersions =
|
||||||
|
new HashMap<String, Integer>(4, 0.75f);
|
||||||
|
|
||||||
DatanodeManager(final BlockManager blockManager, final Namesystem namesystem,
|
DatanodeManager(final BlockManager blockManager, final Namesystem namesystem,
|
||||||
final Configuration conf) throws IOException {
|
final Configuration conf) throws IOException {
|
||||||
|
@ -456,6 +464,7 @@ public class DatanodeManager {
|
||||||
heartbeatManager.removeDatanode(nodeInfo);
|
heartbeatManager.removeDatanode(nodeInfo);
|
||||||
blockManager.removeBlocksAssociatedTo(nodeInfo);
|
blockManager.removeBlocksAssociatedTo(nodeInfo);
|
||||||
networktopology.remove(nodeInfo);
|
networktopology.remove(nodeInfo);
|
||||||
|
decrementVersionCount(nodeInfo.getSoftwareVersion());
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("remove datanode " + nodeInfo);
|
LOG.debug("remove datanode " + nodeInfo);
|
||||||
|
@ -538,6 +547,61 @@ public class DatanodeManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void incrementVersionCount(String version) {
|
||||||
|
if (version == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
synchronized(datanodeMap) {
|
||||||
|
Integer count = this.datanodesSoftwareVersions.get(version);
|
||||||
|
count = count == null ? 1 : count + 1;
|
||||||
|
this.datanodesSoftwareVersions.put(version, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void decrementVersionCount(String version) {
|
||||||
|
if (version == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
synchronized(datanodeMap) {
|
||||||
|
Integer count = this.datanodesSoftwareVersions.get(version);
|
||||||
|
if(count != null) {
|
||||||
|
if(count > 1) {
|
||||||
|
this.datanodesSoftwareVersions.put(version, count-1);
|
||||||
|
} else {
|
||||||
|
this.datanodesSoftwareVersions.remove(version);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean shouldCountVersion(DatanodeDescriptor node) {
|
||||||
|
return node.getSoftwareVersion() != null && node.isAlive &&
|
||||||
|
!isDatanodeDead(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void countSoftwareVersions() {
|
||||||
|
synchronized(datanodeMap) {
|
||||||
|
HashMap<String, Integer> versionCount = new HashMap<String, Integer>();
|
||||||
|
for(DatanodeDescriptor dn: datanodeMap.values()) {
|
||||||
|
// Check isAlive too because right after removeDatanode(),
|
||||||
|
// isDatanodeDead() is still true
|
||||||
|
if(shouldCountVersion(dn))
|
||||||
|
{
|
||||||
|
Integer num = versionCount.get(dn.getSoftwareVersion());
|
||||||
|
num = num == null ? 1 : num+1;
|
||||||
|
versionCount.put(dn.getSoftwareVersion(), num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.datanodesSoftwareVersions = versionCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public HashMap<String, Integer> getDatanodesSoftwareVersions() {
|
||||||
|
synchronized(datanodeMap) {
|
||||||
|
return new HashMap<String, Integer> (this.datanodesSoftwareVersions);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Resolve a node's network location */
|
/* Resolve a node's network location */
|
||||||
private String resolveNetworkLocation (DatanodeID node) {
|
private String resolveNetworkLocation (DatanodeID node) {
|
||||||
List<String> names = new ArrayList<String>(1);
|
List<String> names = new ArrayList<String>(1);
|
||||||
|
@ -755,21 +819,28 @@ public class DatanodeManager {
|
||||||
try {
|
try {
|
||||||
// update cluster map
|
// update cluster map
|
||||||
getNetworkTopology().remove(nodeS);
|
getNetworkTopology().remove(nodeS);
|
||||||
|
if(shouldCountVersion(nodeS)) {
|
||||||
|
decrementVersionCount(nodeS.getSoftwareVersion());
|
||||||
|
}
|
||||||
nodeS.updateRegInfo(nodeReg);
|
nodeS.updateRegInfo(nodeReg);
|
||||||
|
|
||||||
|
nodeS.setSoftwareVersion(nodeReg.getSoftwareVersion());
|
||||||
nodeS.setDisallowed(false); // Node is in the include list
|
nodeS.setDisallowed(false); // Node is in the include list
|
||||||
|
|
||||||
// resolve network location
|
// resolve network location
|
||||||
nodeS.setNetworkLocation(resolveNetworkLocation(nodeS));
|
nodeS.setNetworkLocation(resolveNetworkLocation(nodeS));
|
||||||
getNetworkTopology().add(nodeS);
|
getNetworkTopology().add(nodeS);
|
||||||
|
|
||||||
// also treat the registration message as a heartbeat
|
// also treat the registration message as a heartbeat
|
||||||
heartbeatManager.register(nodeS);
|
heartbeatManager.register(nodeS);
|
||||||
|
incrementVersionCount(nodeS.getSoftwareVersion());
|
||||||
checkDecommissioning(nodeS);
|
checkDecommissioning(nodeS);
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
removeDatanode(nodeS);
|
removeDatanode(nodeS);
|
||||||
wipeDatanode(nodeS);
|
wipeDatanode(nodeS);
|
||||||
|
countSoftwareVersions();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
@ -793,6 +864,7 @@ public class DatanodeManager {
|
||||||
try {
|
try {
|
||||||
nodeDescr.setNetworkLocation(resolveNetworkLocation(nodeDescr));
|
nodeDescr.setNetworkLocation(resolveNetworkLocation(nodeDescr));
|
||||||
networktopology.add(nodeDescr);
|
networktopology.add(nodeDescr);
|
||||||
|
nodeDescr.setSoftwareVersion(nodeReg.getSoftwareVersion());
|
||||||
|
|
||||||
// register new datanode
|
// register new datanode
|
||||||
addDatanode(nodeDescr);
|
addDatanode(nodeDescr);
|
||||||
|
@ -803,10 +875,12 @@ public class DatanodeManager {
|
||||||
// because it is done when the descriptor is created
|
// because it is done when the descriptor is created
|
||||||
heartbeatManager.addDatanode(nodeDescr);
|
heartbeatManager.addDatanode(nodeDescr);
|
||||||
success = true;
|
success = true;
|
||||||
|
incrementVersionCount(nodeReg.getSoftwareVersion());
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
removeDatanode(nodeDescr);
|
removeDatanode(nodeDescr);
|
||||||
wipeDatanode(nodeDescr);
|
wipeDatanode(nodeDescr);
|
||||||
|
countSoftwareVersions();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (InvalidTopologyException e) {
|
} catch (InvalidTopologyException e) {
|
||||||
|
@ -828,6 +902,7 @@ public class DatanodeManager {
|
||||||
namesystem.writeLock();
|
namesystem.writeLock();
|
||||||
try {
|
try {
|
||||||
refreshDatanodes();
|
refreshDatanodes();
|
||||||
|
countSoftwareVersions();
|
||||||
} finally {
|
} finally {
|
||||||
namesystem.writeUnlock();
|
namesystem.writeUnlock();
|
||||||
}
|
}
|
||||||
|
|
|
@ -359,6 +359,7 @@ class ClusterJspHelper {
|
||||||
nn.httpAddress = httpAddress;
|
nn.httpAddress = httpAddress;
|
||||||
getLiveNodeCount(getProperty(props, "LiveNodes").getValueAsText(), nn);
|
getLiveNodeCount(getProperty(props, "LiveNodes").getValueAsText(), nn);
|
||||||
getDeadNodeCount(getProperty(props, "DeadNodes").getValueAsText(), nn);
|
getDeadNodeCount(getProperty(props, "DeadNodes").getValueAsText(), nn);
|
||||||
|
nn.softwareVersion = getProperty(props, "SoftwareVersion").getTextValue();
|
||||||
return nn;
|
return nn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -596,6 +597,7 @@ class ClusterJspHelper {
|
||||||
toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " (" +
|
toXmlItemBlockWithLink(doc, nn.deadDatanodeCount + " (" +
|
||||||
nn.deadDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=DEAD"
|
nn.deadDecomCount + ")", nn.httpAddress+"/dfsnodelist.jsp?whatNodes=DEAD"
|
||||||
, "Dead Datanode (Decommissioned)");
|
, "Dead Datanode (Decommissioned)");
|
||||||
|
toXmlItemBlock(doc, "Software Version", nn.softwareVersion);
|
||||||
doc.endTag(); // node
|
doc.endTag(); // node
|
||||||
}
|
}
|
||||||
doc.endTag(); // namenodes
|
doc.endTag(); // namenodes
|
||||||
|
@ -624,6 +626,7 @@ class ClusterJspHelper {
|
||||||
int deadDatanodeCount = 0;
|
int deadDatanodeCount = 0;
|
||||||
int deadDecomCount = 0;
|
int deadDecomCount = 0;
|
||||||
String httpAddress = null;
|
String httpAddress = null;
|
||||||
|
String softwareVersion = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -6225,6 +6225,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed());
|
innerinfo.put("nonDfsUsedSpace", node.getNonDfsUsed());
|
||||||
innerinfo.put("capacity", node.getCapacity());
|
innerinfo.put("capacity", node.getCapacity());
|
||||||
innerinfo.put("numBlocks", node.numBlocks());
|
innerinfo.put("numBlocks", node.numBlocks());
|
||||||
|
innerinfo.put("version", node.getSoftwareVersion());
|
||||||
info.put(node.getHostName(), innerinfo);
|
info.put(node.getHostName(), innerinfo);
|
||||||
}
|
}
|
||||||
return JSON.toString(info);
|
return JSON.toString(info);
|
||||||
|
@ -6436,6 +6437,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
|
||||||
return JSON.toString(list);
|
return JSON.toString(list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override //NameNodeMXBean
|
||||||
|
public int getDistinctVersionCount() {
|
||||||
|
return blockManager.getDatanodeManager().getDatanodesSoftwareVersions()
|
||||||
|
.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override //NameNodeMXBean
|
||||||
|
public Map<String, Integer> getDistinctVersions() {
|
||||||
|
return blockManager.getDatanodeManager().getDatanodesSoftwareVersions();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override //NameNodeMXBean
|
||||||
|
public String getSoftwareVersion() {
|
||||||
|
return VersionInfo.getVersion();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Verifies that the given identifier and password are valid and match.
|
* Verifies that the given identifier and password are valid and match.
|
||||||
* @param identifier Token identifier.
|
* @param identifier Token identifier.
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hdfs.server.namenode;
|
package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
|
||||||
|
@ -33,7 +35,13 @@ public interface NameNodeMXBean {
|
||||||
* @return the version
|
* @return the version
|
||||||
*/
|
*/
|
||||||
public String getVersion();
|
public String getVersion();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the version of software running on the Namenode
|
||||||
|
* @return a string representing the version
|
||||||
|
*/
|
||||||
|
public String getSoftwareVersion();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the used space by data nodes.
|
* Gets the used space by data nodes.
|
||||||
*
|
*
|
||||||
|
@ -215,4 +223,19 @@ public interface NameNodeMXBean {
|
||||||
* @return the list of corrupt files, as a JSON string.
|
* @return the list of corrupt files, as a JSON string.
|
||||||
*/
|
*/
|
||||||
public String getCorruptFiles();
|
public String getCorruptFiles();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of distinct versions of live datanodes
|
||||||
|
*
|
||||||
|
* @return the number of distinct versions of live datanodes
|
||||||
|
*/
|
||||||
|
public int getDistinctVersionCount();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the number of live datanodes for each distinct version
|
||||||
|
*
|
||||||
|
* @return the number of live datanodes for each distinct version
|
||||||
|
*/
|
||||||
|
public Map<String, Integer> getDistinctVersions();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import javax.servlet.ServletContext;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
|
@ -99,6 +100,20 @@ class NamenodeJspHelper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static String getRollingUpgradeText(FSNamesystem fsn) {
|
||||||
|
DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
|
||||||
|
Map<String, Integer> list = dm.getDatanodesSoftwareVersions();
|
||||||
|
if(list.size() > 1) {
|
||||||
|
StringBuffer status = new StringBuffer("Rolling upgrades in progress. " +
|
||||||
|
"There are " + list.size() + " versions of datanodes currently live: ");
|
||||||
|
for(Map.Entry<String, Integer> ver: list.entrySet()) {
|
||||||
|
status.append(ver.getKey() + "(" + ver.getValue() + "), ");
|
||||||
|
}
|
||||||
|
return status.substring(0, status.length()-2);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
static String getInodeLimitText(FSNamesystem fsn) {
|
static String getInodeLimitText(FSNamesystem fsn) {
|
||||||
if (fsn == null) {
|
if (fsn == null) {
|
||||||
return "";
|
return "";
|
||||||
|
@ -802,7 +817,9 @@ class NamenodeJspHelper {
|
||||||
+ "<td align=\"right\" class=\"pcbpused\">"
|
+ "<td align=\"right\" class=\"pcbpused\">"
|
||||||
+ percentBpUsed
|
+ percentBpUsed
|
||||||
+ "<td align=\"right\" class=\"volfails\">"
|
+ "<td align=\"right\" class=\"volfails\">"
|
||||||
+ d.getVolumeFailures() + "\n");
|
+ d.getVolumeFailures()
|
||||||
|
+ "<td align=\"right\" class=\"version\">"
|
||||||
|
+ d.getSoftwareVersion() + "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void generateNodesList(ServletContext context, JspWriter out,
|
void generateNodesList(ServletContext context, JspWriter out,
|
||||||
|
@ -900,7 +917,9 @@ class NamenodeJspHelper {
|
||||||
+ nodeHeaderStr("pcbpused")
|
+ nodeHeaderStr("pcbpused")
|
||||||
+ "> Block Pool<br>Used (%)" + " <th "
|
+ "> Block Pool<br>Used (%)" + " <th "
|
||||||
+ nodeHeaderStr("volfails")
|
+ nodeHeaderStr("volfails")
|
||||||
+"> Failed Volumes\n");
|
+"> Failed Volumes <th "
|
||||||
|
+ nodeHeaderStr("versionString")
|
||||||
|
+"> Version\n");
|
||||||
|
|
||||||
JspHelper.sortNodeList(live, sorterField, sorterOrder);
|
JspHelper.sortNodeList(live, sorterField, sorterOrder);
|
||||||
for (int i = 0; i < live.size(); i++) {
|
for (int i = 0; i < live.size(); i++) {
|
||||||
|
|
|
@ -65,6 +65,7 @@
|
||||||
<h3>Cluster Summary</h3>
|
<h3>Cluster Summary</h3>
|
||||||
<b> <%= NamenodeJspHelper.getSecurityModeText()%> </b>
|
<b> <%= NamenodeJspHelper.getSecurityModeText()%> </b>
|
||||||
<b> <%= NamenodeJspHelper.getSafeModeText(fsn)%> </b>
|
<b> <%= NamenodeJspHelper.getSafeModeText(fsn)%> </b>
|
||||||
|
<b> <%= NamenodeJspHelper.getRollingUpgradeText(fsn)%> </b>
|
||||||
<b> <%= NamenodeJspHelper.getInodeLimitText(fsn)%> </b>
|
<b> <%= NamenodeJspHelper.getInodeLimitText(fsn)%> </b>
|
||||||
<%= NamenodeJspHelper.getCorruptFilesWarning(fsn)%>
|
<%= NamenodeJspHelper.getCorruptFilesWarning(fsn)%>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue