YARN-686. Flatten NodeReport. (sandyr via tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1490827 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2a76cddcd5
commit
af8514eef2
|
@ -301,6 +301,8 @@ Release 2.1.0-beta - UNRELEASED
|
|||
YARN-750. Allow for black-listing resources in YARN API and Impl in CS
|
||||
(acmurthy via bikas)
|
||||
|
||||
YARN-686. Flatten NodeReport. (sandyr via tucu)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-512. Log aggregation root directory check is more expensive than it
|
||||
|
|
|
@ -51,7 +51,7 @@ public abstract class NodeReport {
|
|||
@Private
|
||||
public static NodeReport newInstance(NodeId nodeId, NodeState nodeState,
|
||||
String httpAddress, String rackName, Resource used, Resource capability,
|
||||
int numContainers, NodeHealthStatus nodeHealthStatus) {
|
||||
int numContainers, String healthReport, long lastHealthReportTime) {
|
||||
NodeReport nodeReport = Records.newRecord(NodeReport.class);
|
||||
nodeReport.setNodeId(nodeId);
|
||||
nodeReport.setNodeState(nodeState);
|
||||
|
@ -60,7 +60,8 @@ public abstract class NodeReport {
|
|||
nodeReport.setUsed(used);
|
||||
nodeReport.setCapability(capability);
|
||||
nodeReport.setNumContainers(numContainers);
|
||||
nodeReport.setNodeHealthStatus(nodeHealthStatus);
|
||||
nodeReport.setHealthReport(healthReport);
|
||||
nodeReport.setLastHealthReportTime(lastHealthReportTime);
|
||||
return nodeReport;
|
||||
}
|
||||
|
||||
|
@ -144,15 +145,28 @@ public abstract class NodeReport {
|
|||
@Unstable
|
||||
public abstract void setNumContainers(int numContainers);
|
||||
|
||||
/**
|
||||
* Get the <code>NodeHealthStatus</code> of the node.
|
||||
* @return <code>NodeHealthStatus</code> of the node
|
||||
|
||||
/**
|
||||
* Get the <em>diagnostic health report</em> of the node.
|
||||
* @return <em>diagnostic health report</em> of the node
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
public abstract NodeHealthStatus getNodeHealthStatus();
|
||||
|
||||
public abstract String getHealthReport();
|
||||
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract void setNodeHealthStatus(NodeHealthStatus nodeHealthStatus);
|
||||
public abstract void setHealthReport(String healthReport);
|
||||
|
||||
/**
|
||||
* Get the <em>last timestamp</em> at which the health report was received.
|
||||
* @return <em>last timestamp</em> at which the health report was received
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
public abstract long getLastHealthReportTime();
|
||||
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract void setLastHealthReportTime(long lastHealthReport);
|
||||
}
|
||||
|
|
|
@ -18,12 +18,10 @@
|
|||
|
||||
package org.apache.hadoop.yarn.api.records.impl.pb;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeHealthStatusProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProtoOrBuilder;
|
||||
|
@ -38,7 +36,6 @@ public class NodeReportPBImpl extends NodeReport {
|
|||
private NodeId nodeId;
|
||||
private Resource used;
|
||||
private Resource capability;
|
||||
private NodeHealthStatus nodeHealthStatus;
|
||||
|
||||
public NodeReportPBImpl() {
|
||||
builder = NodeReportProto.newBuilder();
|
||||
|
@ -64,19 +61,33 @@ public class NodeReportPBImpl extends NodeReport {
|
|||
}
|
||||
|
||||
@Override
|
||||
public NodeHealthStatus getNodeHealthStatus() {
|
||||
if (this.nodeHealthStatus != null) {
|
||||
return this.nodeHealthStatus;
|
||||
}
|
||||
|
||||
public String getHealthReport() {
|
||||
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||
if (!p.hasNodeHealthStatus()) {
|
||||
return null;
|
||||
}
|
||||
this.nodeHealthStatus = convertFromProtoFormat(p.getNodeHealthStatus());
|
||||
return this.nodeHealthStatus;
|
||||
return p.getHealthReport();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void setHealthReport(String healthReport) {
|
||||
maybeInitBuilder();
|
||||
if (healthReport == null) {
|
||||
builder.clearHealthReport();
|
||||
return;
|
||||
}
|
||||
builder.setHealthReport(healthReport);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLastHealthReportTime() {
|
||||
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||
return p.getLastHealthReportTime();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setLastHealthReportTime(long lastHealthReportTime) {
|
||||
maybeInitBuilder();
|
||||
builder.setLastHealthReportTime(lastHealthReportTime);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHttpAddress() {
|
||||
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||
|
@ -158,14 +169,6 @@ public class NodeReportPBImpl extends NodeReport {
|
|||
this.capability = capability;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNodeHealthStatus(NodeHealthStatus healthStatus) {
|
||||
maybeInitBuilder();
|
||||
if (healthStatus == null)
|
||||
builder.clearNodeHealthStatus();
|
||||
this.nodeHealthStatus = healthStatus;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHttpAddress(String httpAddress) {
|
||||
maybeInitBuilder();
|
||||
|
@ -247,11 +250,6 @@ public class NodeReportPBImpl extends NodeReport {
|
|||
builder.getCapability())) {
|
||||
builder.setCapability(convertToProtoFormat(this.capability));
|
||||
}
|
||||
if (this.nodeHealthStatus != null
|
||||
&& !((NodeHealthStatusPBImpl) this.nodeHealthStatus).getProto().equals(
|
||||
builder.getNodeHealthStatus())) {
|
||||
builder.setNodeHealthStatus(convertToProtoFormat(this.nodeHealthStatus));
|
||||
}
|
||||
}
|
||||
|
||||
private void mergeLocalToProto() {
|
||||
|
@ -286,11 +284,4 @@ public class NodeReportPBImpl extends NodeReport {
|
|||
return ((ResourcePBImpl) r).getProto();
|
||||
}
|
||||
|
||||
private NodeHealthStatusPBImpl convertFromProtoFormat(NodeHealthStatusProto p) {
|
||||
return new NodeHealthStatusPBImpl(p);
|
||||
}
|
||||
|
||||
private NodeHealthStatusProto convertToProtoFormat(NodeHealthStatus r) {
|
||||
return ((NodeHealthStatusPBImpl) r).getProto();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -191,8 +191,9 @@ message NodeReportProto {
|
|||
optional ResourceProto used = 4;
|
||||
optional ResourceProto capability = 5;
|
||||
optional int32 numContainers = 6;
|
||||
optional NodeHealthStatusProto node_health_status = 8;
|
||||
optional NodeStateProto node_state = 9;
|
||||
optional NodeStateProto node_state = 7;
|
||||
optional string health_report = 8;
|
||||
optional int64 last_health_report_time = 9;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -330,8 +330,7 @@ public class Client extends YarnClientImpl {
|
|||
+ ", nodeId=" + node.getNodeId()
|
||||
+ ", nodeAddress" + node.getHttpAddress()
|
||||
+ ", nodeRackName" + node.getRackName()
|
||||
+ ", nodeNumContainers" + node.getNumContainers()
|
||||
+ ", nodeHealthStatus" + node.getNodeHealthStatus());
|
||||
+ ", nodeNumContainers" + node.getNumContainers());
|
||||
}
|
||||
|
||||
QueueInfo queueInfo = super.getQueueInfo(this.amQueue);
|
||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
|||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
|
||||
public class NodeCLI extends YarnCLI {
|
||||
private static final String NODES_PATTERN = "%16s\t%10s\t%17s\t%26s\t%18s" +
|
||||
private static final String NODES_PATTERN = "%16s\t%10s\t%17s\t%18s" +
|
||||
System.getProperty("line.separator");
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
@ -91,11 +91,10 @@ public class NodeCLI extends YarnCLI {
|
|||
List<NodeReport> nodesReport = client.getNodeReports();
|
||||
writer.println("Total Nodes:" + nodesReport.size());
|
||||
writer.printf(NODES_PATTERN, "Node-Id", "Node-State", "Node-Http-Address",
|
||||
"Health-Status(isNodeHealthy)", "Running-Containers");
|
||||
"Running-Containers");
|
||||
for (NodeReport nodeReport : nodesReport) {
|
||||
writer.printf(NODES_PATTERN, nodeReport.getNodeId(), nodeReport
|
||||
.getNodeState(), nodeReport.getHttpAddress(), nodeReport
|
||||
.getNodeHealthStatus().getIsNodeHealthy(), nodeReport
|
||||
.getNumContainers());
|
||||
}
|
||||
writer.flush();
|
||||
|
@ -129,16 +128,13 @@ public class NodeCLI extends YarnCLI {
|
|||
nodeReportStr.println(nodeReport.getNodeState());
|
||||
nodeReportStr.print("\tNode-Http-Address : ");
|
||||
nodeReportStr.println(nodeReport.getHttpAddress());
|
||||
nodeReportStr.print("\tHealth-Status(isNodeHealthy) : ");
|
||||
nodeReportStr.println(nodeReport.getNodeHealthStatus()
|
||||
.getIsNodeHealthy());
|
||||
nodeReportStr.print("\tLast-Health-Update : ");
|
||||
nodeReportStr.println(DateFormatUtils.format(
|
||||
new Date(nodeReport.getNodeHealthStatus().
|
||||
getLastHealthReportTime()),"E dd/MMM/yy hh:mm:ss:SSzz"));
|
||||
new Date(nodeReport.getLastHealthReportTime()),
|
||||
"E dd/MMM/yy hh:mm:ss:SSzz"));
|
||||
nodeReportStr.print("\tHealth-Report : ");
|
||||
nodeReportStr
|
||||
.println(nodeReport.getNodeHealthStatus().getHealthReport());
|
||||
.println(nodeReport.getHealthReport());
|
||||
nodeReportStr.print("\tContainers : ");
|
||||
nodeReportStr.println(nodeReport.getNumContainers());
|
||||
nodeReportStr.print("\tMemory-Used : ");
|
||||
|
|
|
@ -41,7 +41,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
|
@ -163,13 +162,13 @@ public class TestYarnCLI {
|
|||
PrintWriter pw = new PrintWriter(baos);
|
||||
pw.println("Total Nodes:3");
|
||||
pw.print(" Node-Id\tNode-State\tNode-Http-Address\t");
|
||||
pw.println("Health-Status(isNodeHealthy)\tRunning-Containers");
|
||||
pw.println("Running-Containers");
|
||||
pw.print(" host0:0\t RUNNING\t host1:8888");
|
||||
pw.println("\t false\t 0");
|
||||
pw.println("\t 0");
|
||||
pw.print(" host1:0\t RUNNING\t host1:8888");
|
||||
pw.println("\t false\t 0");
|
||||
pw.println("\t 0");
|
||||
pw.print(" host2:0\t RUNNING\t host1:8888");
|
||||
pw.println("\t false\t 0");
|
||||
pw.println("\t 0");
|
||||
pw.close();
|
||||
String nodesReportStr = baos.toString("UTF-8");
|
||||
Assert.assertEquals(nodesReportStr, sysOutStream.toString());
|
||||
|
@ -194,10 +193,9 @@ public class TestYarnCLI {
|
|||
pw.println("\tRack : rack1");
|
||||
pw.println("\tNode-State : RUNNING");
|
||||
pw.println("\tNode-Http-Address : host1:8888");
|
||||
pw.println("\tHealth-Status(isNodeHealthy) : false");
|
||||
pw.println("\tLast-Health-Update : "
|
||||
+ DateFormatUtils.format(new Date(0), "E dd/MMM/yy hh:mm:ss:SSzz"));
|
||||
pw.println("\tHealth-Report : null");
|
||||
pw.println("\tHealth-Report : ");
|
||||
pw.println("\tContainers : 0");
|
||||
pw.println("\tMemory-Used : 0M");
|
||||
pw.println("\tMemory-Capacity : 0");
|
||||
|
@ -246,8 +244,7 @@ public class TestYarnCLI {
|
|||
NodeReport nodeReport = NodeReport.newInstance(NodeId
|
||||
.newInstance("host" + i, 0), NodeState.RUNNING, "host" + 1 + ":8888",
|
||||
"rack1", Records.newRecord(Resource.class), Records
|
||||
.newRecord(Resource.class), 0, Records
|
||||
.newRecord(NodeHealthStatus.class));
|
||||
.newRecord(Resource.class), 0, "", 0);
|
||||
nodeReports.add(nodeReport);
|
||||
}
|
||||
return nodeReports;
|
||||
|
|
|
@ -171,7 +171,7 @@ public class BuilderUtils {
|
|||
|
||||
public static NodeReport newNodeReport(NodeId nodeId, NodeState nodeState,
|
||||
String httpAddress, String rackName, Resource used, Resource capability,
|
||||
int numContainers, NodeHealthStatus nodeHealthStatus) {
|
||||
int numContainers, String healthReport, long lastHealthReportTime) {
|
||||
NodeReport nodeReport = recordFactory.newRecordInstance(NodeReport.class);
|
||||
nodeReport.setNodeId(nodeId);
|
||||
nodeReport.setNodeState(nodeState);
|
||||
|
@ -180,7 +180,8 @@ public class BuilderUtils {
|
|||
nodeReport.setUsed(used);
|
||||
nodeReport.setCapability(capability);
|
||||
nodeReport.setNumContainers(numContainers);
|
||||
nodeReport.setNodeHealthStatus(nodeHealthStatus);
|
||||
nodeReport.setHealthReport(healthReport);
|
||||
nodeReport.setLastHealthReportTime(lastHealthReportTime);
|
||||
return nodeReport;
|
||||
}
|
||||
|
||||
|
|
|
@ -343,7 +343,8 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
rmNode.getState(),
|
||||
rmNode.getHttpAddress(), rmNode.getRackName(), used,
|
||||
rmNode.getTotalCapability(), numContainers,
|
||||
rmNode.getNodeHealthStatus());
|
||||
rmNode.getHealthReport(),
|
||||
rmNode.getLastHealthReportTime());
|
||||
|
||||
updatedNodeReports.add(report);
|
||||
}
|
||||
|
|
|
@ -475,7 +475,8 @@ public class ClientRMService extends AbstractService implements
|
|||
rmNode.getState(),
|
||||
rmNode.getHttpAddress(), rmNode.getRackName(), used,
|
||||
rmNode.getTotalCapability(), numContainers,
|
||||
rmNode.getNodeHealthStatus());
|
||||
rmNode.getHealthReport(),
|
||||
rmNode.getLastHealthReportTime());
|
||||
|
||||
return report;
|
||||
}
|
||||
|
|
|
@ -86,13 +86,10 @@ public class RMNMInfo implements RMNMInfoBeans {
|
|||
info.put("State", ni.getState().toString());
|
||||
info.put("NodeId", ni.getNodeID());
|
||||
info.put("NodeHTTPAddress", ni.getHttpAddress());
|
||||
info.put("HealthStatus",
|
||||
ni.getNodeHealthStatus().getIsNodeHealthy() ?
|
||||
"Healthy" : "Unhealthy");
|
||||
info.put("LastHealthUpdate",
|
||||
ni.getNodeHealthStatus().getLastHealthReportTime());
|
||||
ni.getLastHealthReportTime());
|
||||
info.put("HealthReport",
|
||||
ni.getNodeHealthStatus().getHealthReport());
|
||||
ni.getHealthReport());
|
||||
if(report != null) {
|
||||
info.put("NumContainers", report.getNumContainers());
|
||||
info.put("UsedMemoryMB", report.getUsedResource().getMemory());
|
||||
|
|
|
@ -74,10 +74,16 @@ public interface RMNode {
|
|||
public String getHttpAddress();
|
||||
|
||||
/**
|
||||
* the health-status for this node
|
||||
* @return the health-status for this node.
|
||||
* the latest health report received from this node.
|
||||
* @return the latest health report received from this node.
|
||||
*/
|
||||
public NodeHealthStatus getNodeHealthStatus();
|
||||
public String getHealthReport();
|
||||
|
||||
/**
|
||||
* the time of the latest health report received from this node.
|
||||
* @return the time of the latest health report received from this node.
|
||||
*/
|
||||
public long getLastHealthReportTime();
|
||||
|
||||
/**
|
||||
* the total available resource.
|
||||
|
|
|
@ -93,9 +93,10 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
private final String httpAddress;
|
||||
private final Resource totalCapability;
|
||||
private final Node node;
|
||||
private final NodeHealthStatus nodeHealthStatus = recordFactory
|
||||
.newRecordInstance(NodeHealthStatus.class);
|
||||
|
||||
|
||||
private String healthReport;
|
||||
private long lastHealthReportTime;
|
||||
|
||||
/* set of containers that have just launched */
|
||||
private final Map<ContainerId, ContainerStatus> justLaunchedContainers =
|
||||
new HashMap<ContainerId, ContainerStatus>();
|
||||
|
@ -180,9 +181,8 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
this.nodeAddress = hostName + ":" + cmPort;
|
||||
this.httpAddress = hostName + ":" + httpPort;
|
||||
this.node = node;
|
||||
this.nodeHealthStatus.setIsNodeHealthy(true);
|
||||
this.nodeHealthStatus.setHealthReport("Healthy");
|
||||
this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
|
||||
this.healthReport = "Healthy";
|
||||
this.lastHealthReportTime = System.currentTimeMillis();
|
||||
|
||||
this.latestNodeHeartBeatResponse.setResponseId(0);
|
||||
|
||||
|
@ -246,27 +246,46 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public NodeHealthStatus getNodeHealthStatus() {
|
||||
public String getHealthReport() {
|
||||
this.readLock.lock();
|
||||
|
||||
try {
|
||||
return this.nodeHealthStatus;
|
||||
return this.healthReport;
|
||||
} finally {
|
||||
this.readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void setNodeHealthStatus(NodeHealthStatus status)
|
||||
{
|
||||
|
||||
public void setHealthReport(String healthReport) {
|
||||
this.writeLock.lock();
|
||||
|
||||
try {
|
||||
this.nodeHealthStatus.setHealthReport(status.getHealthReport());
|
||||
this.nodeHealthStatus.setIsNodeHealthy(status.getIsNodeHealthy());
|
||||
this.nodeHealthStatus.setLastHealthReportTime(status.getLastHealthReportTime());
|
||||
this.healthReport = healthReport;
|
||||
} finally {
|
||||
this.writeLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public void setLastHealthReportTime(long lastHealthReportTime) {
|
||||
this.writeLock.lock();
|
||||
|
||||
try {
|
||||
this.lastHealthReportTime = lastHealthReportTime;
|
||||
} finally {
|
||||
this.writeLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLastHealthReportTime() {
|
||||
this.readLock.lock();
|
||||
|
||||
try {
|
||||
return this.lastHealthReportTime;
|
||||
} finally {
|
||||
this.readLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public NodeState getState() {
|
||||
|
@ -511,7 +530,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
|
||||
NodeHealthStatus remoteNodeHealthStatus =
|
||||
statusEvent.getNodeHealthStatus();
|
||||
rmNode.setNodeHealthStatus(remoteNodeHealthStatus);
|
||||
rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport());
|
||||
rmNode.setLastHealthReportTime(
|
||||
remoteNodeHealthStatus.getLastHealthReportTime());
|
||||
if (!remoteNodeHealthStatus.getIsNodeHealthy()) {
|
||||
LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: "
|
||||
+ remoteNodeHealthStatus.getHealthReport());
|
||||
|
@ -593,7 +614,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
// Switch the last heartbeatresponse.
|
||||
rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse();
|
||||
NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus();
|
||||
rmNode.setNodeHealthStatus(remoteNodeHealthStatus);
|
||||
rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport());
|
||||
rmNode.setLastHealthReportTime(
|
||||
remoteNodeHealthStatus.getLastHealthReportTime());
|
||||
if (remoteNodeHealthStatus.getIsNodeHealthy()) {
|
||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||
new NodeAddedSchedulerEvent(rmNode));
|
||||
|
|
|
@ -71,7 +71,6 @@ class NodesPage extends RmView {
|
|||
th(".state", "Node State").
|
||||
th(".nodeaddress", "Node Address").
|
||||
th(".nodehttpaddress", "Node HTTP Address").
|
||||
th(".healthStatus", "Health-status").
|
||||
th(".lastHealthUpdate", "Last health-update").
|
||||
th(".healthReport", "Health-report").
|
||||
th(".containers", "Containers").
|
||||
|
@ -122,8 +121,7 @@ class NodesPage extends RmView {
|
|||
row.td().a(HttpConfig.getSchemePrefix() + httpAddress,
|
||||
httpAddress)._();
|
||||
}
|
||||
row.td(info.getHealthStatus()).
|
||||
td().br().$title(String.valueOf(info.getLastHealthUpdate()))._().
|
||||
row.td().br().$title(String.valueOf(info.getLastHealthUpdate()))._().
|
||||
_(Times.format(info.getLastHealthUpdate()))._().
|
||||
td(info.getHealthReport()).
|
||||
td(String.valueOf(info.getNumContainers())).
|
||||
|
|
|
@ -196,7 +196,8 @@ public class RMWebServices {
|
|||
String msg = "Error: You must specify either true or false to query on health";
|
||||
throw new BadRequestException(msg);
|
||||
}
|
||||
if (nodeInfo.isHealthy() != Boolean.parseBoolean(healthState)) {
|
||||
if ((ni.getState() != NodeState.UNHEALTHY)
|
||||
!= Boolean.parseBoolean(healthState)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,9 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
|
|||
import javax.xml.bind.annotation.XmlAccessType;
|
||||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
import javax.xml.bind.annotation.XmlTransient;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
|
@ -39,23 +37,18 @@ public class NodeInfo {
|
|||
protected String id;
|
||||
protected String nodeHostName;
|
||||
protected String nodeHTTPAddress;
|
||||
protected String healthStatus;
|
||||
protected long lastHealthUpdate;
|
||||
protected String healthReport;
|
||||
protected int numContainers;
|
||||
protected long usedMemoryMB;
|
||||
protected long availMemoryMB;
|
||||
|
||||
@XmlTransient
|
||||
protected boolean healthy;
|
||||
|
||||
public NodeInfo() {
|
||||
} // JAXB needs this
|
||||
|
||||
public NodeInfo(RMNode ni, ResourceScheduler sched) {
|
||||
NodeId id = ni.getNodeID();
|
||||
SchedulerNodeReport report = sched.getNodeReport(id);
|
||||
NodeHealthStatus health = ni.getNodeHealthStatus();
|
||||
this.numContainers = 0;
|
||||
this.usedMemoryMB = 0;
|
||||
this.availMemoryMB = 0;
|
||||
|
@ -69,14 +62,8 @@ public class NodeInfo {
|
|||
this.nodeHostName = ni.getHostName();
|
||||
this.state = ni.getState();
|
||||
this.nodeHTTPAddress = ni.getHttpAddress();
|
||||
this.healthy = health.getIsNodeHealthy();
|
||||
this.healthStatus = health.getIsNodeHealthy() ? "Healthy" : "Unhealthy";
|
||||
this.lastHealthUpdate = health.getLastHealthReportTime();
|
||||
this.healthReport = String.valueOf(health.getHealthReport());
|
||||
}
|
||||
|
||||
public boolean isHealthy() {
|
||||
return this.healthy;
|
||||
this.lastHealthUpdate = ni.getLastHealthReportTime();
|
||||
this.healthReport = String.valueOf(ni.getHealthReport());
|
||||
}
|
||||
|
||||
public String getRack() {
|
||||
|
@ -99,10 +86,6 @@ public class NodeInfo {
|
|||
this.nodeHTTPAddress = nodeHTTPAddress;
|
||||
}
|
||||
|
||||
public String getHealthStatus() {
|
||||
return this.healthStatus;
|
||||
}
|
||||
|
||||
public long getLastHealthUpdate() {
|
||||
return this.lastHealthUpdate;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.List;
|
|||
import org.apache.hadoop.net.Node;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
|
@ -96,18 +95,20 @@ public class MockNodes {
|
|||
private int cmdPort;
|
||||
private Resource perNode;
|
||||
private String rackName;
|
||||
private NodeHealthStatus nodeHealthStatus;
|
||||
private String healthReport;
|
||||
private long lastHealthReportTime;
|
||||
private NodeState state;
|
||||
|
||||
public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
||||
Resource perNode, String rackName, NodeHealthStatus nodeHealthStatus,
|
||||
int cmdPort, String hostName, NodeState state) {
|
||||
Resource perNode, String rackName, String healthReport,
|
||||
long lastHealthReportTime, int cmdPort, String hostName, NodeState state) {
|
||||
this.nodeId = nodeId;
|
||||
this.nodeAddr = nodeAddr;
|
||||
this.httpAddress = httpAddress;
|
||||
this.perNode = perNode;
|
||||
this.rackName = rackName;
|
||||
this.nodeHealthStatus = nodeHealthStatus;
|
||||
this.healthReport = healthReport;
|
||||
this.lastHealthReportTime = lastHealthReportTime;
|
||||
this.cmdPort = cmdPort;
|
||||
this.hostName = hostName;
|
||||
this.state = state;
|
||||
|
@ -143,11 +144,6 @@ public class MockNodes {
|
|||
return this.httpAddress;
|
||||
}
|
||||
|
||||
@Override
|
||||
public NodeHealthStatus getNodeHealthStatus() {
|
||||
return this.nodeHealthStatus;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Resource getTotalCapability() {
|
||||
return this.perNode;
|
||||
|
@ -191,6 +187,16 @@ public class MockNodes {
|
|||
public List<UpdatedContainerInfo> pullContainerUpdates() {
|
||||
return new ArrayList<UpdatedContainerInfo>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getHealthReport() {
|
||||
return healthReport;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLastHealthReportTime() {
|
||||
return lastHealthReportTime;
|
||||
}
|
||||
};
|
||||
|
||||
private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr) {
|
||||
|
@ -209,14 +215,9 @@ public class MockNodes {
|
|||
final NodeId nodeID = NodeId.newInstance(hostName, port);
|
||||
|
||||
final String httpAddress = httpAddr;
|
||||
final NodeHealthStatus nodeHealthStatus =
|
||||
recordFactory.newRecordInstance(NodeHealthStatus.class);
|
||||
if (state != NodeState.UNHEALTHY) {
|
||||
nodeHealthStatus.setIsNodeHealthy(true);
|
||||
nodeHealthStatus.setHealthReport("HealthyMe");
|
||||
}
|
||||
String healthReport = (state == NodeState.UNHEALTHY) ? null : "HealthyMe";
|
||||
return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, perNode, rackName,
|
||||
nodeHealthStatus, nid, hostName, state);
|
||||
healthReport, 0, nid, hostName, state);
|
||||
}
|
||||
|
||||
public static RMNode nodeInfo(int rack, final Resource perNode,
|
||||
|
|
|
@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
|
@ -133,8 +134,8 @@ public class TestClientRMService {
|
|||
List<NodeReport> nodeReports =
|
||||
client.getClusterNodes(request).getNodeReports();
|
||||
Assert.assertEquals(1, nodeReports.size());
|
||||
Assert.assertTrue("Node is expected to be healthy!", nodeReports.get(0)
|
||||
.getNodeHealthStatus().getIsNodeHealthy());
|
||||
Assert.assertNotSame("Node is expected to be healthy!", NodeState.UNHEALTHY,
|
||||
nodeReports.get(0).getNodeState());
|
||||
|
||||
// Now make the node unhealthy.
|
||||
node.nodeHeartbeat(false);
|
||||
|
@ -142,8 +143,8 @@ public class TestClientRMService {
|
|||
// Call again
|
||||
nodeReports = client.getClusterNodes(request).getNodeReports();
|
||||
Assert.assertEquals(1, nodeReports.size());
|
||||
Assert.assertFalse("Node is expected to be unhealthy!", nodeReports.get(0)
|
||||
.getNodeHealthStatus().getIsNodeHealthy());
|
||||
Assert.assertEquals("Node is expected to be unhealthy!", NodeState.UNHEALTHY,
|
||||
nodeReports.get(0).getNodeState());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -346,9 +346,8 @@ public class TestRMNodeTransitions {
|
|||
|
||||
private RMNodeImpl getUnhealthyNode() {
|
||||
RMNodeImpl node = getRunningNode();
|
||||
NodeHealthStatus status = node.getNodeHealthStatus();
|
||||
status.setHealthReport("sick");
|
||||
status.setIsNodeHealthy(false);
|
||||
NodeHealthStatus status = NodeHealthStatus.newInstance(false, "sick",
|
||||
System.currentTimeMillis());
|
||||
node.handle(new RMNodeStatusEvent(node.getNodeID(), status,
|
||||
new ArrayList<ContainerStatus>(), null, null));
|
||||
Assert.assertEquals(NodeState.UNHEALTHY, node.getState());
|
||||
|
|
|
@ -176,11 +176,8 @@ public class TestResourceManager {
|
|||
nm1.heartbeat();
|
||||
nm1.heartbeat();
|
||||
Collection<RMNode> values = resourceManager.getRMContext().getRMNodes().values();
|
||||
for (RMNode ni : values)
|
||||
{
|
||||
NodeHealthStatus nodeHealthStatus = ni.getNodeHealthStatus();
|
||||
String healthReport = nodeHealthStatus.getHealthReport();
|
||||
assertNotNull(healthReport);
|
||||
for (RMNode ni : values) {
|
||||
assertNotNull(ni.getHealthReport());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.hadoop.net.NetUtils;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
|
@ -391,15 +392,15 @@ public class TestResourceTrackerService {
|
|||
int count) throws Exception {
|
||||
|
||||
int waitCount = 0;
|
||||
while(rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
||||
.getNodeHealthStatus().getIsNodeHealthy() == health
|
||||
while((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
||||
.getState() != NodeState.UNHEALTHY) == health
|
||||
&& waitCount++ < 20) {
|
||||
synchronized (this) {
|
||||
wait(100);
|
||||
}
|
||||
}
|
||||
Assert.assertFalse(rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
||||
.getNodeHealthStatus().getIsNodeHealthy() == health);
|
||||
Assert.assertFalse((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
||||
.getState() != NodeState.UNHEALTHY) == health);
|
||||
Assert.assertEquals("Unhealthy metrics not incremented", count,
|
||||
ClusterMetrics.getMetrics().getUnhealthyNMs());
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ public class TestNodesPage {
|
|||
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
|
||||
// future. In that case this value should be adjusted to the new value.
|
||||
final int numberOfThInMetricsTable = 13;
|
||||
final int numberOfActualTableHeaders = 10;
|
||||
final int numberOfActualTableHeaders = 9;
|
||||
|
||||
private Injector injector;
|
||||
|
||||
|
|
|
@ -142,9 +142,8 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
|||
rm.NMwaitForState(nm3.getNodeId(), NodeState.RUNNING);
|
||||
RMNodeImpl node = (RMNodeImpl) rm.getRMContext().getRMNodes()
|
||||
.get(nm3.getNodeId());
|
||||
NodeHealthStatus nodeHealth = node.getNodeHealthStatus();
|
||||
nodeHealth.setHealthReport("test health report");
|
||||
nodeHealth.setIsNodeHealthy(false);
|
||||
NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(false,
|
||||
"test health report", System.currentTimeMillis());
|
||||
node.handle(new RMNodeStatusEvent(nm3.getNodeId(), nodeHealth,
|
||||
new ArrayList<ContainerStatus>(), null, null));
|
||||
rm.NMwaitForState(nm3.getNodeId(), NodeState.UNHEALTHY);
|
||||
|
@ -357,9 +356,8 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
|||
rm.NMwaitForState(nm1.getNodeId(), NodeState.RUNNING);
|
||||
RMNodeImpl node = (RMNodeImpl) rm.getRMContext().getRMNodes()
|
||||
.get(nm1.getNodeId());
|
||||
NodeHealthStatus nodeHealth = node.getNodeHealthStatus();
|
||||
nodeHealth.setHealthReport("test health report");
|
||||
nodeHealth.setIsNodeHealthy(false);
|
||||
NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(false,
|
||||
"test health report", System.currentTimeMillis());
|
||||
node.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeHealth,
|
||||
new ArrayList<ContainerStatus>(), null, null));
|
||||
rm.NMwaitForState(nm1.getNodeId(), NodeState.UNHEALTHY);
|
||||
|
@ -699,7 +697,6 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
|||
verifyNodeInfoGeneric(nm,
|
||||
WebServicesTestUtils.getXmlString(element, "state"),
|
||||
WebServicesTestUtils.getXmlString(element, "rack"),
|
||||
WebServicesTestUtils.getXmlString(element, "healthStatus"),
|
||||
WebServicesTestUtils.getXmlString(element, "id"),
|
||||
WebServicesTestUtils.getXmlString(element, "nodeHostName"),
|
||||
WebServicesTestUtils.getXmlString(element, "nodeHTTPAddress"),
|
||||
|
@ -713,10 +710,10 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
|||
|
||||
public void verifyNodeInfo(JSONObject nodeInfo, MockNM nm)
|
||||
throws JSONException, Exception {
|
||||
assertEquals("incorrect number of elements", 11, nodeInfo.length());
|
||||
assertEquals("incorrect number of elements", 10, nodeInfo.length());
|
||||
|
||||
verifyNodeInfoGeneric(nm, nodeInfo.getString("state"),
|
||||
nodeInfo.getString("rack"), nodeInfo.getString("healthStatus"),
|
||||
nodeInfo.getString("rack"),
|
||||
nodeInfo.getString("id"), nodeInfo.getString("nodeHostName"),
|
||||
nodeInfo.getString("nodeHTTPAddress"),
|
||||
nodeInfo.getLong("lastHealthUpdate"),
|
||||
|
@ -726,32 +723,29 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
|||
}
|
||||
|
||||
public void verifyNodeInfoGeneric(MockNM nm, String state, String rack,
|
||||
String healthStatus, String id, String nodeHostName,
|
||||
String id, String nodeHostName,
|
||||
String nodeHTTPAddress, long lastHealthUpdate, String healthReport,
|
||||
int numContainers, long usedMemoryMB, long availMemoryMB)
|
||||
throws JSONException, Exception {
|
||||
|
||||
RMNode node = rm.getRMContext().getRMNodes().get(nm.getNodeId());
|
||||
NodeHealthStatus health = node.getNodeHealthStatus();
|
||||
ResourceScheduler sched = rm.getResourceScheduler();
|
||||
SchedulerNodeReport report = sched.getNodeReport(nm.getNodeId());
|
||||
|
||||
WebServicesTestUtils.checkStringMatch("state", node.getState().toString(),
|
||||
state);
|
||||
WebServicesTestUtils.checkStringMatch("rack", node.getRackName(), rack);
|
||||
WebServicesTestUtils.checkStringMatch("healthStatus", "Healthy",
|
||||
healthStatus);
|
||||
WebServicesTestUtils.checkStringMatch("id", nm.getNodeId().toString(), id);
|
||||
WebServicesTestUtils.checkStringMatch("nodeHostName", nm.getNodeId()
|
||||
.getHost(), nodeHostName);
|
||||
WebServicesTestUtils.checkStringMatch("healthReport",
|
||||
String.valueOf(health.getHealthReport()), healthReport);
|
||||
String.valueOf(node.getHealthReport()), healthReport);
|
||||
String expectedHttpAddress = nm.getNodeId().getHost() + ":"
|
||||
+ nm.getHttpPort();
|
||||
WebServicesTestUtils.checkStringMatch("nodeHTTPAddress",
|
||||
expectedHttpAddress, nodeHTTPAddress);
|
||||
|
||||
long expectedHealthUpdate = health.getLastHealthReportTime();
|
||||
long expectedHealthUpdate = node.getLastHealthReportTime();
|
||||
assertEquals("lastHealthUpdate doesn't match, got: " + lastHealthUpdate
|
||||
+ " expected: " + expectedHealthUpdate, expectedHealthUpdate,
|
||||
lastHealthUpdate);
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
|
|||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||
import org.apache.hadoop.security.AccessControlException;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.MiniYARNCluster;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
||||
|
@ -243,7 +244,7 @@ public class TestDiskFailures {
|
|||
for (int i = 0; i < 10; i++) {
|
||||
Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
|
||||
.getRMNodes().values().iterator();
|
||||
if (iter.next().getNodeHealthStatus().getIsNodeHealthy() == isHealthy) {
|
||||
if ((iter.next().getState() != NodeState.UNHEALTHY) == isHealthy) {
|
||||
break;
|
||||
}
|
||||
// wait for the node health info to go to RM
|
||||
|
@ -256,7 +257,7 @@ public class TestDiskFailures {
|
|||
Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
|
||||
.getRMNodes().values().iterator();
|
||||
Assert.assertEquals("RM is not updated with the health status of a node",
|
||||
isHealthy, iter.next().getNodeHealthStatus().getIsNodeHealthy());
|
||||
isHealthy, iter.next().getState() != NodeState.UNHEALTHY);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue