YARN-686. Flatten NodeReport. (sandyr via tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1490827 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2a76cddcd5
commit
af8514eef2
|
@ -301,6 +301,8 @@ Release 2.1.0-beta - UNRELEASED
|
||||||
YARN-750. Allow for black-listing resources in YARN API and Impl in CS
|
YARN-750. Allow for black-listing resources in YARN API and Impl in CS
|
||||||
(acmurthy via bikas)
|
(acmurthy via bikas)
|
||||||
|
|
||||||
|
YARN-686. Flatten NodeReport. (sandyr via tucu)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
YARN-512. Log aggregation root directory check is more expensive than it
|
YARN-512. Log aggregation root directory check is more expensive than it
|
||||||
|
|
|
@ -51,7 +51,7 @@ public abstract class NodeReport {
|
||||||
@Private
|
@Private
|
||||||
public static NodeReport newInstance(NodeId nodeId, NodeState nodeState,
|
public static NodeReport newInstance(NodeId nodeId, NodeState nodeState,
|
||||||
String httpAddress, String rackName, Resource used, Resource capability,
|
String httpAddress, String rackName, Resource used, Resource capability,
|
||||||
int numContainers, NodeHealthStatus nodeHealthStatus) {
|
int numContainers, String healthReport, long lastHealthReportTime) {
|
||||||
NodeReport nodeReport = Records.newRecord(NodeReport.class);
|
NodeReport nodeReport = Records.newRecord(NodeReport.class);
|
||||||
nodeReport.setNodeId(nodeId);
|
nodeReport.setNodeId(nodeId);
|
||||||
nodeReport.setNodeState(nodeState);
|
nodeReport.setNodeState(nodeState);
|
||||||
|
@ -60,7 +60,8 @@ public abstract class NodeReport {
|
||||||
nodeReport.setUsed(used);
|
nodeReport.setUsed(used);
|
||||||
nodeReport.setCapability(capability);
|
nodeReport.setCapability(capability);
|
||||||
nodeReport.setNumContainers(numContainers);
|
nodeReport.setNumContainers(numContainers);
|
||||||
nodeReport.setNodeHealthStatus(nodeHealthStatus);
|
nodeReport.setHealthReport(healthReport);
|
||||||
|
nodeReport.setLastHealthReportTime(lastHealthReportTime);
|
||||||
return nodeReport;
|
return nodeReport;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,15 +145,28 @@ public abstract class NodeReport {
|
||||||
@Unstable
|
@Unstable
|
||||||
public abstract void setNumContainers(int numContainers);
|
public abstract void setNumContainers(int numContainers);
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the <code>NodeHealthStatus</code> of the node.
|
/**
|
||||||
* @return <code>NodeHealthStatus</code> of the node
|
* Get the <em>diagnostic health report</em> of the node.
|
||||||
|
* @return <em>diagnostic health report</em> of the node
|
||||||
*/
|
*/
|
||||||
@Public
|
@Public
|
||||||
@Stable
|
@Stable
|
||||||
public abstract NodeHealthStatus getNodeHealthStatus();
|
public abstract String getHealthReport();
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@Unstable
|
@Unstable
|
||||||
public abstract void setNodeHealthStatus(NodeHealthStatus nodeHealthStatus);
|
public abstract void setHealthReport(String healthReport);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the <em>last timestamp</em> at which the health report was received.
|
||||||
|
* @return <em>last timestamp</em> at which the health report was received
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Stable
|
||||||
|
public abstract long getLastHealthReportTime();
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public abstract void setLastHealthReportTime(long lastHealthReport);
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,12 +18,10 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.api.records.impl.pb;
|
package org.apache.hadoop.yarn.api.records.impl.pb;
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeHealthStatusProto;
|
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProtoOrBuilder;
|
import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProtoOrBuilder;
|
||||||
|
@ -38,7 +36,6 @@ public class NodeReportPBImpl extends NodeReport {
|
||||||
private NodeId nodeId;
|
private NodeId nodeId;
|
||||||
private Resource used;
|
private Resource used;
|
||||||
private Resource capability;
|
private Resource capability;
|
||||||
private NodeHealthStatus nodeHealthStatus;
|
|
||||||
|
|
||||||
public NodeReportPBImpl() {
|
public NodeReportPBImpl() {
|
||||||
builder = NodeReportProto.newBuilder();
|
builder = NodeReportProto.newBuilder();
|
||||||
|
@ -64,19 +61,33 @@ public class NodeReportPBImpl extends NodeReport {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NodeHealthStatus getNodeHealthStatus() {
|
public String getHealthReport() {
|
||||||
if (this.nodeHealthStatus != null) {
|
|
||||||
return this.nodeHealthStatus;
|
|
||||||
}
|
|
||||||
|
|
||||||
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
if (!p.hasNodeHealthStatus()) {
|
return p.getHealthReport();
|
||||||
return null;
|
|
||||||
}
|
|
||||||
this.nodeHealthStatus = convertFromProtoFormat(p.getNodeHealthStatus());
|
|
||||||
return this.nodeHealthStatus;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setHealthReport(String healthReport) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (healthReport == null) {
|
||||||
|
builder.clearHealthReport();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setHealthReport(healthReport);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getLastHealthReportTime() {
|
||||||
|
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
return p.getLastHealthReportTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setLastHealthReportTime(long lastHealthReportTime) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
builder.setLastHealthReportTime(lastHealthReportTime);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getHttpAddress() {
|
public String getHttpAddress() {
|
||||||
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
NodeReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
@ -158,14 +169,6 @@ public class NodeReportPBImpl extends NodeReport {
|
||||||
this.capability = capability;
|
this.capability = capability;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setNodeHealthStatus(NodeHealthStatus healthStatus) {
|
|
||||||
maybeInitBuilder();
|
|
||||||
if (healthStatus == null)
|
|
||||||
builder.clearNodeHealthStatus();
|
|
||||||
this.nodeHealthStatus = healthStatus;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setHttpAddress(String httpAddress) {
|
public void setHttpAddress(String httpAddress) {
|
||||||
maybeInitBuilder();
|
maybeInitBuilder();
|
||||||
|
@ -247,11 +250,6 @@ public class NodeReportPBImpl extends NodeReport {
|
||||||
builder.getCapability())) {
|
builder.getCapability())) {
|
||||||
builder.setCapability(convertToProtoFormat(this.capability));
|
builder.setCapability(convertToProtoFormat(this.capability));
|
||||||
}
|
}
|
||||||
if (this.nodeHealthStatus != null
|
|
||||||
&& !((NodeHealthStatusPBImpl) this.nodeHealthStatus).getProto().equals(
|
|
||||||
builder.getNodeHealthStatus())) {
|
|
||||||
builder.setNodeHealthStatus(convertToProtoFormat(this.nodeHealthStatus));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mergeLocalToProto() {
|
private void mergeLocalToProto() {
|
||||||
|
@ -286,11 +284,4 @@ public class NodeReportPBImpl extends NodeReport {
|
||||||
return ((ResourcePBImpl) r).getProto();
|
return ((ResourcePBImpl) r).getProto();
|
||||||
}
|
}
|
||||||
|
|
||||||
private NodeHealthStatusPBImpl convertFromProtoFormat(NodeHealthStatusProto p) {
|
|
||||||
return new NodeHealthStatusPBImpl(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
private NodeHealthStatusProto convertToProtoFormat(NodeHealthStatus r) {
|
|
||||||
return ((NodeHealthStatusPBImpl) r).getProto();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -191,8 +191,9 @@ message NodeReportProto {
|
||||||
optional ResourceProto used = 4;
|
optional ResourceProto used = 4;
|
||||||
optional ResourceProto capability = 5;
|
optional ResourceProto capability = 5;
|
||||||
optional int32 numContainers = 6;
|
optional int32 numContainers = 6;
|
||||||
optional NodeHealthStatusProto node_health_status = 8;
|
optional NodeStateProto node_state = 7;
|
||||||
optional NodeStateProto node_state = 9;
|
optional string health_report = 8;
|
||||||
|
optional int64 last_health_report_time = 9;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -330,8 +330,7 @@ public class Client extends YarnClientImpl {
|
||||||
+ ", nodeId=" + node.getNodeId()
|
+ ", nodeId=" + node.getNodeId()
|
||||||
+ ", nodeAddress" + node.getHttpAddress()
|
+ ", nodeAddress" + node.getHttpAddress()
|
||||||
+ ", nodeRackName" + node.getRackName()
|
+ ", nodeRackName" + node.getRackName()
|
||||||
+ ", nodeNumContainers" + node.getNumContainers()
|
+ ", nodeNumContainers" + node.getNumContainers());
|
||||||
+ ", nodeHealthStatus" + node.getNodeHealthStatus());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
QueueInfo queueInfo = super.getQueueInfo(this.amQueue);
|
QueueInfo queueInfo = super.getQueueInfo(this.amQueue);
|
||||||
|
|
|
@ -35,7 +35,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||||
|
|
||||||
public class NodeCLI extends YarnCLI {
|
public class NodeCLI extends YarnCLI {
|
||||||
private static final String NODES_PATTERN = "%16s\t%10s\t%17s\t%26s\t%18s" +
|
private static final String NODES_PATTERN = "%16s\t%10s\t%17s\t%18s" +
|
||||||
System.getProperty("line.separator");
|
System.getProperty("line.separator");
|
||||||
|
|
||||||
public static void main(String[] args) throws Exception {
|
public static void main(String[] args) throws Exception {
|
||||||
|
@ -91,11 +91,10 @@ public class NodeCLI extends YarnCLI {
|
||||||
List<NodeReport> nodesReport = client.getNodeReports();
|
List<NodeReport> nodesReport = client.getNodeReports();
|
||||||
writer.println("Total Nodes:" + nodesReport.size());
|
writer.println("Total Nodes:" + nodesReport.size());
|
||||||
writer.printf(NODES_PATTERN, "Node-Id", "Node-State", "Node-Http-Address",
|
writer.printf(NODES_PATTERN, "Node-Id", "Node-State", "Node-Http-Address",
|
||||||
"Health-Status(isNodeHealthy)", "Running-Containers");
|
"Running-Containers");
|
||||||
for (NodeReport nodeReport : nodesReport) {
|
for (NodeReport nodeReport : nodesReport) {
|
||||||
writer.printf(NODES_PATTERN, nodeReport.getNodeId(), nodeReport
|
writer.printf(NODES_PATTERN, nodeReport.getNodeId(), nodeReport
|
||||||
.getNodeState(), nodeReport.getHttpAddress(), nodeReport
|
.getNodeState(), nodeReport.getHttpAddress(), nodeReport
|
||||||
.getNodeHealthStatus().getIsNodeHealthy(), nodeReport
|
|
||||||
.getNumContainers());
|
.getNumContainers());
|
||||||
}
|
}
|
||||||
writer.flush();
|
writer.flush();
|
||||||
|
@ -129,16 +128,13 @@ public class NodeCLI extends YarnCLI {
|
||||||
nodeReportStr.println(nodeReport.getNodeState());
|
nodeReportStr.println(nodeReport.getNodeState());
|
||||||
nodeReportStr.print("\tNode-Http-Address : ");
|
nodeReportStr.print("\tNode-Http-Address : ");
|
||||||
nodeReportStr.println(nodeReport.getHttpAddress());
|
nodeReportStr.println(nodeReport.getHttpAddress());
|
||||||
nodeReportStr.print("\tHealth-Status(isNodeHealthy) : ");
|
|
||||||
nodeReportStr.println(nodeReport.getNodeHealthStatus()
|
|
||||||
.getIsNodeHealthy());
|
|
||||||
nodeReportStr.print("\tLast-Health-Update : ");
|
nodeReportStr.print("\tLast-Health-Update : ");
|
||||||
nodeReportStr.println(DateFormatUtils.format(
|
nodeReportStr.println(DateFormatUtils.format(
|
||||||
new Date(nodeReport.getNodeHealthStatus().
|
new Date(nodeReport.getLastHealthReportTime()),
|
||||||
getLastHealthReportTime()),"E dd/MMM/yy hh:mm:ss:SSzz"));
|
"E dd/MMM/yy hh:mm:ss:SSzz"));
|
||||||
nodeReportStr.print("\tHealth-Report : ");
|
nodeReportStr.print("\tHealth-Report : ");
|
||||||
nodeReportStr
|
nodeReportStr
|
||||||
.println(nodeReport.getNodeHealthStatus().getHealthReport());
|
.println(nodeReport.getHealthReport());
|
||||||
nodeReportStr.print("\tContainers : ");
|
nodeReportStr.print("\tContainers : ");
|
||||||
nodeReportStr.println(nodeReport.getNumContainers());
|
nodeReportStr.println(nodeReport.getNumContainers());
|
||||||
nodeReportStr.print("\tMemory-Used : ");
|
nodeReportStr.print("\tMemory-Used : ");
|
||||||
|
|
|
@ -41,7 +41,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
|
@ -163,13 +162,13 @@ public class TestYarnCLI {
|
||||||
PrintWriter pw = new PrintWriter(baos);
|
PrintWriter pw = new PrintWriter(baos);
|
||||||
pw.println("Total Nodes:3");
|
pw.println("Total Nodes:3");
|
||||||
pw.print(" Node-Id\tNode-State\tNode-Http-Address\t");
|
pw.print(" Node-Id\tNode-State\tNode-Http-Address\t");
|
||||||
pw.println("Health-Status(isNodeHealthy)\tRunning-Containers");
|
pw.println("Running-Containers");
|
||||||
pw.print(" host0:0\t RUNNING\t host1:8888");
|
pw.print(" host0:0\t RUNNING\t host1:8888");
|
||||||
pw.println("\t false\t 0");
|
pw.println("\t 0");
|
||||||
pw.print(" host1:0\t RUNNING\t host1:8888");
|
pw.print(" host1:0\t RUNNING\t host1:8888");
|
||||||
pw.println("\t false\t 0");
|
pw.println("\t 0");
|
||||||
pw.print(" host2:0\t RUNNING\t host1:8888");
|
pw.print(" host2:0\t RUNNING\t host1:8888");
|
||||||
pw.println("\t false\t 0");
|
pw.println("\t 0");
|
||||||
pw.close();
|
pw.close();
|
||||||
String nodesReportStr = baos.toString("UTF-8");
|
String nodesReportStr = baos.toString("UTF-8");
|
||||||
Assert.assertEquals(nodesReportStr, sysOutStream.toString());
|
Assert.assertEquals(nodesReportStr, sysOutStream.toString());
|
||||||
|
@ -194,10 +193,9 @@ public class TestYarnCLI {
|
||||||
pw.println("\tRack : rack1");
|
pw.println("\tRack : rack1");
|
||||||
pw.println("\tNode-State : RUNNING");
|
pw.println("\tNode-State : RUNNING");
|
||||||
pw.println("\tNode-Http-Address : host1:8888");
|
pw.println("\tNode-Http-Address : host1:8888");
|
||||||
pw.println("\tHealth-Status(isNodeHealthy) : false");
|
|
||||||
pw.println("\tLast-Health-Update : "
|
pw.println("\tLast-Health-Update : "
|
||||||
+ DateFormatUtils.format(new Date(0), "E dd/MMM/yy hh:mm:ss:SSzz"));
|
+ DateFormatUtils.format(new Date(0), "E dd/MMM/yy hh:mm:ss:SSzz"));
|
||||||
pw.println("\tHealth-Report : null");
|
pw.println("\tHealth-Report : ");
|
||||||
pw.println("\tContainers : 0");
|
pw.println("\tContainers : 0");
|
||||||
pw.println("\tMemory-Used : 0M");
|
pw.println("\tMemory-Used : 0M");
|
||||||
pw.println("\tMemory-Capacity : 0");
|
pw.println("\tMemory-Capacity : 0");
|
||||||
|
@ -246,8 +244,7 @@ public class TestYarnCLI {
|
||||||
NodeReport nodeReport = NodeReport.newInstance(NodeId
|
NodeReport nodeReport = NodeReport.newInstance(NodeId
|
||||||
.newInstance("host" + i, 0), NodeState.RUNNING, "host" + 1 + ":8888",
|
.newInstance("host" + i, 0), NodeState.RUNNING, "host" + 1 + ":8888",
|
||||||
"rack1", Records.newRecord(Resource.class), Records
|
"rack1", Records.newRecord(Resource.class), Records
|
||||||
.newRecord(Resource.class), 0, Records
|
.newRecord(Resource.class), 0, "", 0);
|
||||||
.newRecord(NodeHealthStatus.class));
|
|
||||||
nodeReports.add(nodeReport);
|
nodeReports.add(nodeReport);
|
||||||
}
|
}
|
||||||
return nodeReports;
|
return nodeReports;
|
||||||
|
|
|
@ -171,7 +171,7 @@ public class BuilderUtils {
|
||||||
|
|
||||||
public static NodeReport newNodeReport(NodeId nodeId, NodeState nodeState,
|
public static NodeReport newNodeReport(NodeId nodeId, NodeState nodeState,
|
||||||
String httpAddress, String rackName, Resource used, Resource capability,
|
String httpAddress, String rackName, Resource used, Resource capability,
|
||||||
int numContainers, NodeHealthStatus nodeHealthStatus) {
|
int numContainers, String healthReport, long lastHealthReportTime) {
|
||||||
NodeReport nodeReport = recordFactory.newRecordInstance(NodeReport.class);
|
NodeReport nodeReport = recordFactory.newRecordInstance(NodeReport.class);
|
||||||
nodeReport.setNodeId(nodeId);
|
nodeReport.setNodeId(nodeId);
|
||||||
nodeReport.setNodeState(nodeState);
|
nodeReport.setNodeState(nodeState);
|
||||||
|
@ -180,7 +180,8 @@ public class BuilderUtils {
|
||||||
nodeReport.setUsed(used);
|
nodeReport.setUsed(used);
|
||||||
nodeReport.setCapability(capability);
|
nodeReport.setCapability(capability);
|
||||||
nodeReport.setNumContainers(numContainers);
|
nodeReport.setNumContainers(numContainers);
|
||||||
nodeReport.setNodeHealthStatus(nodeHealthStatus);
|
nodeReport.setHealthReport(healthReport);
|
||||||
|
nodeReport.setLastHealthReportTime(lastHealthReportTime);
|
||||||
return nodeReport;
|
return nodeReport;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -343,7 +343,8 @@ public class ApplicationMasterService extends AbstractService implements
|
||||||
rmNode.getState(),
|
rmNode.getState(),
|
||||||
rmNode.getHttpAddress(), rmNode.getRackName(), used,
|
rmNode.getHttpAddress(), rmNode.getRackName(), used,
|
||||||
rmNode.getTotalCapability(), numContainers,
|
rmNode.getTotalCapability(), numContainers,
|
||||||
rmNode.getNodeHealthStatus());
|
rmNode.getHealthReport(),
|
||||||
|
rmNode.getLastHealthReportTime());
|
||||||
|
|
||||||
updatedNodeReports.add(report);
|
updatedNodeReports.add(report);
|
||||||
}
|
}
|
||||||
|
|
|
@ -475,7 +475,8 @@ public class ClientRMService extends AbstractService implements
|
||||||
rmNode.getState(),
|
rmNode.getState(),
|
||||||
rmNode.getHttpAddress(), rmNode.getRackName(), used,
|
rmNode.getHttpAddress(), rmNode.getRackName(), used,
|
||||||
rmNode.getTotalCapability(), numContainers,
|
rmNode.getTotalCapability(), numContainers,
|
||||||
rmNode.getNodeHealthStatus());
|
rmNode.getHealthReport(),
|
||||||
|
rmNode.getLastHealthReportTime());
|
||||||
|
|
||||||
return report;
|
return report;
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,13 +86,10 @@ public class RMNMInfo implements RMNMInfoBeans {
|
||||||
info.put("State", ni.getState().toString());
|
info.put("State", ni.getState().toString());
|
||||||
info.put("NodeId", ni.getNodeID());
|
info.put("NodeId", ni.getNodeID());
|
||||||
info.put("NodeHTTPAddress", ni.getHttpAddress());
|
info.put("NodeHTTPAddress", ni.getHttpAddress());
|
||||||
info.put("HealthStatus",
|
|
||||||
ni.getNodeHealthStatus().getIsNodeHealthy() ?
|
|
||||||
"Healthy" : "Unhealthy");
|
|
||||||
info.put("LastHealthUpdate",
|
info.put("LastHealthUpdate",
|
||||||
ni.getNodeHealthStatus().getLastHealthReportTime());
|
ni.getLastHealthReportTime());
|
||||||
info.put("HealthReport",
|
info.put("HealthReport",
|
||||||
ni.getNodeHealthStatus().getHealthReport());
|
ni.getHealthReport());
|
||||||
if(report != null) {
|
if(report != null) {
|
||||||
info.put("NumContainers", report.getNumContainers());
|
info.put("NumContainers", report.getNumContainers());
|
||||||
info.put("UsedMemoryMB", report.getUsedResource().getMemory());
|
info.put("UsedMemoryMB", report.getUsedResource().getMemory());
|
||||||
|
|
|
@ -74,10 +74,16 @@ public interface RMNode {
|
||||||
public String getHttpAddress();
|
public String getHttpAddress();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the health-status for this node
|
* the latest health report received from this node.
|
||||||
* @return the health-status for this node.
|
* @return the latest health report received from this node.
|
||||||
*/
|
*/
|
||||||
public NodeHealthStatus getNodeHealthStatus();
|
public String getHealthReport();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the time of the latest health report received from this node.
|
||||||
|
* @return the time of the latest health report received from this node.
|
||||||
|
*/
|
||||||
|
public long getLastHealthReportTime();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* the total available resource.
|
* the total available resource.
|
||||||
|
|
|
@ -93,9 +93,10 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
private final String httpAddress;
|
private final String httpAddress;
|
||||||
private final Resource totalCapability;
|
private final Resource totalCapability;
|
||||||
private final Node node;
|
private final Node node;
|
||||||
private final NodeHealthStatus nodeHealthStatus = recordFactory
|
|
||||||
.newRecordInstance(NodeHealthStatus.class);
|
private String healthReport;
|
||||||
|
private long lastHealthReportTime;
|
||||||
|
|
||||||
/* set of containers that have just launched */
|
/* set of containers that have just launched */
|
||||||
private final Map<ContainerId, ContainerStatus> justLaunchedContainers =
|
private final Map<ContainerId, ContainerStatus> justLaunchedContainers =
|
||||||
new HashMap<ContainerId, ContainerStatus>();
|
new HashMap<ContainerId, ContainerStatus>();
|
||||||
|
@ -180,9 +181,8 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
this.nodeAddress = hostName + ":" + cmPort;
|
this.nodeAddress = hostName + ":" + cmPort;
|
||||||
this.httpAddress = hostName + ":" + httpPort;
|
this.httpAddress = hostName + ":" + httpPort;
|
||||||
this.node = node;
|
this.node = node;
|
||||||
this.nodeHealthStatus.setIsNodeHealthy(true);
|
this.healthReport = "Healthy";
|
||||||
this.nodeHealthStatus.setHealthReport("Healthy");
|
this.lastHealthReportTime = System.currentTimeMillis();
|
||||||
this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
|
|
||||||
|
|
||||||
this.latestNodeHeartBeatResponse.setResponseId(0);
|
this.latestNodeHeartBeatResponse.setResponseId(0);
|
||||||
|
|
||||||
|
@ -246,27 +246,46 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NodeHealthStatus getNodeHealthStatus() {
|
public String getHealthReport() {
|
||||||
this.readLock.lock();
|
this.readLock.lock();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return this.nodeHealthStatus;
|
return this.healthReport;
|
||||||
} finally {
|
} finally {
|
||||||
this.readLock.unlock();
|
this.readLock.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setNodeHealthStatus(NodeHealthStatus status)
|
public void setHealthReport(String healthReport) {
|
||||||
{
|
|
||||||
this.writeLock.lock();
|
this.writeLock.lock();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
this.nodeHealthStatus.setHealthReport(status.getHealthReport());
|
this.healthReport = healthReport;
|
||||||
this.nodeHealthStatus.setIsNodeHealthy(status.getIsNodeHealthy());
|
|
||||||
this.nodeHealthStatus.setLastHealthReportTime(status.getLastHealthReportTime());
|
|
||||||
} finally {
|
} finally {
|
||||||
this.writeLock.unlock();
|
this.writeLock.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setLastHealthReportTime(long lastHealthReportTime) {
|
||||||
|
this.writeLock.lock();
|
||||||
|
|
||||||
|
try {
|
||||||
|
this.lastHealthReportTime = lastHealthReportTime;
|
||||||
|
} finally {
|
||||||
|
this.writeLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getLastHealthReportTime() {
|
||||||
|
this.readLock.lock();
|
||||||
|
|
||||||
|
try {
|
||||||
|
return this.lastHealthReportTime;
|
||||||
|
} finally {
|
||||||
|
this.readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public NodeState getState() {
|
public NodeState getState() {
|
||||||
|
@ -511,7 +530,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
NodeHealthStatus remoteNodeHealthStatus =
|
NodeHealthStatus remoteNodeHealthStatus =
|
||||||
statusEvent.getNodeHealthStatus();
|
statusEvent.getNodeHealthStatus();
|
||||||
rmNode.setNodeHealthStatus(remoteNodeHealthStatus);
|
rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport());
|
||||||
|
rmNode.setLastHealthReportTime(
|
||||||
|
remoteNodeHealthStatus.getLastHealthReportTime());
|
||||||
if (!remoteNodeHealthStatus.getIsNodeHealthy()) {
|
if (!remoteNodeHealthStatus.getIsNodeHealthy()) {
|
||||||
LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: "
|
LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: "
|
||||||
+ remoteNodeHealthStatus.getHealthReport());
|
+ remoteNodeHealthStatus.getHealthReport());
|
||||||
|
@ -593,7 +614,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
// Switch the last heartbeatresponse.
|
// Switch the last heartbeatresponse.
|
||||||
rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse();
|
rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse();
|
||||||
NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus();
|
NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus();
|
||||||
rmNode.setNodeHealthStatus(remoteNodeHealthStatus);
|
rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport());
|
||||||
|
rmNode.setLastHealthReportTime(
|
||||||
|
remoteNodeHealthStatus.getLastHealthReportTime());
|
||||||
if (remoteNodeHealthStatus.getIsNodeHealthy()) {
|
if (remoteNodeHealthStatus.getIsNodeHealthy()) {
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
new NodeAddedSchedulerEvent(rmNode));
|
new NodeAddedSchedulerEvent(rmNode));
|
||||||
|
|
|
@ -71,7 +71,6 @@ class NodesPage extends RmView {
|
||||||
th(".state", "Node State").
|
th(".state", "Node State").
|
||||||
th(".nodeaddress", "Node Address").
|
th(".nodeaddress", "Node Address").
|
||||||
th(".nodehttpaddress", "Node HTTP Address").
|
th(".nodehttpaddress", "Node HTTP Address").
|
||||||
th(".healthStatus", "Health-status").
|
|
||||||
th(".lastHealthUpdate", "Last health-update").
|
th(".lastHealthUpdate", "Last health-update").
|
||||||
th(".healthReport", "Health-report").
|
th(".healthReport", "Health-report").
|
||||||
th(".containers", "Containers").
|
th(".containers", "Containers").
|
||||||
|
@ -122,8 +121,7 @@ class NodesPage extends RmView {
|
||||||
row.td().a(HttpConfig.getSchemePrefix() + httpAddress,
|
row.td().a(HttpConfig.getSchemePrefix() + httpAddress,
|
||||||
httpAddress)._();
|
httpAddress)._();
|
||||||
}
|
}
|
||||||
row.td(info.getHealthStatus()).
|
row.td().br().$title(String.valueOf(info.getLastHealthUpdate()))._().
|
||||||
td().br().$title(String.valueOf(info.getLastHealthUpdate()))._().
|
|
||||||
_(Times.format(info.getLastHealthUpdate()))._().
|
_(Times.format(info.getLastHealthUpdate()))._().
|
||||||
td(info.getHealthReport()).
|
td(info.getHealthReport()).
|
||||||
td(String.valueOf(info.getNumContainers())).
|
td(String.valueOf(info.getNumContainers())).
|
||||||
|
|
|
@ -196,7 +196,8 @@ public class RMWebServices {
|
||||||
String msg = "Error: You must specify either true or false to query on health";
|
String msg = "Error: You must specify either true or false to query on health";
|
||||||
throw new BadRequestException(msg);
|
throw new BadRequestException(msg);
|
||||||
}
|
}
|
||||||
if (nodeInfo.isHealthy() != Boolean.parseBoolean(healthState)) {
|
if ((ni.getState() != NodeState.UNHEALTHY)
|
||||||
|
!= Boolean.parseBoolean(healthState)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,9 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
|
||||||
import javax.xml.bind.annotation.XmlAccessType;
|
import javax.xml.bind.annotation.XmlAccessType;
|
||||||
import javax.xml.bind.annotation.XmlAccessorType;
|
import javax.xml.bind.annotation.XmlAccessorType;
|
||||||
import javax.xml.bind.annotation.XmlRootElement;
|
import javax.xml.bind.annotation.XmlRootElement;
|
||||||
import javax.xml.bind.annotation.XmlTransient;
|
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
|
@ -39,23 +37,18 @@ public class NodeInfo {
|
||||||
protected String id;
|
protected String id;
|
||||||
protected String nodeHostName;
|
protected String nodeHostName;
|
||||||
protected String nodeHTTPAddress;
|
protected String nodeHTTPAddress;
|
||||||
protected String healthStatus;
|
|
||||||
protected long lastHealthUpdate;
|
protected long lastHealthUpdate;
|
||||||
protected String healthReport;
|
protected String healthReport;
|
||||||
protected int numContainers;
|
protected int numContainers;
|
||||||
protected long usedMemoryMB;
|
protected long usedMemoryMB;
|
||||||
protected long availMemoryMB;
|
protected long availMemoryMB;
|
||||||
|
|
||||||
@XmlTransient
|
|
||||||
protected boolean healthy;
|
|
||||||
|
|
||||||
public NodeInfo() {
|
public NodeInfo() {
|
||||||
} // JAXB needs this
|
} // JAXB needs this
|
||||||
|
|
||||||
public NodeInfo(RMNode ni, ResourceScheduler sched) {
|
public NodeInfo(RMNode ni, ResourceScheduler sched) {
|
||||||
NodeId id = ni.getNodeID();
|
NodeId id = ni.getNodeID();
|
||||||
SchedulerNodeReport report = sched.getNodeReport(id);
|
SchedulerNodeReport report = sched.getNodeReport(id);
|
||||||
NodeHealthStatus health = ni.getNodeHealthStatus();
|
|
||||||
this.numContainers = 0;
|
this.numContainers = 0;
|
||||||
this.usedMemoryMB = 0;
|
this.usedMemoryMB = 0;
|
||||||
this.availMemoryMB = 0;
|
this.availMemoryMB = 0;
|
||||||
|
@ -69,14 +62,8 @@ public class NodeInfo {
|
||||||
this.nodeHostName = ni.getHostName();
|
this.nodeHostName = ni.getHostName();
|
||||||
this.state = ni.getState();
|
this.state = ni.getState();
|
||||||
this.nodeHTTPAddress = ni.getHttpAddress();
|
this.nodeHTTPAddress = ni.getHttpAddress();
|
||||||
this.healthy = health.getIsNodeHealthy();
|
this.lastHealthUpdate = ni.getLastHealthReportTime();
|
||||||
this.healthStatus = health.getIsNodeHealthy() ? "Healthy" : "Unhealthy";
|
this.healthReport = String.valueOf(ni.getHealthReport());
|
||||||
this.lastHealthUpdate = health.getLastHealthReportTime();
|
|
||||||
this.healthReport = String.valueOf(health.getHealthReport());
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isHealthy() {
|
|
||||||
return this.healthy;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getRack() {
|
public String getRack() {
|
||||||
|
@ -99,10 +86,6 @@ public class NodeInfo {
|
||||||
this.nodeHTTPAddress = nodeHTTPAddress;
|
this.nodeHTTPAddress = nodeHTTPAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getHealthStatus() {
|
|
||||||
return this.healthStatus;
|
|
||||||
}
|
|
||||||
|
|
||||||
public long getLastHealthUpdate() {
|
public long getLastHealthUpdate() {
|
||||||
return this.lastHealthUpdate;
|
return this.lastHealthUpdate;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,6 @@ import java.util.List;
|
||||||
import org.apache.hadoop.net.Node;
|
import org.apache.hadoop.net.Node;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
|
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
@ -96,18 +95,20 @@ public class MockNodes {
|
||||||
private int cmdPort;
|
private int cmdPort;
|
||||||
private Resource perNode;
|
private Resource perNode;
|
||||||
private String rackName;
|
private String rackName;
|
||||||
private NodeHealthStatus nodeHealthStatus;
|
private String healthReport;
|
||||||
|
private long lastHealthReportTime;
|
||||||
private NodeState state;
|
private NodeState state;
|
||||||
|
|
||||||
public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
||||||
Resource perNode, String rackName, NodeHealthStatus nodeHealthStatus,
|
Resource perNode, String rackName, String healthReport,
|
||||||
int cmdPort, String hostName, NodeState state) {
|
long lastHealthReportTime, int cmdPort, String hostName, NodeState state) {
|
||||||
this.nodeId = nodeId;
|
this.nodeId = nodeId;
|
||||||
this.nodeAddr = nodeAddr;
|
this.nodeAddr = nodeAddr;
|
||||||
this.httpAddress = httpAddress;
|
this.httpAddress = httpAddress;
|
||||||
this.perNode = perNode;
|
this.perNode = perNode;
|
||||||
this.rackName = rackName;
|
this.rackName = rackName;
|
||||||
this.nodeHealthStatus = nodeHealthStatus;
|
this.healthReport = healthReport;
|
||||||
|
this.lastHealthReportTime = lastHealthReportTime;
|
||||||
this.cmdPort = cmdPort;
|
this.cmdPort = cmdPort;
|
||||||
this.hostName = hostName;
|
this.hostName = hostName;
|
||||||
this.state = state;
|
this.state = state;
|
||||||
|
@ -143,11 +144,6 @@ public class MockNodes {
|
||||||
return this.httpAddress;
|
return this.httpAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public NodeHealthStatus getNodeHealthStatus() {
|
|
||||||
return this.nodeHealthStatus;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Resource getTotalCapability() {
|
public Resource getTotalCapability() {
|
||||||
return this.perNode;
|
return this.perNode;
|
||||||
|
@ -191,6 +187,16 @@ public class MockNodes {
|
||||||
public List<UpdatedContainerInfo> pullContainerUpdates() {
|
public List<UpdatedContainerInfo> pullContainerUpdates() {
|
||||||
return new ArrayList<UpdatedContainerInfo>();
|
return new ArrayList<UpdatedContainerInfo>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHealthReport() {
|
||||||
|
return healthReport;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getLastHealthReportTime() {
|
||||||
|
return lastHealthReportTime;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr) {
|
private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr) {
|
||||||
|
@ -209,14 +215,9 @@ public class MockNodes {
|
||||||
final NodeId nodeID = NodeId.newInstance(hostName, port);
|
final NodeId nodeID = NodeId.newInstance(hostName, port);
|
||||||
|
|
||||||
final String httpAddress = httpAddr;
|
final String httpAddress = httpAddr;
|
||||||
final NodeHealthStatus nodeHealthStatus =
|
String healthReport = (state == NodeState.UNHEALTHY) ? null : "HealthyMe";
|
||||||
recordFactory.newRecordInstance(NodeHealthStatus.class);
|
|
||||||
if (state != NodeState.UNHEALTHY) {
|
|
||||||
nodeHealthStatus.setIsNodeHealthy(true);
|
|
||||||
nodeHealthStatus.setHealthReport("HealthyMe");
|
|
||||||
}
|
|
||||||
return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, perNode, rackName,
|
return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, perNode, rackName,
|
||||||
nodeHealthStatus, nid, hostName, state);
|
healthReport, 0, nid, hostName, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static RMNode nodeInfo(int rack, final Resource perNode,
|
public static RMNode nodeInfo(int rack, final Resource perNode,
|
||||||
|
|
|
@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
@ -133,8 +134,8 @@ public class TestClientRMService {
|
||||||
List<NodeReport> nodeReports =
|
List<NodeReport> nodeReports =
|
||||||
client.getClusterNodes(request).getNodeReports();
|
client.getClusterNodes(request).getNodeReports();
|
||||||
Assert.assertEquals(1, nodeReports.size());
|
Assert.assertEquals(1, nodeReports.size());
|
||||||
Assert.assertTrue("Node is expected to be healthy!", nodeReports.get(0)
|
Assert.assertNotSame("Node is expected to be healthy!", NodeState.UNHEALTHY,
|
||||||
.getNodeHealthStatus().getIsNodeHealthy());
|
nodeReports.get(0).getNodeState());
|
||||||
|
|
||||||
// Now make the node unhealthy.
|
// Now make the node unhealthy.
|
||||||
node.nodeHeartbeat(false);
|
node.nodeHeartbeat(false);
|
||||||
|
@ -142,8 +143,8 @@ public class TestClientRMService {
|
||||||
// Call again
|
// Call again
|
||||||
nodeReports = client.getClusterNodes(request).getNodeReports();
|
nodeReports = client.getClusterNodes(request).getNodeReports();
|
||||||
Assert.assertEquals(1, nodeReports.size());
|
Assert.assertEquals(1, nodeReports.size());
|
||||||
Assert.assertFalse("Node is expected to be unhealthy!", nodeReports.get(0)
|
Assert.assertEquals("Node is expected to be unhealthy!", NodeState.UNHEALTHY,
|
||||||
.getNodeHealthStatus().getIsNodeHealthy());
|
nodeReports.get(0).getNodeState());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -346,9 +346,8 @@ public class TestRMNodeTransitions {
|
||||||
|
|
||||||
private RMNodeImpl getUnhealthyNode() {
|
private RMNodeImpl getUnhealthyNode() {
|
||||||
RMNodeImpl node = getRunningNode();
|
RMNodeImpl node = getRunningNode();
|
||||||
NodeHealthStatus status = node.getNodeHealthStatus();
|
NodeHealthStatus status = NodeHealthStatus.newInstance(false, "sick",
|
||||||
status.setHealthReport("sick");
|
System.currentTimeMillis());
|
||||||
status.setIsNodeHealthy(false);
|
|
||||||
node.handle(new RMNodeStatusEvent(node.getNodeID(), status,
|
node.handle(new RMNodeStatusEvent(node.getNodeID(), status,
|
||||||
new ArrayList<ContainerStatus>(), null, null));
|
new ArrayList<ContainerStatus>(), null, null));
|
||||||
Assert.assertEquals(NodeState.UNHEALTHY, node.getState());
|
Assert.assertEquals(NodeState.UNHEALTHY, node.getState());
|
||||||
|
|
|
@ -176,11 +176,8 @@ public class TestResourceManager {
|
||||||
nm1.heartbeat();
|
nm1.heartbeat();
|
||||||
nm1.heartbeat();
|
nm1.heartbeat();
|
||||||
Collection<RMNode> values = resourceManager.getRMContext().getRMNodes().values();
|
Collection<RMNode> values = resourceManager.getRMContext().getRMNodes().values();
|
||||||
for (RMNode ni : values)
|
for (RMNode ni : values) {
|
||||||
{
|
assertNotNull(ni.getHealthReport());
|
||||||
NodeHealthStatus nodeHealthStatus = ni.getNodeHealthStatus();
|
|
||||||
String healthReport = nodeHealthStatus.getHealthReport();
|
|
||||||
assertNotNull(healthReport);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
|
@ -391,15 +392,15 @@ public class TestResourceTrackerService {
|
||||||
int count) throws Exception {
|
int count) throws Exception {
|
||||||
|
|
||||||
int waitCount = 0;
|
int waitCount = 0;
|
||||||
while(rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
while((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
||||||
.getNodeHealthStatus().getIsNodeHealthy() == health
|
.getState() != NodeState.UNHEALTHY) == health
|
||||||
&& waitCount++ < 20) {
|
&& waitCount++ < 20) {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
wait(100);
|
wait(100);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Assert.assertFalse(rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
Assert.assertFalse((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
|
||||||
.getNodeHealthStatus().getIsNodeHealthy() == health);
|
.getState() != NodeState.UNHEALTHY) == health);
|
||||||
Assert.assertEquals("Unhealthy metrics not incremented", count,
|
Assert.assertEquals("Unhealthy metrics not incremented", count,
|
||||||
ClusterMetrics.getMetrics().getUnhealthyNMs());
|
ClusterMetrics.getMetrics().getUnhealthyNMs());
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ public class TestNodesPage {
|
||||||
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
|
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
|
||||||
// future. In that case this value should be adjusted to the new value.
|
// future. In that case this value should be adjusted to the new value.
|
||||||
final int numberOfThInMetricsTable = 13;
|
final int numberOfThInMetricsTable = 13;
|
||||||
final int numberOfActualTableHeaders = 10;
|
final int numberOfActualTableHeaders = 9;
|
||||||
|
|
||||||
private Injector injector;
|
private Injector injector;
|
||||||
|
|
||||||
|
|
|
@ -142,9 +142,8 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
||||||
rm.NMwaitForState(nm3.getNodeId(), NodeState.RUNNING);
|
rm.NMwaitForState(nm3.getNodeId(), NodeState.RUNNING);
|
||||||
RMNodeImpl node = (RMNodeImpl) rm.getRMContext().getRMNodes()
|
RMNodeImpl node = (RMNodeImpl) rm.getRMContext().getRMNodes()
|
||||||
.get(nm3.getNodeId());
|
.get(nm3.getNodeId());
|
||||||
NodeHealthStatus nodeHealth = node.getNodeHealthStatus();
|
NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(false,
|
||||||
nodeHealth.setHealthReport("test health report");
|
"test health report", System.currentTimeMillis());
|
||||||
nodeHealth.setIsNodeHealthy(false);
|
|
||||||
node.handle(new RMNodeStatusEvent(nm3.getNodeId(), nodeHealth,
|
node.handle(new RMNodeStatusEvent(nm3.getNodeId(), nodeHealth,
|
||||||
new ArrayList<ContainerStatus>(), null, null));
|
new ArrayList<ContainerStatus>(), null, null));
|
||||||
rm.NMwaitForState(nm3.getNodeId(), NodeState.UNHEALTHY);
|
rm.NMwaitForState(nm3.getNodeId(), NodeState.UNHEALTHY);
|
||||||
|
@ -357,9 +356,8 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
||||||
rm.NMwaitForState(nm1.getNodeId(), NodeState.RUNNING);
|
rm.NMwaitForState(nm1.getNodeId(), NodeState.RUNNING);
|
||||||
RMNodeImpl node = (RMNodeImpl) rm.getRMContext().getRMNodes()
|
RMNodeImpl node = (RMNodeImpl) rm.getRMContext().getRMNodes()
|
||||||
.get(nm1.getNodeId());
|
.get(nm1.getNodeId());
|
||||||
NodeHealthStatus nodeHealth = node.getNodeHealthStatus();
|
NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(false,
|
||||||
nodeHealth.setHealthReport("test health report");
|
"test health report", System.currentTimeMillis());
|
||||||
nodeHealth.setIsNodeHealthy(false);
|
|
||||||
node.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeHealth,
|
node.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeHealth,
|
||||||
new ArrayList<ContainerStatus>(), null, null));
|
new ArrayList<ContainerStatus>(), null, null));
|
||||||
rm.NMwaitForState(nm1.getNodeId(), NodeState.UNHEALTHY);
|
rm.NMwaitForState(nm1.getNodeId(), NodeState.UNHEALTHY);
|
||||||
|
@ -699,7 +697,6 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
||||||
verifyNodeInfoGeneric(nm,
|
verifyNodeInfoGeneric(nm,
|
||||||
WebServicesTestUtils.getXmlString(element, "state"),
|
WebServicesTestUtils.getXmlString(element, "state"),
|
||||||
WebServicesTestUtils.getXmlString(element, "rack"),
|
WebServicesTestUtils.getXmlString(element, "rack"),
|
||||||
WebServicesTestUtils.getXmlString(element, "healthStatus"),
|
|
||||||
WebServicesTestUtils.getXmlString(element, "id"),
|
WebServicesTestUtils.getXmlString(element, "id"),
|
||||||
WebServicesTestUtils.getXmlString(element, "nodeHostName"),
|
WebServicesTestUtils.getXmlString(element, "nodeHostName"),
|
||||||
WebServicesTestUtils.getXmlString(element, "nodeHTTPAddress"),
|
WebServicesTestUtils.getXmlString(element, "nodeHTTPAddress"),
|
||||||
|
@ -713,10 +710,10 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
||||||
|
|
||||||
public void verifyNodeInfo(JSONObject nodeInfo, MockNM nm)
|
public void verifyNodeInfo(JSONObject nodeInfo, MockNM nm)
|
||||||
throws JSONException, Exception {
|
throws JSONException, Exception {
|
||||||
assertEquals("incorrect number of elements", 11, nodeInfo.length());
|
assertEquals("incorrect number of elements", 10, nodeInfo.length());
|
||||||
|
|
||||||
verifyNodeInfoGeneric(nm, nodeInfo.getString("state"),
|
verifyNodeInfoGeneric(nm, nodeInfo.getString("state"),
|
||||||
nodeInfo.getString("rack"), nodeInfo.getString("healthStatus"),
|
nodeInfo.getString("rack"),
|
||||||
nodeInfo.getString("id"), nodeInfo.getString("nodeHostName"),
|
nodeInfo.getString("id"), nodeInfo.getString("nodeHostName"),
|
||||||
nodeInfo.getString("nodeHTTPAddress"),
|
nodeInfo.getString("nodeHTTPAddress"),
|
||||||
nodeInfo.getLong("lastHealthUpdate"),
|
nodeInfo.getLong("lastHealthUpdate"),
|
||||||
|
@ -726,32 +723,29 @@ public class TestRMWebServicesNodes extends JerseyTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void verifyNodeInfoGeneric(MockNM nm, String state, String rack,
|
public void verifyNodeInfoGeneric(MockNM nm, String state, String rack,
|
||||||
String healthStatus, String id, String nodeHostName,
|
String id, String nodeHostName,
|
||||||
String nodeHTTPAddress, long lastHealthUpdate, String healthReport,
|
String nodeHTTPAddress, long lastHealthUpdate, String healthReport,
|
||||||
int numContainers, long usedMemoryMB, long availMemoryMB)
|
int numContainers, long usedMemoryMB, long availMemoryMB)
|
||||||
throws JSONException, Exception {
|
throws JSONException, Exception {
|
||||||
|
|
||||||
RMNode node = rm.getRMContext().getRMNodes().get(nm.getNodeId());
|
RMNode node = rm.getRMContext().getRMNodes().get(nm.getNodeId());
|
||||||
NodeHealthStatus health = node.getNodeHealthStatus();
|
|
||||||
ResourceScheduler sched = rm.getResourceScheduler();
|
ResourceScheduler sched = rm.getResourceScheduler();
|
||||||
SchedulerNodeReport report = sched.getNodeReport(nm.getNodeId());
|
SchedulerNodeReport report = sched.getNodeReport(nm.getNodeId());
|
||||||
|
|
||||||
WebServicesTestUtils.checkStringMatch("state", node.getState().toString(),
|
WebServicesTestUtils.checkStringMatch("state", node.getState().toString(),
|
||||||
state);
|
state);
|
||||||
WebServicesTestUtils.checkStringMatch("rack", node.getRackName(), rack);
|
WebServicesTestUtils.checkStringMatch("rack", node.getRackName(), rack);
|
||||||
WebServicesTestUtils.checkStringMatch("healthStatus", "Healthy",
|
|
||||||
healthStatus);
|
|
||||||
WebServicesTestUtils.checkStringMatch("id", nm.getNodeId().toString(), id);
|
WebServicesTestUtils.checkStringMatch("id", nm.getNodeId().toString(), id);
|
||||||
WebServicesTestUtils.checkStringMatch("nodeHostName", nm.getNodeId()
|
WebServicesTestUtils.checkStringMatch("nodeHostName", nm.getNodeId()
|
||||||
.getHost(), nodeHostName);
|
.getHost(), nodeHostName);
|
||||||
WebServicesTestUtils.checkStringMatch("healthReport",
|
WebServicesTestUtils.checkStringMatch("healthReport",
|
||||||
String.valueOf(health.getHealthReport()), healthReport);
|
String.valueOf(node.getHealthReport()), healthReport);
|
||||||
String expectedHttpAddress = nm.getNodeId().getHost() + ":"
|
String expectedHttpAddress = nm.getNodeId().getHost() + ":"
|
||||||
+ nm.getHttpPort();
|
+ nm.getHttpPort();
|
||||||
WebServicesTestUtils.checkStringMatch("nodeHTTPAddress",
|
WebServicesTestUtils.checkStringMatch("nodeHTTPAddress",
|
||||||
expectedHttpAddress, nodeHTTPAddress);
|
expectedHttpAddress, nodeHTTPAddress);
|
||||||
|
|
||||||
long expectedHealthUpdate = health.getLastHealthReportTime();
|
long expectedHealthUpdate = node.getLastHealthReportTime();
|
||||||
assertEquals("lastHealthUpdate doesn't match, got: " + lastHealthUpdate
|
assertEquals("lastHealthUpdate doesn't match, got: " + lastHealthUpdate
|
||||||
+ " expected: " + expectedHealthUpdate, expectedHealthUpdate,
|
+ " expected: " + expectedHealthUpdate, expectedHealthUpdate,
|
||||||
lastHealthUpdate);
|
lastHealthUpdate);
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
import org.apache.hadoop.security.AccessControlException;
|
import org.apache.hadoop.security.AccessControlException;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.server.MiniYARNCluster;
|
import org.apache.hadoop.yarn.server.MiniYARNCluster;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
||||||
|
@ -243,7 +244,7 @@ public class TestDiskFailures {
|
||||||
for (int i = 0; i < 10; i++) {
|
for (int i = 0; i < 10; i++) {
|
||||||
Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
|
Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
|
||||||
.getRMNodes().values().iterator();
|
.getRMNodes().values().iterator();
|
||||||
if (iter.next().getNodeHealthStatus().getIsNodeHealthy() == isHealthy) {
|
if ((iter.next().getState() != NodeState.UNHEALTHY) == isHealthy) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// wait for the node health info to go to RM
|
// wait for the node health info to go to RM
|
||||||
|
@ -256,7 +257,7 @@ public class TestDiskFailures {
|
||||||
Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
|
Iterator<RMNode> iter = yarnCluster.getResourceManager().getRMContext()
|
||||||
.getRMNodes().values().iterator();
|
.getRMNodes().values().iterator();
|
||||||
Assert.assertEquals("RM is not updated with the health status of a node",
|
Assert.assertEquals("RM is not updated with the health status of a node",
|
||||||
isHealthy, iter.next().getNodeHealthStatus().getIsNodeHealthy());
|
isHealthy, iter.next().getState() != NodeState.UNHEALTHY);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue