YARN-3222. Fixed NPE on RMNodeImpl#ReconnectNodeTransition when a node is reconnected with a different port. Contributed by Rohith Sharmaks
(cherry picked from commit b2f1ec312e
)
This commit is contained in:
parent
824c32de1a
commit
888a445638
|
@ -575,8 +575,8 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
if (rmNode.getHttpPort() == newNode.getHttpPort()) {
|
if (rmNode.getHttpPort() == newNode.getHttpPort()) {
|
||||||
// Reset heartbeat ID since node just restarted.
|
// Reset heartbeat ID since node just restarted.
|
||||||
rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
|
rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
|
||||||
if (rmNode.getState() != NodeState.UNHEALTHY) {
|
if (rmNode.getState().equals(NodeState.RUNNING)) {
|
||||||
// Only add new node if old state is not UNHEALTHY
|
// Only add new node if old state is RUNNING
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
new NodeAddedSchedulerEvent(newNode));
|
new NodeAddedSchedulerEvent(newNode));
|
||||||
}
|
}
|
||||||
|
@ -599,30 +599,32 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
} else {
|
} else {
|
||||||
rmNode.httpPort = newNode.getHttpPort();
|
rmNode.httpPort = newNode.getHttpPort();
|
||||||
rmNode.httpAddress = newNode.getHttpAddress();
|
rmNode.httpAddress = newNode.getHttpAddress();
|
||||||
rmNode.totalCapability = newNode.getTotalCapability();
|
boolean isCapabilityChanged = false;
|
||||||
|
if (rmNode.getTotalCapability() != newNode.getTotalCapability()) {
|
||||||
|
rmNode.totalCapability = newNode.getTotalCapability();
|
||||||
|
isCapabilityChanged = true;
|
||||||
|
}
|
||||||
|
|
||||||
handleNMContainerStatus(reconnectEvent.getNMContainerStatuses(), rmNode);
|
handleNMContainerStatus(reconnectEvent.getNMContainerStatuses(), rmNode);
|
||||||
|
|
||||||
// Reset heartbeat ID since node just restarted.
|
// Reset heartbeat ID since node just restarted.
|
||||||
rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
|
rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
|
||||||
}
|
|
||||||
|
|
||||||
if (null != reconnectEvent.getRunningApplications()) {
|
|
||||||
for (ApplicationId appId : reconnectEvent.getRunningApplications()) {
|
for (ApplicationId appId : reconnectEvent.getRunningApplications()) {
|
||||||
handleRunningAppOnNode(rmNode, rmNode.context, appId, rmNode.nodeId);
|
handleRunningAppOnNode(rmNode, rmNode.context, appId, rmNode.nodeId);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
if (isCapabilityChanged
|
||||||
new NodesListManagerEvent(
|
&& rmNode.getState().equals(NodeState.RUNNING)) {
|
||||||
NodesListManagerEventType.NODE_USABLE, rmNode));
|
// Update scheduler node's capacity for reconnect node.
|
||||||
if (rmNode.getState().equals(NodeState.RUNNING)) {
|
rmNode.context
|
||||||
// Update scheduler node's capacity for reconnect node.
|
.getDispatcher()
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
.getEventHandler()
|
||||||
new NodeResourceUpdateSchedulerEvent(rmNode,
|
.handle(
|
||||||
ResourceOption.newInstance(newNode.getTotalCapability(), -1)));
|
new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption
|
||||||
|
.newInstance(newNode.getTotalCapability(), -1)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleNMContainerStatus(
|
private void handleNMContainerStatus(
|
||||||
|
|
|
@ -51,7 +51,7 @@ public class MockNM {
|
||||||
private final int memory;
|
private final int memory;
|
||||||
private final int vCores;
|
private final int vCores;
|
||||||
private ResourceTrackerService resourceTracker;
|
private ResourceTrackerService resourceTracker;
|
||||||
private final int httpPort = 2;
|
private int httpPort = 2;
|
||||||
private MasterKey currentContainerTokenMasterKey;
|
private MasterKey currentContainerTokenMasterKey;
|
||||||
private MasterKey currentNMTokenMasterKey;
|
private MasterKey currentNMTokenMasterKey;
|
||||||
private String version;
|
private String version;
|
||||||
|
@ -87,6 +87,10 @@ public class MockNM {
|
||||||
return httpPort;
|
return httpPort;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setHttpPort(int port) {
|
||||||
|
httpPort = port;
|
||||||
|
}
|
||||||
|
|
||||||
public void setResourceTrackerService(ResourceTrackerService resourceTracker) {
|
public void setResourceTrackerService(ResourceTrackerService resourceTracker) {
|
||||||
this.resourceTracker = resourceTracker;
|
this.resourceTracker = resourceTracker;
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResp
|
||||||
import org.apache.hadoop.yarn.server.api.records.NodeAction;
|
import org.apache.hadoop.yarn.server.api.records.NodeAction;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
|
@ -633,6 +634,20 @@ public class TestResourceTrackerService {
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
|
Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction()));
|
||||||
Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
|
Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
|
||||||
|
|
||||||
|
// reconnect healthy node changing http port
|
||||||
|
nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService());
|
||||||
|
nm1.setHttpPort(3);
|
||||||
|
nm1.registerNode();
|
||||||
|
dispatcher.await();
|
||||||
|
response = nm1.nodeHeartbeat(true);
|
||||||
|
response = nm1.nodeHeartbeat(true);
|
||||||
|
dispatcher.await();
|
||||||
|
RMNode rmNode = rm.getRMContext().getRMNodes().get(nm1.getNodeId());
|
||||||
|
Assert.assertEquals(3, rmNode.getHttpPort());
|
||||||
|
Assert.assertEquals(5120, rmNode.getTotalCapability().getMemory());
|
||||||
|
Assert.assertEquals(5120 + 15360, metrics.getAvailableMB());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void writeToHostsFile(String... hosts) throws IOException {
|
private void writeToHostsFile(String... hosts) throws IOException {
|
||||||
|
|
Loading…
Reference in New Issue