YARN-2115. Replaced RegisterNodeManagerRequest's ContainerStatus with a new NMContainerStatus which has more information that is needed for work-preserving RM-restart. Contributed by Jian He.

svn merge --ignore-ancestry -c 1598790 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1598791 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2014-05-31 00:21:47 +00:00
parent c5c3241da8
commit a73447fa07
16 changed files with 665 additions and 133 deletions

View File

@ -111,6 +111,10 @@ Release 2.5.0 - UNRELEASED
YARN-2010. Document yarn.resourcemanager.zk-auth and its scope. YARN-2010. Document yarn.resourcemanager.zk-auth and its scope.
(Robert Kanter via kasha) (Robert Kanter via kasha)
YARN-2115. Replaced RegisterNodeManagerRequest's ContainerStatus with a new
NMContainerStatus which has more information that is needed for
work-preserving RM-restart. (Jian He via vinodkv)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -0,0 +1,98 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.api.protocolrecords;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.Records;
/**
* NMContainerStatus includes the current information of a container. This
* record is used by YARN only, whereas {@link ContainerStatus} is used both
* inside YARN and by end-users.
*/
public abstract class NMContainerStatus {
public static NMContainerStatus newInstance(ContainerId containerId,
ContainerState containerState, Resource allocatedResource,
String diagnostics, int containerExitStatus) {
NMContainerStatus status =
Records.newRecord(NMContainerStatus.class);
status.setContainerId(containerId);
status.setContainerState(containerState);
status.setAllocatedResource(allocatedResource);
status.setDiagnostics(diagnostics);
status.setContainerExitStatus(containerExitStatus);
return status;
}
/**
* Get the <code>ContainerId</code> of the container.
*
* @return <code>ContainerId</code> of the container.
*/
public abstract ContainerId getContainerId();
public abstract void setContainerId(ContainerId containerId);
/**
* Get the allocated <code>Resource</code> of the container.
*
* @return allocated <code>Resource</code> of the container.
*/
public abstract Resource getAllocatedResource();
public abstract void setAllocatedResource(Resource resource);
/**
* Get the DiagnosticsInfo of the container.
*
* @return DiagnosticsInfo of the container
*/
public abstract String getDiagnostics();
public abstract void setDiagnostics(String diagnostics);
public abstract ContainerState getContainerState();
public abstract void setContainerState(ContainerState containerState);
/**
* Get the final <code>exit status</code> of the container.
*
* @return final <code>exit status</code> of the container.
*/
public abstract int getContainerExitStatus();
public abstract void setContainerExitStatus(int containerExitStatus);
/**
* Get the <code>Priority</code> of the request.
* @return <code>Priority</code> of the request
*/
public abstract Priority getPriority();
public abstract void setPriority(Priority priority);
}

View File

@ -20,7 +20,6 @@ package org.apache.hadoop.yarn.server.api.protocolrecords;
import java.util.List; import java.util.List;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.Records;
@ -29,7 +28,7 @@ public abstract class RegisterNodeManagerRequest {
public static RegisterNodeManagerRequest newInstance(NodeId nodeId, public static RegisterNodeManagerRequest newInstance(NodeId nodeId,
int httpPort, Resource resource, String nodeManagerVersionId, int httpPort, Resource resource, String nodeManagerVersionId,
List<ContainerStatus> containerStatuses) { List<NMContainerStatus> containerStatuses) {
RegisterNodeManagerRequest request = RegisterNodeManagerRequest request =
Records.newRecord(RegisterNodeManagerRequest.class); Records.newRecord(RegisterNodeManagerRequest.class);
request.setHttpPort(httpPort); request.setHttpPort(httpPort);
@ -44,11 +43,12 @@ public abstract class RegisterNodeManagerRequest {
public abstract int getHttpPort(); public abstract int getHttpPort();
public abstract Resource getResource(); public abstract Resource getResource();
public abstract String getNMVersion(); public abstract String getNMVersion();
public abstract List<ContainerStatus> getContainerStatuses(); public abstract List<NMContainerStatus> getNMContainerStatuses();
public abstract void setNodeId(NodeId nodeId); public abstract void setNodeId(NodeId nodeId);
public abstract void setHttpPort(int port); public abstract void setHttpPort(int port);
public abstract void setResource(Resource resource); public abstract void setResource(Resource resource);
public abstract void setNMVersion(String version); public abstract void setNMVersion(String version);
public abstract void setContainerStatuses(List<ContainerStatus> containerStatuses); public abstract void setContainerStatuses(
List<NMContainerStatus> containerStatuses);
} }

View File

@ -0,0 +1,266 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.PriorityPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto;
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NMContainerStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NMContainerStatusProtoOrBuilder;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import com.google.protobuf.TextFormat;
public class NMContainerStatusPBImpl extends NMContainerStatus {
NMContainerStatusProto proto = NMContainerStatusProto
.getDefaultInstance();
NMContainerStatusProto.Builder builder = null;
boolean viaProto = false;
private ContainerId containerId = null;
private Resource resource = null;
private Priority priority = null;
public NMContainerStatusPBImpl() {
builder = NMContainerStatusProto.newBuilder();
}
public NMContainerStatusPBImpl(NMContainerStatusProto proto) {
this.proto = proto;
viaProto = true;
}
public NMContainerStatusProto getProto() {
mergeLocalToProto();
proto = viaProto ? proto : builder.build();
viaProto = true;
return proto;
}
@Override
public int hashCode() {
return this.getProto().hashCode();
}
@Override
public boolean equals(Object other) {
if (other == null)
return false;
if (other.getClass().isAssignableFrom(this.getClass())) {
return this.getProto().equals(this.getClass().cast(other).getProto());
}
return false;
}
@Override
public String toString() {
return TextFormat.shortDebugString(getProto());
}
@Override
public Resource getAllocatedResource() {
if (this.resource != null) {
return this.resource;
}
NMContainerStatusProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasResource()) {
return null;
}
this.resource = convertFromProtoFormat(p.getResource());
return this.resource;
}
@Override
public ContainerId getContainerId() {
if (this.containerId != null) {
return this.containerId;
}
NMContainerStatusProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasContainerId()) {
return null;
}
this.containerId = convertFromProtoFormat(p.getContainerId());
return this.containerId;
}
@Override
public String getDiagnostics() {
NMContainerStatusProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasDiagnostics()) {
return null;
}
return (p.getDiagnostics());
}
@Override
public ContainerState getContainerState() {
NMContainerStatusProtoOrBuilder p = viaProto ? proto : builder;
if (!p.hasContainerState()) {
return null;
}
return convertFromProtoFormat(p.getContainerState());
}
@Override
public void setAllocatedResource(Resource resource) {
maybeInitBuilder();
if (resource == null)
builder.clearResource();
this.resource = resource;
}
public void setContainerId(ContainerId containerId) {
maybeInitBuilder();
if (containerId == null)
builder.clearContainerId();
this.containerId = containerId;
}
@Override
public void setDiagnostics(String diagnosticsInfo) {
maybeInitBuilder();
if (diagnosticsInfo == null) {
builder.clearDiagnostics();
return;
}
builder.setDiagnostics(diagnosticsInfo);
}
@Override
public void setContainerState(ContainerState containerState) {
maybeInitBuilder();
if (containerState == null) {
builder.clearContainerState();
return;
}
builder.setContainerState(convertToProtoFormat(containerState));
}
@Override
public int getContainerExitStatus() {
NMContainerStatusProtoOrBuilder p = viaProto ? proto : builder;
return p.getContainerExitStatus();
}
@Override
public void setContainerExitStatus(int containerExitStatus) {
maybeInitBuilder();
builder.setContainerExitStatus(containerExitStatus);
}
@Override
public Priority getPriority() {
NMContainerStatusProtoOrBuilder p = viaProto ? proto : builder;
if (this.priority != null) {
return this.priority;
}
if (!p.hasPriority()) {
return null;
}
this.priority = convertFromProtoFormat(p.getPriority());
return this.priority;
}
@Override
public void setPriority(Priority priority) {
maybeInitBuilder();
if (priority == null)
builder.clearPriority();
this.priority = priority;
}
private void mergeLocalToBuilder() {
if (this.containerId != null
&& !((ContainerIdPBImpl) containerId).getProto().equals(
builder.getContainerId())) {
builder.setContainerId(convertToProtoFormat(this.containerId));
}
if (this.resource != null
&& !((ResourcePBImpl) this.resource).getProto().equals(
builder.getResource())) {
builder.setResource(convertToProtoFormat(this.resource));
}
if (this.priority != null) {
builder.setPriority(convertToProtoFormat(this.priority));
}
}
private void mergeLocalToProto() {
if (viaProto)
maybeInitBuilder();
mergeLocalToBuilder();
proto = builder.build();
viaProto = true;
}
private void maybeInitBuilder() {
if (viaProto || builder == null) {
builder = NMContainerStatusProto.newBuilder(proto);
}
viaProto = false;
}
private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) {
return new ContainerIdPBImpl(p);
}
private ContainerIdProto convertToProtoFormat(ContainerId t) {
return ((ContainerIdPBImpl) t).getProto();
}
private ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
return new ResourcePBImpl(p);
}
private ResourceProto convertToProtoFormat(Resource t) {
return ((ResourcePBImpl) t).getProto();
}
private ContainerStateProto
convertToProtoFormat(ContainerState containerState) {
return ProtoUtils.convertToProtoFormat(containerState);
}
private ContainerState convertFromProtoFormat(
ContainerStateProto containerState) {
return ProtoUtils.convertFromProtoFormat(containerState);
}
private PriorityPBImpl convertFromProtoFormat(PriorityProto p) {
return new PriorityPBImpl(p);
}
private PriorityProto convertToProtoFormat(Priority t) {
return ((PriorityPBImpl)t).getProto();
}
}

View File

@ -20,24 +20,18 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator;
import java.util.List; import java.util.List;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ProtoBase;
import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto;
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NMContainerStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RegisterNodeManagerRequestProto; import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RegisterNodeManagerRequestProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RegisterNodeManagerRequestProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RegisterNodeManagerRequestProtoOrBuilder;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
@ -49,7 +43,7 @@ public class RegisterNodeManagerRequestPBImpl extends RegisterNodeManagerRequest
private Resource resource = null; private Resource resource = null;
private NodeId nodeId = null; private NodeId nodeId = null;
private List<ContainerStatus> containerStatuses = null; private List<NMContainerStatus> containerStatuses = null;
public RegisterNodeManagerRequestPBImpl() { public RegisterNodeManagerRequestPBImpl() {
builder = RegisterNodeManagerRequestProto.newBuilder(); builder = RegisterNodeManagerRequestProto.newBuilder();
@ -69,7 +63,7 @@ public class RegisterNodeManagerRequestPBImpl extends RegisterNodeManagerRequest
private void mergeLocalToBuilder() { private void mergeLocalToBuilder() {
if (this.containerStatuses != null) { if (this.containerStatuses != null) {
addContainerStatusesToProto(); addNMContainerStatusesToProto();
} }
if (this.resource != null) { if (this.resource != null) {
builder.setResource(convertToProtoFormat(this.resource)); builder.setResource(convertToProtoFormat(this.resource));
@ -80,6 +74,18 @@ public class RegisterNodeManagerRequestPBImpl extends RegisterNodeManagerRequest
} }
private synchronized void addNMContainerStatusesToProto() {
maybeInitBuilder();
builder.clearContainerStatuses();
List<NMContainerStatusProto> list =
new ArrayList<NMContainerStatusProto>();
for (NMContainerStatus status : this.containerStatuses) {
list.add(convertToProtoFormat(status));
}
builder.addAllContainerStatuses(list);
}
private void mergeLocalToProto() { private void mergeLocalToProto() {
if (viaProto) if (viaProto)
maybeInitBuilder(); maybeInitBuilder();
@ -154,63 +160,31 @@ public class RegisterNodeManagerRequestPBImpl extends RegisterNodeManagerRequest
} }
@Override @Override
public List<ContainerStatus> getContainerStatuses() { public List<NMContainerStatus> getNMContainerStatuses() {
initContainerStatuses(); initContainerRecoveryReports();
return containerStatuses; return containerStatuses;
} }
private void initContainerStatuses() { private void initContainerRecoveryReports() {
if (this.containerStatuses != null) { if (this.containerStatuses != null) {
return; return;
} }
RegisterNodeManagerRequestProtoOrBuilder p = viaProto ? proto : builder; RegisterNodeManagerRequestProtoOrBuilder p = viaProto ? proto : builder;
List<ContainerStatusProto> list = p.getContainerStatusesList(); List<NMContainerStatusProto> list = p.getContainerStatusesList();
this.containerStatuses = new ArrayList<ContainerStatus>(); this.containerStatuses = new ArrayList<NMContainerStatus>();
for (ContainerStatusProto c : list) { for (NMContainerStatusProto c : list) {
this.containerStatuses.add(convertFromProtoFormat(c)); this.containerStatuses.add(convertFromProtoFormat(c));
} }
} }
@Override @Override
public void setContainerStatuses(List<ContainerStatus> containers) { public void setContainerStatuses(
if (containers == null) { List<NMContainerStatus> containerReports) {
if (containerReports == null) {
return; return;
} }
initContainerStatuses(); initContainerRecoveryReports();
this.containerStatuses.addAll(containers); this.containerStatuses.addAll(containerReports);
}
private void addContainerStatusesToProto() {
maybeInitBuilder();
builder.clearContainerStatuses();
if (containerStatuses == null) {
return;
}
Iterable<ContainerStatusProto> it = new Iterable<ContainerStatusProto>() {
@Override
public Iterator<ContainerStatusProto> iterator() {
return new Iterator<ContainerStatusProto>() {
Iterator<ContainerStatus> iter = containerStatuses.iterator();
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public ContainerStatusProto next() {
return convertToProtoFormat(iter.next());
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
builder.addAllContainerStatuses(it);
} }
@Override @Override
@ -259,11 +233,11 @@ public class RegisterNodeManagerRequestPBImpl extends RegisterNodeManagerRequest
return ((ResourcePBImpl)t).getProto(); return ((ResourcePBImpl)t).getProto();
} }
private ContainerStatusPBImpl convertFromProtoFormat(ContainerStatusProto c) { private NMContainerStatusPBImpl convertFromProtoFormat(NMContainerStatusProto c) {
return new ContainerStatusPBImpl(c); return new NMContainerStatusPBImpl(c);
} }
private ContainerStatusProto convertToProtoFormat(ContainerStatus c) { private NMContainerStatusProto convertToProtoFormat(NMContainerStatus c) {
return ((ContainerStatusPBImpl)c).getProto(); return ((NMContainerStatusPBImpl)c).getProto();
} }
} }

View File

@ -30,7 +30,7 @@ message RegisterNodeManagerRequestProto {
optional int32 http_port = 3; optional int32 http_port = 3;
optional ResourceProto resource = 4; optional ResourceProto resource = 4;
optional string nm_version = 5; optional string nm_version = 5;
repeated ContainerStatusProto containerStatuses = 6; repeated NMContainerStatusProto container_statuses = 6;
} }
message RegisterNodeManagerResponseProto { message RegisterNodeManagerResponseProto {
@ -58,3 +58,12 @@ message NodeHeartbeatResponseProto {
optional int64 nextHeartBeatInterval = 7; optional int64 nextHeartBeatInterval = 7;
optional string diagnostics_message = 8; optional string diagnostics_message = 8;
} }
message NMContainerStatusProto {
optional ContainerIdProto container_id = 1;
optional ContainerStateProto container_state = 2;
optional ResourceProto resource = 3;
optional PriorityProto priority = 4;
optional string diagnostics = 5 [default = "N/A"];
optional int32 container_exit_status = 6;
}

View File

@ -0,0 +1,99 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.api.protocolrecords;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.NMContainerStatusPBImpl;
import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeManagerRequestPBImpl;
import org.junit.Assert;
import org.junit.Test;
public class TestProtocolRecords {
@Test
public void testContainerRecoveryReport() {
ApplicationId appId = ApplicationId.newInstance(123456789, 1);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId containerId = ContainerId.newInstance(attemptId, 1);
Resource resource = Resource.newInstance(1000, 200);
NMContainerStatus report =
NMContainerStatus.newInstance(containerId,
ContainerState.COMPLETE, resource, "diagnostics",
ContainerExitStatus.ABORTED);
NMContainerStatus reportProto =
new NMContainerStatusPBImpl(
((NMContainerStatusPBImpl) report).getProto());
Assert.assertEquals("diagnostics", reportProto.getDiagnostics());
Assert.assertEquals(resource, reportProto.getAllocatedResource());
Assert.assertEquals(ContainerExitStatus.ABORTED,
reportProto.getContainerExitStatus());
Assert.assertEquals(ContainerState.COMPLETE,
reportProto.getContainerState());
Assert.assertEquals(containerId, reportProto.getContainerId());
}
public static NMContainerStatus createContainerRecoveryReport(
ApplicationAttemptId appAttemptId, int id, ContainerState containerState) {
ContainerId containerId = ContainerId.newInstance(appAttemptId, id);
NMContainerStatus containerReport =
NMContainerStatus.newInstance(containerId, containerState,
Resource.newInstance(1024, 1), "diagnostics", 0);
return containerReport;
}
@Test
public void testRegisterNodeManagerRequest() {
ApplicationId appId = ApplicationId.newInstance(123456789, 1);
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
ContainerId containerId = ContainerId.newInstance(attemptId, 1);
NMContainerStatus containerReport =
NMContainerStatus.newInstance(containerId,
ContainerState.RUNNING, Resource.newInstance(1024, 1), "diagnostics",
0);
List<NMContainerStatus> reports = Arrays.asList(containerReport);
RegisterNodeManagerRequest request =
RegisterNodeManagerRequest.newInstance(
NodeId.newInstance("1.1.1.1", 1000), 8080,
Resource.newInstance(1024, 1), "NM-version-id", reports);
RegisterNodeManagerRequest requestProto =
new RegisterNodeManagerRequestPBImpl(
((RegisterNodeManagerRequestPBImpl) request).getProto());
Assert.assertEquals(containerReport, requestProto
.getNMContainerStatuses().get(0));
Assert.assertEquals(8080, requestProto.getHttpPort());
Assert.assertEquals("NM-version-id", requestProto.getNMVersion());
Assert.assertEquals(NodeId.newInstance("1.1.1.1", 1000),
requestProto.getNodeId());
Assert.assertEquals(Resource.newInstance(1024, 1),
requestProto.getResource());
}
}

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants; import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.ServerRMProxy; import org.apache.hadoop.yarn.server.api.ServerRMProxy;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
@ -246,13 +247,12 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
@VisibleForTesting @VisibleForTesting
protected void registerWithRM() protected void registerWithRM()
throws YarnException, IOException { throws YarnException, IOException {
List<ContainerStatus> containerStatuses = getContainerStatuses(); List<NMContainerStatus> containerReports = getNMContainerStatuses();
RegisterNodeManagerRequest request = RegisterNodeManagerRequest request =
RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource, RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
nodeManagerVersionId, containerStatuses); nodeManagerVersionId, containerReports);
if (containerStatuses != null) { if (containerReports != null) {
LOG.info("Registering with RM using finished containers :" LOG.info("Registering with RM using containers :" + containerReports);
+ containerStatuses);
} }
RegisterNodeManagerResponse regNMResponse = RegisterNodeManagerResponse regNMResponse =
resourceTracker.registerNodeManager(request); resourceTracker.registerNodeManager(request);
@ -375,6 +375,27 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
return containerStatuses; return containerStatuses;
} }
// These NMContainerStatus are sent on NM registration and used by YARN only.
private List<NMContainerStatus> getNMContainerStatuses() {
List<NMContainerStatus> containerStatuses =
new ArrayList<NMContainerStatus>();
for (Container container : this.context.getContainers().values()) {
NMContainerStatus status =
container.getNMContainerStatus();
containerStatuses.add(status);
if (status.getContainerState().equals(ContainerState.COMPLETE)) {
// Adding to finished containers cache. Cache will keep it around at
// least for #durationToTrackStoppedContainers duration. In the
// subsequent call to stop container it will get removed from cache.
updateStoppedContainersInCache(container.getContainerId());
addCompletedContainer(container);
}
}
LOG.info("Sending out " + containerStatuses.size()
+ " NM container statuses: " + containerStatuses);
return containerStatuses;
}
private void addCompletedContainer(Container container) { private void addCompletedContainer(Container container) {
synchronized (previousCompletedContainers) { synchronized (previousCompletedContainers) {
previousCompletedContainers.add(container.getContainerId()); previousCompletedContainers.add(container.getContainerId());

View File

@ -29,6 +29,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
public interface Container extends EventHandler<ContainerEvent> { public interface Container extends EventHandler<ContainerEvent> {
@ -50,6 +51,8 @@ public interface Container extends EventHandler<ContainerEvent> {
ContainerStatus cloneAndGetContainerStatus(); ContainerStatus cloneAndGetContainerStatus();
NMContainerStatus getNMContainerStatus();
String toString(); String toString();
} }

View File

@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger;
import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.nodemanager.NMAuditLogger.AuditConstants;
@ -387,6 +388,17 @@ public class ContainerImpl implements Container {
} }
} }
@Override
public NMContainerStatus getNMContainerStatus() {
this.readLock.lock();
try {
return NMContainerStatus.newInstance(this.containerId,
getCurrentState(), getResource(), diagnostics.toString(), exitCode);
} finally {
this.readLock.unlock();
}
}
@Override @Override
public ContainerId getContainerId() { public ContainerId getContainerId() {
return this.containerId; return this.containerId;

View File

@ -18,6 +18,9 @@
package org.apache.hadoop.yarn.server.nodemanager; package org.apache.hadoop.yarn.server.nodemanager;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@ -27,17 +30,18 @@ import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CyclicBarrier; import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.junit.Assert;
import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
@ -46,6 +50,7 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
@ -53,10 +58,12 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResp
import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils;
import org.junit.After; import org.junit.After;
import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -185,6 +192,9 @@ public class TestNodeManagerResync {
TestNodeStatusUpdater.createContainerStatus(2, ContainerState.COMPLETE); TestNodeStatusUpdater.createContainerStatus(2, ContainerState.COMPLETE);
final Container container = final Container container =
TestNodeStatusUpdater.getMockContainer(testCompleteContainer); TestNodeStatusUpdater.getMockContainer(testCompleteContainer);
NMContainerStatus report =
createNMContainerStatus(2, ContainerState.COMPLETE);
when(container.getNMContainerStatus()).thenReturn(report);
NodeManager nm = new NodeManager() { NodeManager nm = new NodeManager() {
int registerCount = 0; int registerCount = 0;
@ -203,7 +213,7 @@ public class TestNodeManagerResync {
if (registerCount == 0) { if (registerCount == 0) {
// first register, no containers info. // first register, no containers info.
try { try {
Assert.assertEquals(0, request.getContainerStatuses() Assert.assertEquals(0, request.getNMContainerStatuses()
.size()); .size());
} catch (AssertionError error) { } catch (AssertionError error) {
error.printStackTrace(); error.printStackTrace();
@ -214,8 +224,8 @@ public class TestNodeManagerResync {
testCompleteContainer.getContainerId(), container); testCompleteContainer.getContainerId(), container);
} else { } else {
// second register contains the completed container info. // second register contains the completed container info.
List<ContainerStatus> statuses = List<NMContainerStatus> statuses =
request.getContainerStatuses(); request.getNMContainerStatuses();
try { try {
Assert.assertEquals(1, statuses.size()); Assert.assertEquals(1, statuses.size());
Assert.assertEquals(testCompleteContainer.getContainerId(), Assert.assertEquals(testCompleteContainer.getContainerId(),
@ -510,4 +520,16 @@ public class TestNodeManagerResync {
} }
} }
}} }}
public static NMContainerStatus createNMContainerStatus(int id,
ContainerState containerState) {
ApplicationId applicationId = ApplicationId.newInstance(0, 1);
ApplicationAttemptId applicationAttemptId =
ApplicationAttemptId.newInstance(applicationId, 1);
ContainerId containerId = ContainerId.newInstance(applicationAttemptId, id);
NMContainerStatus containerReport =
NMContainerStatus.newInstance(containerId, containerState,
Resource.newInstance(1024, 1), "recover container", 0);
return containerReport;
}
} }

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
@ -134,4 +135,9 @@ public class MockContainer implements Container {
public ContainerTokenIdentifier getContainerTokenIdentifier() { public ContainerTokenIdentifier getContainerTokenIdentifier() {
return this.containerTokenIdentifier; return this.containerTokenIdentifier;
} }
@Override
public NMContainerStatus getNMContainerStatus() {
return null;
}
} }

View File

@ -32,7 +32,6 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.VersionUtil; import org.apache.hadoop.util.VersionUtil;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
@ -45,6 +44,7 @@ import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
@ -195,7 +195,7 @@ public class ResourceTrackerService extends AbstractService implements
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@VisibleForTesting @VisibleForTesting
void handleContainerStatus(ContainerStatus containerStatus) { void handleNMContainerStatus(NMContainerStatus containerStatus) {
ApplicationAttemptId appAttemptId = ApplicationAttemptId appAttemptId =
containerStatus.getContainerId().getApplicationAttemptId(); containerStatus.getContainerId().getApplicationAttemptId();
RMApp rmApp = RMApp rmApp =
@ -219,11 +219,14 @@ public class ResourceTrackerService extends AbstractService implements
RMAppAttempt rmAppAttempt = rmApp.getRMAppAttempt(appAttemptId); RMAppAttempt rmAppAttempt = rmApp.getRMAppAttempt(appAttemptId);
Container masterContainer = rmAppAttempt.getMasterContainer(); Container masterContainer = rmAppAttempt.getMasterContainer();
if (masterContainer.getId().equals(containerStatus.getContainerId()) if (masterContainer.getId().equals(containerStatus.getContainerId())
&& containerStatus.getState() == ContainerState.COMPLETE) { && containerStatus.getContainerState() == ContainerState.COMPLETE) {
ContainerStatus status =
ContainerStatus.newInstance(containerStatus.getContainerId(),
containerStatus.getContainerState(), containerStatus.getDiagnostics(),
containerStatus.getContainerExitStatus());
// sending master container finished event. // sending master container finished event.
RMAppAttemptContainerFinishedEvent evt = RMAppAttemptContainerFinishedEvent evt =
new RMAppAttemptContainerFinishedEvent(appAttemptId, new RMAppAttemptContainerFinishedEvent(appAttemptId, status);
containerStatus);
rmContext.getDispatcher().getEventHandler().handle(evt); rmContext.getDispatcher().getEventHandler().handle(evt);
} }
} }
@ -240,11 +243,11 @@ public class ResourceTrackerService extends AbstractService implements
Resource capability = request.getResource(); Resource capability = request.getResource();
String nodeManagerVersion = request.getNMVersion(); String nodeManagerVersion = request.getNMVersion();
if (!request.getContainerStatuses().isEmpty()) { if (!request.getNMContainerStatuses().isEmpty()) {
LOG.info("received container statuses on node manager register :" LOG.info("received container statuses on node manager register :"
+ request.getContainerStatuses()); + request.getNMContainerStatuses());
for (ContainerStatus containerStatus : request.getContainerStatuses()) { for (NMContainerStatus report : request.getNMContainerStatuses()) {
handleContainerStatus(containerStatus); handleNMContainerStatus(report);
} }
} }
RegisterNodeManagerResponse response = recordFactory RegisterNodeManagerResponse response = recordFactory

View File

@ -18,7 +18,6 @@
package org.apache.hadoop.yarn.server.resourcemanager; package org.apache.hadoop.yarn.server.resourcemanager;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -32,7 +31,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
@ -105,14 +104,14 @@ public class MockNM {
} }
public RegisterNodeManagerResponse registerNode( public RegisterNodeManagerResponse registerNode(
List<ContainerStatus> containerStatus) throws Exception{ List<NMContainerStatus> containerReports) throws Exception{
RegisterNodeManagerRequest req = Records.newRecord( RegisterNodeManagerRequest req = Records.newRecord(
RegisterNodeManagerRequest.class); RegisterNodeManagerRequest.class);
req.setNodeId(nodeId); req.setNodeId(nodeId);
req.setHttpPort(httpPort); req.setHttpPort(httpPort);
Resource resource = BuilderUtils.newResource(memory, vCores); Resource resource = BuilderUtils.newResource(memory, vCores);
req.setResource(resource); req.setResource(resource);
req.setContainerStatuses(containerStatus); req.setContainerStatuses(containerReports);
req.setNMVersion(version); req.setNMVersion(version);
RegisterNodeManagerResponse registrationResponse = RegisterNodeManagerResponse registrationResponse =
resourceTracker.registerNodeManager(req); resourceTracker.registerNodeManager(req);
@ -185,4 +184,11 @@ public class MockNM {
return heartbeatResponse; return heartbeatResponse;
} }
public int getMemory() {
return memory;
}
public int getvCores() {
return vCores;
}
} }

View File

@ -30,6 +30,7 @@ import java.net.InetSocketAddress;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -67,13 +68,14 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore;
@ -303,13 +305,11 @@ public class TestRMRestart {
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService()); nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
nm2 = new MockNM("127.0.0.2:5678", 15120, rm2.getResourceTrackerService()); nm2 = new MockNM("127.0.0.2:5678", 15120, rm2.getResourceTrackerService());
List<ContainerStatus> containerStatuses = new ArrayList<ContainerStatus>(); NMContainerStatus status =
ContainerStatus containerStatus = TestRMRestart
BuilderUtils.newContainerStatus(BuilderUtils.newContainerId(loadedApp1 .createNMContainerStatus(loadedApp1.getCurrentAppAttempt()
.getCurrentAppAttempt().getAppAttemptId(), 1), .getAppAttemptId(), 1, ContainerState.COMPLETE);
ContainerState.COMPLETE, "Killed AM container", 143); nm1.registerNode(Arrays.asList(status));
containerStatuses.add(containerStatus);
nm1.registerNode(containerStatuses);
nm2.registerNode(); nm2.registerNode();
rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED); rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED);
@ -511,13 +511,10 @@ public class TestRMRestart {
rmApp.getAppAttempts().get(am2.getApplicationAttemptId()) rmApp.getAppAttempts().get(am2.getApplicationAttemptId())
.getAppAttemptState()); .getAppAttemptState());
List<ContainerStatus> containerStatuses = new ArrayList<ContainerStatus>(); NMContainerStatus status =
ContainerStatus containerStatus = TestRMRestart.createNMContainerStatus(
BuilderUtils.newContainerStatus( am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
BuilderUtils.newContainerId(am2.getApplicationAttemptId(), 1), nm1.registerNode(Arrays.asList(status));
ContainerState.COMPLETE, "Killed AM container", 143);
containerStatuses.add(containerStatus);
nm1.registerNode(containerStatuses);
rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED); rm2.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.FAILED);
launchAM(rmApp, rm2, nm1); launchAM(rmApp, rm2, nm1);
Assert.assertEquals(3, rmApp.getAppAttempts().size()); Assert.assertEquals(3, rmApp.getAppAttempts().size());
@ -1678,13 +1675,12 @@ public class TestRMRestart {
am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>()); am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
nm1.nodeHeartbeat(true); nm1.nodeHeartbeat(true);
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService()); nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
List<ContainerStatus> containerStatuses = new ArrayList<ContainerStatus>();
ContainerStatus containerStatus = NMContainerStatus status =
BuilderUtils.newContainerStatus(BuilderUtils.newContainerId(loadedApp1 TestRMRestart
.getCurrentAppAttempt().getAppAttemptId(), 1), .createNMContainerStatus(loadedApp1.getCurrentAppAttempt()
ContainerState.COMPLETE, "Killed AM container", 143); .getAppAttemptId(), 1, ContainerState.COMPLETE);
containerStatuses.add(containerStatus); nm1.registerNode(Arrays.asList(status));
nm1.registerNode(containerStatuses);
while (loadedApp1.getAppAttempts().size() != 2) { while (loadedApp1.getAppAttempts().size() != 2) {
Thread.sleep(200); Thread.sleep(200);
} }
@ -1808,12 +1804,10 @@ public class TestRMRestart {
// ResourceTrackerService is started. // ResourceTrackerService is started.
super.serviceStart(); super.serviceStart();
nm1.setResourceTrackerService(getResourceTrackerService()); nm1.setResourceTrackerService(getResourceTrackerService());
List<ContainerStatus> status = new ArrayList<ContainerStatus>(); NMContainerStatus status =
ContainerId amContainer = TestRMRestart.createNMContainerStatus(
ContainerId.newInstance(am0.getApplicationAttemptId(), 1); am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
status.add(ContainerStatus.newInstance(amContainer, nm1.registerNode(Arrays.asList(status));
ContainerState.COMPLETE, "AM container exit", 143));
nm1.registerNode(status);
} }
}; };
} }
@ -1852,6 +1846,15 @@ public class TestRMRestart {
} }
} }
public static NMContainerStatus createNMContainerStatus(
ApplicationAttemptId appAttemptId, int id, ContainerState containerState) {
ContainerId containerId = ContainerId.newInstance(appAttemptId, id);
NMContainerStatus containerReport =
NMContainerStatus.newInstance(containerId, containerState,
Resource.newInstance(1024, 1), "recover container", 0);
return containerReport;
}
public class TestMemoryRMStateStore extends MemoryRMStateStore { public class TestMemoryRMStateStore extends MemoryRMStateStore {
int count = 0; int count = 0;
public int updateApp = 0; public int updateApp = 0;

View File

@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher;
import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
@ -487,33 +488,37 @@ public class TestResourceTrackerService {
RMApp app = rm.submitApp(1024, true); RMApp app = rm.submitApp(1024, true);
// Case 1.1: AppAttemptId is null // Case 1.1: AppAttemptId is null
ContainerStatus status = ContainerStatus.newInstance( NMContainerStatus report =
ContainerId.newInstance(ApplicationAttemptId.newInstance( NMContainerStatus.newInstance(
app.getApplicationId(), 2), 1), ContainerId.newInstance(
ContainerState.COMPLETE, "Dummy Completed", 0); ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1),
rm.getResourceTrackerService().handleContainerStatus(status); ContainerState.COMPLETE, Resource.newInstance(1024, 1),
"Dummy Completed", 0);
rm.getResourceTrackerService().handleNMContainerStatus(report);
verify(handler, never()).handle((Event) any()); verify(handler, never()).handle((Event) any());
// Case 1.2: Master container is null // Case 1.2: Master container is null
RMAppAttemptImpl currentAttempt = RMAppAttemptImpl currentAttempt =
(RMAppAttemptImpl) app.getCurrentAppAttempt(); (RMAppAttemptImpl) app.getCurrentAppAttempt();
currentAttempt.setMasterContainer(null); currentAttempt.setMasterContainer(null);
status = ContainerStatus.newInstance( report = NMContainerStatus.newInstance(
ContainerId.newInstance(currentAttempt.getAppAttemptId(), 0), ContainerId.newInstance(currentAttempt.getAppAttemptId(), 0),
ContainerState.COMPLETE, "Dummy Completed", 0); ContainerState.COMPLETE, Resource.newInstance(1024, 1),
rm.getResourceTrackerService().handleContainerStatus(status); "Dummy Completed", 0);
rm.getResourceTrackerService().handleNMContainerStatus(report);
verify(handler, never()).handle((Event)any()); verify(handler, never()).handle((Event)any());
// Case 2: Managed AM // Case 2: Managed AM
app = rm.submitApp(1024); app = rm.submitApp(1024);
// Case 2.1: AppAttemptId is null // Case 2.1: AppAttemptId is null
status = ContainerStatus.newInstance( report = NMContainerStatus.newInstance(
ContainerId.newInstance(ApplicationAttemptId.newInstance( ContainerId.newInstance(
app.getApplicationId(), 2), 1), ApplicationAttemptId.newInstance(app.getApplicationId(), 2), 1),
ContainerState.COMPLETE, "Dummy Completed", 0); ContainerState.COMPLETE, Resource.newInstance(1024, 1),
"Dummy Completed", 0);
try { try {
rm.getResourceTrackerService().handleContainerStatus(status); rm.getResourceTrackerService().handleNMContainerStatus(report);
} catch (Exception e) { } catch (Exception e) {
// expected - ignore // expected - ignore
} }
@ -523,11 +528,12 @@ public class TestResourceTrackerService {
currentAttempt = currentAttempt =
(RMAppAttemptImpl) app.getCurrentAppAttempt(); (RMAppAttemptImpl) app.getCurrentAppAttempt();
currentAttempt.setMasterContainer(null); currentAttempt.setMasterContainer(null);
status = ContainerStatus.newInstance( report = NMContainerStatus.newInstance(
ContainerId.newInstance(currentAttempt.getAppAttemptId(), 0), ContainerId.newInstance(currentAttempt.getAppAttemptId(), 0),
ContainerState.COMPLETE, "Dummy Completed", 0); ContainerState.COMPLETE, Resource.newInstance(1024, 1),
"Dummy Completed", 0);
try { try {
rm.getResourceTrackerService().handleContainerStatus(status); rm.getResourceTrackerService().handleNMContainerStatus(report);
} catch (Exception e) { } catch (Exception e) {
// expected - ignore // expected - ignore
} }