YARN-7958. ServiceMaster should only wait for recovery of containers with id that match the current application id. Contributed by Chandni Singh

This commit is contained in:
Billie Rinaldi 2018-02-28 17:14:16 -08:00
parent 55d04a6db1
commit 5ed689e33a
2 changed files with 56 additions and 7 deletions

View File

@ -40,6 +40,7 @@
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
@ -360,17 +361,22 @@ private void recoverComponents(RegisterApplicationMasterResponse response) {
amRMClient.releaseAssignedContainer(container.getId());
}
}
ApplicationId appId = ApplicationId.fromString(app.getId());
existingRecords.forEach((encodedContainerId, record) -> {
String componentName = record.get(YarnRegistryAttributes.YARN_COMPONENT);
if (componentName != null) {
Component component = componentsByName.get(componentName);
ComponentInstance compInstance = component.getComponentInstance(
record.description);
ContainerId containerId = ContainerId.fromString(record.get(
YarnRegistryAttributes.YARN_ID));
unRecoveredInstances.put(containerId, compInstance);
component.removePendingInstance(compInstance);
if (component != null) {
ComponentInstance compInstance = component.getComponentInstance(
record.description);
ContainerId containerId = ContainerId.fromString(record.get(
YarnRegistryAttributes.YARN_ID));
if (containerId.getApplicationAttemptId().getApplicationId()
.equals(appId)) {
unRecoveredInstances.put(containerId, compInstance);
component.removePendingInstance(compInstance);
}
}
}
});

View File

@ -210,6 +210,49 @@ public void testContainersReleasedWhenExpired()
.getState());
}
// Test to verify that the AM doesn't wait for containers of a different app
// even though they correspond to the same service: a registry record whose
// container id carries another application id must leave the component
// instance pending until a container is fed to the current application.
@Test(timeout = 200000)
public void testContainersFromDifferentApp()
    throws Exception {
  // One shared cluster timestamp: the two application ids below differ only
  // in their sequence number.
  long clusterTimestamp = System.currentTimeMillis();
  ApplicationId applicationId =
      ApplicationId.newInstance(clusterTimestamp, 1);
  Service exampleApp = new Service();
  exampleApp.setId(applicationId.toString());
  exampleApp.setName("testContainersFromDifferentApp");
  String comp1Name = "comp1";
  String comp1InstName = "comp1-0";

  org.apache.hadoop.yarn.service.api.records.Component compA =
      createComponent(comp1Name, 1, "sleep");
  exampleApp.addComponent(compA);

  MockServiceAM am = new MockServiceAM(exampleApp);
  // Created while the service still carries the first application id.
  ContainerId containerId = am.createContainerId(1);
  // saves the container in the registry
  am.feedRegistryComponent(containerId, comp1Name, comp1InstName);

  // Switch the service to a second application id before the AM starts.
  ApplicationId changedAppId =
      ApplicationId.newInstance(clusterTimestamp, 2);
  exampleApp.setId(changedAppId.toString());

  am.init(conf);
  am.start();
  try {
    // 1 pending instance since the container in registry belongs to a
    // different app.
    Assert.assertEquals(1,
        am.getComponent(comp1Name).getPendingInstances().size());

    am.feedContainerToComp(exampleApp, 1, comp1Name);
    GenericTestUtils.waitFor(
        () -> am.getCompInstance(comp1Name, comp1InstName)
            .getContainerStatus() != null, 2000, 200000);
    Assert.assertEquals("container state",
        org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
        am.getCompInstance(comp1Name, comp1InstName).getContainerStatus()
            .getState());
  } finally {
    // Stop the AM even when an assertion or the wait above fails, so a
    // started service is not left running for the remaining tests.
    am.stop();
  }
}
@Test
public void testScheduleWithMultipleResourceTypes()
throws TimeoutException, InterruptedException, IOException {