YARN-8153. Guaranteed containers always stay in SCHEDULED on NM after restart. Contributed by Yang Wang.
This commit is contained in:
parent
7be71ec55b
commit
84531ad9b6
@ -501,8 +501,11 @@ private void reclaimOpportunisticContainerResources(Container container) {
|
|||||||
|
|
||||||
private void startContainer(Container container) {
|
private void startContainer(Container container) {
|
||||||
LOG.info("Starting container [" + container.getContainerId()+ "]");
|
LOG.info("Starting container [" + container.getContainerId()+ "]");
|
||||||
runningContainers.put(container.getContainerId(), container);
|
// Skip to put into runningContainers and addUtilization when recover
|
||||||
this.utilizationTracker.addContainerResources(container);
|
if (!runningContainers.containsKey(container.getContainerId())) {
|
||||||
|
runningContainers.put(container.getContainerId(), container);
|
||||||
|
this.utilizationTracker.addContainerResources(container);
|
||||||
|
}
|
||||||
if (container.getContainerTokenIdentifier().getExecutionType() ==
|
if (container.getContainerTokenIdentifier().getExecutionType() ==
|
||||||
ExecutionType.OPPORTUNISTIC) {
|
ExecutionType.OPPORTUNISTIC) {
|
||||||
this.metrics.startOpportunisticContainer(container.getResource());
|
this.metrics.startOpportunisticContainer(container.getResource());
|
||||||
|
@ -67,6 +67,7 @@
|
|||||||
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
|
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
|
||||||
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
|
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceUtilization;
|
||||||
import org.apache.hadoop.yarn.api.records.Token;
|
import org.apache.hadoop.yarn.api.records.Token;
|
||||||
import org.apache.hadoop.yarn.api.records.URL;
|
import org.apache.hadoop.yarn.api.records.URL;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
@ -91,6 +92,7 @@
|
|||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ResourceMappings;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
|
||||||
@ -439,6 +441,54 @@ public void testContainerResizeRecovery() throws Exception {
|
|||||||
assertNotNull(app);
|
assertNotNull(app);
|
||||||
containerStatus = getContainerStatus(context, cm, cid);
|
containerStatus = getContainerStatus(context, cm, cid);
|
||||||
assertEquals(targetResource, containerStatus.getCapability());
|
assertEquals(targetResource, containerStatus.getCapability());
|
||||||
|
cm.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContainerSchedulerRecovery() throws Exception {
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
|
||||||
|
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
|
||||||
|
NMStateStoreService stateStore = new NMMemoryStateStoreService();
|
||||||
|
stateStore.init(conf);
|
||||||
|
stateStore.start();
|
||||||
|
context = createContext(conf, stateStore);
|
||||||
|
ContainerManagerImpl cm = createContainerManager(context, delSrvc);
|
||||||
|
((NMContext) context).setContainerManager(cm);
|
||||||
|
cm.init(conf);
|
||||||
|
cm.start();
|
||||||
|
// add an application by starting a container
|
||||||
|
ApplicationId appId = ApplicationId.newInstance(0, 1);
|
||||||
|
ApplicationAttemptId attemptId =
|
||||||
|
ApplicationAttemptId.newInstance(appId, 1);
|
||||||
|
ContainerId cid = ContainerId.newContainerId(attemptId, 1);
|
||||||
|
|
||||||
|
commonLaunchContainer(appId, cid, cm);
|
||||||
|
|
||||||
|
Application app = context.getApplications().get(appId);
|
||||||
|
assertNotNull(app);
|
||||||
|
|
||||||
|
ResourceUtilization utilization =
|
||||||
|
ResourceUtilization.newInstance(1024, 2048, 0.25F);
|
||||||
|
assertEquals(cm.getContainerScheduler().getNumRunningContainers(), 1);
|
||||||
|
assertEquals(utilization,
|
||||||
|
cm.getContainerScheduler().getCurrentUtilization());
|
||||||
|
|
||||||
|
// restart and verify container scheduler has recovered correctly
|
||||||
|
cm.stop();
|
||||||
|
context = createContext(conf, stateStore);
|
||||||
|
cm = createContainerManager(context, delSrvc);
|
||||||
|
((NMContext) context).setContainerManager(cm);
|
||||||
|
cm.init(conf);
|
||||||
|
cm.start();
|
||||||
|
assertEquals(1, context.getApplications().size());
|
||||||
|
app = context.getApplications().get(appId);
|
||||||
|
assertNotNull(app);
|
||||||
|
waitForNMContainerState(cm, cid, ContainerState.RUNNING);
|
||||||
|
|
||||||
|
assertEquals(cm.getContainerScheduler().getNumRunningContainers(), 1);
|
||||||
|
assertEquals(utilization,
|
||||||
|
cm.getContainerScheduler().getCurrentUtilization());
|
||||||
|
cm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -494,6 +544,7 @@ public void testResourceMappingRecoveryForContainer() throws Exception {
|
|||||||
resourceMappings.getAssignedResources("numa").equals(numaResources));
|
resourceMappings.getAssignedResources("numa").equals(numaResources));
|
||||||
Assert.assertTrue(
|
Assert.assertTrue(
|
||||||
resourceMappings.getAssignedResources("fpga").equals(fpgaResources));
|
resourceMappings.getAssignedResources("fpga").equals(fpgaResources));
|
||||||
|
cm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
Loading…
x
Reference in New Issue
Block a user