YARN-8508. Release GPU resource for killed container.

Contributed by Chandni Singh
This commit is contained in:
Eric Yang 2018-07-27 19:33:58 -04:00
parent 79091cf76f
commit ed9d60e888
2 changed files with 25 additions and 18 deletions

View File

@ -573,15 +573,7 @@ private int handleLaunchForLaunchType(ContainerStartContext ctx,
return handleExitCode(e, container, containerId);
} finally {
resourcesHandler.postExecute(containerId);
try {
if (resourceHandlerChain != null) {
resourceHandlerChain.postComplete(containerId);
}
} catch (ResourceHandlerException e) {
LOG.warn("ResourceHandlerChain.postComplete failed for " +
"containerId: " + containerId + ". Exception: " + e);
}
postComplete(containerId);
}
return 0;
@ -721,14 +713,7 @@ public int reacquireContainer(ContainerReacquisitionContext ctx)
return super.reacquireContainer(ctx);
} finally {
resourcesHandler.postExecute(containerId);
if (resourceHandlerChain != null) {
try {
resourceHandlerChain.postComplete(containerId);
} catch (ResourceHandlerException e) {
LOG.warn("ResourceHandlerChain.postComplete failed for " +
"containerId: " + containerId + " Exception: " + e);
}
}
postComplete(containerId);
}
}
@ -798,6 +783,8 @@ public boolean reapContainer(ContainerReapContext ctx) throws IOException {
logOutput(e.getOutput());
throw new IOException("Error in reaping container "
+ container.getContainerId().toString() + " exit = " + retCode, e);
} finally {
postComplete(container.getContainerId());
}
return true;
}
@ -968,4 +955,17 @@ public void removeDockerContainer(String containerId) {
LOG.warn("Unable to remove docker container: " + containerId);
}
}
/**
 * Forwards the post-completion notification for a container to the
 * resource handler chain, when one is set.
 *
 * <p>This is best-effort: a {@code ResourceHandlerException} raised by the
 * chain is logged at WARN level and is not propagated to the caller.
 *
 * @param containerId id of the container passed on to the handler chain
 */
@VisibleForTesting
void postComplete(final ContainerId containerId) {
  if (resourceHandlerChain == null) {
    // No handler chain is set, so there is nothing to notify.
    return;
  }
  LOG.debug("{} post complete", containerId);
  try {
    resourceHandlerChain.postComplete(containerId);
  } catch (ResourceHandlerException e) {
    // Log and continue; the failure is deliberately not rethrown.
    LOG.warn("ResourceHandlerChain.postComplete failed for " +
        "containerId: {}. Exception: ", containerId, e);
  }
}
}

View File

@ -25,11 +25,14 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -40,6 +43,7 @@
import java.io.PrintWriter;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@ -667,12 +671,15 @@ public void testRemoveDockerContainer() throws Exception {
/**
 * Calls reapContainer on the executor with a reap context built from a
 * mocked container and user "foo", then verifies that reapContainer and
 * postComplete were each invoked exactly once.
 */
@Test
public void testReapContainer() throws Exception {
Container container = mock(Container.class);
// NOTE(review): this view shows two declarations of `lce` back to back (the
// plain mock below and the spy further down). This looks like the removed and
// added lines of a diff rendered without +/- markers; presumably only the spy
// declaration is meant to remain. Confirm against the real file.
LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
LinuxContainerRuntime containerRuntime = mock(LinuxContainerRuntime.class);
// Spy on an executor constructed with the mocked runtime so that calls made
// on it (including postComplete) can be verified afterwards.
LinuxContainerExecutor lce = spy(new LinuxContainerExecutor(
containerRuntime));
ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
builder.setContainer(container).setUser("foo");
ContainerReapContext ctx = builder.build();
lce.reapContainer(ctx);
verify(lce, times(1)).reapContainer(ctx);
// postComplete must have been called once, with any argument.
verify(lce, times(1)).postComplete(anyObject());
}
@Test