YARN-8508. Release GPU resource for killed container.

Contributed by Chandni Singh
This commit is contained in:
Eric Yang 2018-07-27 19:33:58 -04:00
parent 79091cf76f
commit ed9d60e888
2 changed files with 25 additions and 18 deletions

View File

@ -573,15 +573,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return handleExitCode(e, container, containerId); return handleExitCode(e, container, containerId);
} finally { } finally {
resourcesHandler.postExecute(containerId); resourcesHandler.postExecute(containerId);
postComplete(containerId);
try {
if (resourceHandlerChain != null) {
resourceHandlerChain.postComplete(containerId);
}
} catch (ResourceHandlerException e) {
LOG.warn("ResourceHandlerChain.postComplete failed for " +
"containerId: " + containerId + ". Exception: " + e);
}
} }
return 0; return 0;
@ -721,14 +713,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return super.reacquireContainer(ctx); return super.reacquireContainer(ctx);
} finally { } finally {
resourcesHandler.postExecute(containerId); resourcesHandler.postExecute(containerId);
if (resourceHandlerChain != null) { postComplete(containerId);
try {
resourceHandlerChain.postComplete(containerId);
} catch (ResourceHandlerException e) {
LOG.warn("ResourceHandlerChain.postComplete failed for " +
"containerId: " + containerId + " Exception: " + e);
}
}
} }
} }
@ -798,6 +783,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
logOutput(e.getOutput()); logOutput(e.getOutput());
throw new IOException("Error in reaping container " throw new IOException("Error in reaping container "
+ container.getContainerId().toString() + " exit = " + retCode, e); + container.getContainerId().toString() + " exit = " + retCode, e);
} finally {
postComplete(container.getContainerId());
} }
return true; return true;
} }
@ -968,4 +955,17 @@ public class LinuxContainerExecutor extends ContainerExecutor {
LOG.warn("Unable to remove docker container: " + containerId); LOG.warn("Unable to remove docker container: " + containerId);
} }
} }
/**
 * Notifies the resource handler chain, when one is configured, that the
 * given container has completed.
 *
 * <p>A {@code ResourceHandlerException} thrown by the chain is logged at
 * WARN level and not propagated to the caller.
 *
 * @param containerId id of the container that has completed
 */
@VisibleForTesting
void postComplete(final ContainerId containerId) {
  // No handler chain configured: nothing to notify.
  if (resourceHandlerChain == null) {
    return;
  }
  try {
    LOG.debug("{} post complete", containerId);
    resourceHandlerChain.postComplete(containerId);
  } catch (ResourceHandlerException e) {
    // The exception is passed as the trailing argument so the logger
    // records it together with the formatted message.
    LOG.warn("ResourceHandlerChain.postComplete failed for "
        + "containerId: {}. Exception: ", containerId, e);
  }
}
} }

View File

@ -25,11 +25,14 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import static org.mockito.Matchers.anyObject;
import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times; import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when; import static org.mockito.Mockito.when;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext; import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -40,6 +43,7 @@ import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
@ -667,12 +671,15 @@ public class TestLinuxContainerExecutor {
@Test @Test
public void testReapContainer() throws Exception { public void testReapContainer() throws Exception {
Container container = mock(Container.class); Container container = mock(Container.class);
LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class); LinuxContainerRuntime containerRuntime = mock(LinuxContainerRuntime.class);
LinuxContainerExecutor lce = spy(new LinuxContainerExecutor(
containerRuntime));
ContainerReapContext.Builder builder = new ContainerReapContext.Builder(); ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
builder.setContainer(container).setUser("foo"); builder.setContainer(container).setUser("foo");
ContainerReapContext ctx = builder.build(); ContainerReapContext ctx = builder.build();
lce.reapContainer(ctx); lce.reapContainer(ctx);
verify(lce, times(1)).reapContainer(ctx); verify(lce, times(1)).reapContainer(ctx);
verify(lce, times(1)).postComplete(anyObject());
} }
@Test @Test