YARN-8508. Release GPU resource for killed container.
Contributed by Chandni Singh.
This commit is contained in:
parent
79091cf76f
commit
ed9d60e888
|
@ -573,15 +573,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
||||||
return handleExitCode(e, container, containerId);
|
return handleExitCode(e, container, containerId);
|
||||||
} finally {
|
} finally {
|
||||||
resourcesHandler.postExecute(containerId);
|
resourcesHandler.postExecute(containerId);
|
||||||
|
postComplete(containerId);
|
||||||
try {
|
|
||||||
if (resourceHandlerChain != null) {
|
|
||||||
resourceHandlerChain.postComplete(containerId);
|
|
||||||
}
|
|
||||||
} catch (ResourceHandlerException e) {
|
|
||||||
LOG.warn("ResourceHandlerChain.postComplete failed for " +
|
|
||||||
"containerId: " + containerId + ". Exception: " + e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -721,14 +713,7 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
||||||
return super.reacquireContainer(ctx);
|
return super.reacquireContainer(ctx);
|
||||||
} finally {
|
} finally {
|
||||||
resourcesHandler.postExecute(containerId);
|
resourcesHandler.postExecute(containerId);
|
||||||
if (resourceHandlerChain != null) {
|
postComplete(containerId);
|
||||||
try {
|
|
||||||
resourceHandlerChain.postComplete(containerId);
|
|
||||||
} catch (ResourceHandlerException e) {
|
|
||||||
LOG.warn("ResourceHandlerChain.postComplete failed for " +
|
|
||||||
"containerId: " + containerId + " Exception: " + e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -798,6 +783,8 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
||||||
logOutput(e.getOutput());
|
logOutput(e.getOutput());
|
||||||
throw new IOException("Error in reaping container "
|
throw new IOException("Error in reaping container "
|
||||||
+ container.getContainerId().toString() + " exit = " + retCode, e);
|
+ container.getContainerId().toString() + " exit = " + retCode, e);
|
||||||
|
} finally {
|
||||||
|
postComplete(container.getContainerId());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -968,4 +955,17 @@ public class LinuxContainerExecutor extends ContainerExecutor {
|
||||||
LOG.warn("Unable to remove docker container: " + containerId);
|
LOG.warn("Unable to remove docker container: " + containerId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void postComplete(final ContainerId containerId) {
|
||||||
|
try {
|
||||||
|
if (resourceHandlerChain != null) {
|
||||||
|
LOG.debug("{} post complete", containerId);
|
||||||
|
resourceHandlerChain.postComplete(containerId);
|
||||||
|
}
|
||||||
|
} catch (ResourceHandlerException e) {
|
||||||
|
LOG.warn("ResourceHandlerChain.postComplete failed for " +
|
||||||
|
"containerId: {}. Exception: ", containerId, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,11 +25,14 @@ import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
import static org.mockito.Matchers.anyObject;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.spy;
|
||||||
import static org.mockito.Mockito.times;
|
import static org.mockito.Mockito.times;
|
||||||
import static org.mockito.Mockito.verify;
|
import static org.mockito.Mockito.verify;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntime;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
|
import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerReapContext;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -40,6 +43,7 @@ import java.io.IOException;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintWriter;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
@ -667,12 +671,15 @@ public class TestLinuxContainerExecutor {
|
||||||
@Test
|
@Test
|
||||||
public void testReapContainer() throws Exception {
|
public void testReapContainer() throws Exception {
|
||||||
Container container = mock(Container.class);
|
Container container = mock(Container.class);
|
||||||
LinuxContainerExecutor lce = mock(LinuxContainerExecutor.class);
|
LinuxContainerRuntime containerRuntime = mock(LinuxContainerRuntime.class);
|
||||||
|
LinuxContainerExecutor lce = spy(new LinuxContainerExecutor(
|
||||||
|
containerRuntime));
|
||||||
ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
|
ContainerReapContext.Builder builder = new ContainerReapContext.Builder();
|
||||||
builder.setContainer(container).setUser("foo");
|
builder.setContainer(container).setUser("foo");
|
||||||
ContainerReapContext ctx = builder.build();
|
ContainerReapContext ctx = builder.build();
|
||||||
lce.reapContainer(ctx);
|
lce.reapContainer(ctx);
|
||||||
verify(lce, times(1)).reapContainer(ctx);
|
verify(lce, times(1)).reapContainer(ctx);
|
||||||
|
verify(lce, times(1)).postComplete(anyObject());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue