YARN-2431. NM restart: cgroup is not removed for reacquired containers. Contributed by Jason Lowe

(cherry picked from commit 3fa5f728c4)
This commit is contained in:
Jason Lowe 2014-09-04 21:11:27 +00:00
parent c035365338
commit 04d325afff
3 changed files with 74 additions and 0 deletions

View File

@ -255,6 +255,9 @@ Release 2.6.0 - UNRELEASED
YARN-2462. TestNodeManagerResync#testBlockNewContainerRequestsOnStartAndResync YARN-2462. TestNodeManagerResync#testBlockNewContainerRequestsOnStartAndResync
should have a test timeout (Eric Payne via jlowe) should have a test timeout (Eric Payne via jlowe)
YARN-2431. NM restart: cgroup is not removed for reacquired containers
(jlowe)
Release 2.5.1 - UNRELEASED Release 2.5.1 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager; package org.apache.hadoop.yarn.server.nodemanager;
import com.google.common.base.Optional; import com.google.common.base.Optional;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
@ -339,6 +340,16 @@ public class LinuxContainerExecutor extends ContainerExecutor {
return 0; return 0;
} }
@Override
public int reacquireContainer(String user, ContainerId containerId)
throws IOException {
try {
return super.reacquireContainer(user, containerId);
} finally {
resourcesHandler.postExecute(containerId);
}
}
@Override @Override
public boolean signalContainer(String user, String pid, Signal signal) public boolean signalContainer(String user, String pid, Signal signal)
throws IOException { throws IOException {

View File

@ -30,6 +30,8 @@ import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.junit.Assert; import org.junit.Assert;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -42,11 +44,15 @@ import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.util.LCEResourcesHandler;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -353,4 +359,58 @@ public class TestLinuxContainerExecutor {
} }
} }
@Test(timeout=10000)
public void testPostExecuteAfterReacquisition() throws Exception {
// make up some bogus container ID
ApplicationId appId = ApplicationId.newInstance(12345, 67890);
ApplicationAttemptId attemptId =
ApplicationAttemptId.newInstance(appId, 54321);
ContainerId cid = ContainerId.newInstance(attemptId, 9876);
Configuration conf = new YarnConfiguration();
conf.setClass(YarnConfiguration.NM_LINUX_CONTAINER_RESOURCES_HANDLER,
TestResourceHandler.class, LCEResourcesHandler.class);
LinuxContainerExecutor lce = new LinuxContainerExecutor();
lce.setConf(conf);
try {
lce.init();
} catch (IOException e) {
// expected if LCE isn't setup right, but not necessary for this test
}
lce.reacquireContainer("foouser", cid);
Assert.assertTrue("postExec not called after reacquisition",
TestResourceHandler.postExecContainers.contains(cid));
}
private static class TestResourceHandler implements LCEResourcesHandler {
static Set<ContainerId> postExecContainers = new HashSet<ContainerId>();
@Override
public void setConf(Configuration conf) {
}
@Override
public Configuration getConf() {
return null;
}
@Override
public void init(LinuxContainerExecutor lce) throws IOException {
}
@Override
public void preExecute(ContainerId containerId, Resource containerResource)
throws IOException {
}
@Override
public void postExecute(ContainerId containerId) {
postExecContainers.add(containerId);
}
@Override
public String getResourcesOption(ContainerId containerId) {
return null;
}
}
} }