YARN-2980. Move health check script related functionality to hadoop-common (Varun Saxena via aw)

(cherry picked from commit d4ac6822e1)

Conflicts:
	hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
This commit is contained in:
Allen Wittenauer 2015-02-24 11:25:26 -08:00 committed by Wangda Tan
parent db0bd6dca8
commit 02e650248d
13 changed files with 258 additions and 108 deletions

View File

@ -16,7 +16,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.hadoop.yarn.server.nodemanager; package org.apache.hadoop.util;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -34,7 +34,6 @@ import org.apache.hadoop.util.Shell.ExitCodeException;
import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.hadoop.util.Shell.ShellCommandExecutor;
import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
/** /**
* *
@ -58,14 +57,11 @@ public class NodeHealthScriptRunner extends AbstractService {
/** ShellCommandExecutor used to execute monitoring script */ /** ShellCommandExecutor used to execute monitoring script */
ShellCommandExecutor shexec = null; ShellCommandExecutor shexec = null;
/** Configuration used by the checker */
private Configuration conf;
/** Pattern used for searching in the output of the node health script */ /** Pattern used for searching in the output of the node health script */
static private final String ERROR_PATTERN = "ERROR"; static private final String ERROR_PATTERN = "ERROR";
/** Time out error message */ /** Time out error message */
static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out"; public static final String NODE_HEALTH_SCRIPT_TIMED_OUT_MSG = "Node health script timed out";
private boolean isHealthy; private boolean isHealthy;
@ -192,11 +188,16 @@ public class NodeHealthScriptRunner extends AbstractService {
} }
} }
public NodeHealthScriptRunner() { public NodeHealthScriptRunner(String scriptName, long chkInterval, long timeout,
String[] scriptArgs) {
super(NodeHealthScriptRunner.class.getName()); super(NodeHealthScriptRunner.class.getName());
this.lastReportedTime = System.currentTimeMillis(); this.lastReportedTime = System.currentTimeMillis();
this.isHealthy = true; this.isHealthy = true;
this.healthReport = ""; this.healthReport = "";
this.nodeHealthScript = scriptName;
this.intervalTime = chkInterval;
this.scriptTimeout = timeout;
this.timer = new NodeHealthMonitorExecutor(scriptArgs);
} }
/* /*
@ -204,17 +205,6 @@ public class NodeHealthScriptRunner extends AbstractService {
*/ */
@Override @Override
protected void serviceInit(Configuration conf) throws Exception { protected void serviceInit(Configuration conf) throws Exception {
this.conf = conf;
this.nodeHealthScript =
conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
this.intervalTime = conf.getLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS,
YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS);
this.scriptTimeout = conf.getLong(
YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS,
YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS);
String[] args = conf.getStrings(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS,
new String[] {});
timer = new NodeHealthMonitorExecutor(args);
super.serviceInit(conf); super.serviceInit(conf);
} }
@ -225,7 +215,7 @@ public class NodeHealthScriptRunner extends AbstractService {
@Override @Override
protected void serviceStart() throws Exception { protected void serviceStart() throws Exception {
// if health script path is not configured don't start the thread. // if health script path is not configured don't start the thread.
if (!shouldRun(conf)) { if (!shouldRun(nodeHealthScript)) {
LOG.info("Not starting node health monitor"); LOG.info("Not starting node health monitor");
return; return;
} }
@ -242,7 +232,7 @@ public class NodeHealthScriptRunner extends AbstractService {
*/ */
@Override @Override
protected void serviceStop() { protected void serviceStop() {
if (!shouldRun(conf)) { if (!shouldRun(nodeHealthScript)) {
return; return;
} }
if (nodeHealthScriptScheduler != null) { if (nodeHealthScriptScheduler != null) {
@ -322,26 +312,25 @@ public class NodeHealthScriptRunner extends AbstractService {
* <li>Node health check script file exists</li> * <li>Node health check script file exists</li>
* </ol> * </ol>
* *
* @param conf
* @return true if node health monitoring service can be started. * @return true if node health monitoring service can be started.
*/ */
public static boolean shouldRun(Configuration conf) { public static boolean shouldRun(String healthScript) {
String nodeHealthScript = if (healthScript == null || healthScript.trim().isEmpty()) {
conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
if (nodeHealthScript == null || nodeHealthScript.trim().isEmpty()) {
return false; return false;
} }
File f = new File(nodeHealthScript); File f = new File(healthScript);
return f.exists() && FileUtil.canExecute(f); return f.exists() && FileUtil.canExecute(f);
} }
private synchronized void setHealthStatus(boolean isHealthy, String output) { private synchronized void setHealthStatus(boolean isHealthy, String output) {
LOG.info("health status being set as " + output);
this.setHealthy(isHealthy); this.setHealthy(isHealthy);
this.setHealthReport(output); this.setHealthReport(output);
} }
private synchronized void setHealthStatus(boolean isHealthy, String output, private synchronized void setHealthStatus(boolean isHealthy, String output,
long time) { long time) {
LOG.info("health status being set as " + output);
this.setHealthStatus(isHealthy, output); this.setHealthStatus(isHealthy, output);
this.setLastReportedTime(time); this.setLastReportedTime(time);
} }
@ -350,7 +339,7 @@ public class NodeHealthScriptRunner extends AbstractService {
* Used only by tests to access the timer task directly * Used only by tests to access the timer task directly
* @return the timer task * @return the timer task
*/ */
TimerTask getTimerTask() { public TimerTask getTimerTask() {
return timer; return timer;
} }
} }

View File

@ -0,0 +1,136 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.TimerTask;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@link NodeHealthScriptRunner}: when the runner agrees to run for
 * a given script path, and how the output of the script (normal output, an
 * ERROR line, or a timeout) is reflected in the reported health status.
 */
public class TestNodeHealthScriptRunner {

  /** Scratch directory; created in {@link #setup()}, removed in {@link #tearDown()}. */
  protected static File testRootDir = new File("target",
      TestNodeHealthScriptRunner.class.getName() +
      "-localDir").getAbsoluteFile();

  /** Script file that the tests rewrite with the content under test. */
  private File nodeHealthscriptFile = new File(testRootDir,
      Shell.appendScriptExtension("failingscript"));

  /** Creates the scratch directory before each test. */
  @Before
  public void setup() {
    testRootDir.mkdirs();
  }

  /** Recursively deletes the scratch directory after each test. */
  @After
  public void tearDown() throws Exception {
    if (testRootDir.exists()) {
      FileContext.getLocalFSFileContext().delete(
          new Path(testRootDir.getAbsolutePath()), true);
    }
  }

  /**
   * Overwrites the health script file with the given content and sets its
   * executable flag.
   *
   * @param scriptStr content written to the script file
   * @param setExecutable value passed to
   *          {@link FileUtil#setExecutable(File, boolean)} for the script
   * @throws IOException if the script file cannot be opened for writing
   */
  private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable)
      throws IOException {
    PrintWriter pw = null;
    try {
      FileUtil.setWritable(nodeHealthscriptFile, true);
      FileUtil.setReadable(nodeHealthscriptFile, true);
      pw = new PrintWriter(new FileOutputStream(nodeHealthscriptFile));
      pw.println(scriptStr);
      pw.flush();
    } finally {
      // pw is still null when opening the file failed; closing it blindly
      // would replace the real IOException with a NullPointerException.
      if (pw != null) {
        pw.close();
      }
    }
    FileUtil.setExecutable(nodeHealthscriptFile, setExecutable);
  }

  /**
   * Verifies that {@link NodeHealthScriptRunner#shouldRun(String)} is false
   * for a missing script and for a non-executable script, and true once the
   * script exists and is executable.
   */
  @Test
  public void testNodeHealthScriptShouldRun() throws IOException {
    // The script file has not been written yet, so it does not exist.
    Assert.assertFalse(
        "Node health script should not start if the script does not exist",
        NodeHealthScriptRunner.shouldRun(
            nodeHealthscriptFile.getAbsolutePath()));
    writeNodeHealthScriptFile("", false);
    // Node health script should not start if the node health script is not
    // executable.
    Assert.assertFalse(
        "Node health script should not start if the script is not executable",
        NodeHealthScriptRunner.shouldRun(
            nodeHealthscriptFile.getAbsolutePath()));
    writeNodeHealthScriptFile("", true);
    Assert.assertTrue("Node health script should start",
        NodeHealthScriptRunner.shouldRun(
            nodeHealthscriptFile.getAbsolutePath()));
  }

  /**
   * Runs the runner's timer task directly against a healthy script, an
   * error-reporting script, a healthy script again and finally a script that
   * outlasts the timeout passed to the runner, checking the reported health
   * flag and report text after each run.
   */
  @Test
  public void testNodeHealthScript() throws Exception {
    String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
    String normalScript = "echo \"I am all fine\"";
    // Waits before printing, so that the run exceeds the timeout value
    // (1000) handed to the runner below.
    String timeOutScript =
        Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
        : "sleep 4\necho \"I am fine\"";
    Configuration conf = new Configuration();
    writeNodeHealthScriptFile(normalScript, true);
    NodeHealthScriptRunner nodeHealthScriptRunner = new NodeHealthScriptRunner(
        nodeHealthscriptFile.getAbsolutePath(),
        500, 1000, new String[] {});
    nodeHealthScriptRunner.init(conf);
    TimerTask timerTask = nodeHealthScriptRunner.getTimerTask();

    timerTask.run();
    // Normal Script runs successfully
    Assert.assertTrue("Node health status reported unhealthy",
        nodeHealthScriptRunner.isHealthy());
    Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport());

    // Error script.
    writeNodeHealthScriptFile(errorScript, true);
    // Run timer
    timerTask.run();
    Assert.assertFalse("Node health status reported healthy",
        nodeHealthScriptRunner.isHealthy());
    Assert.assertTrue(
        nodeHealthScriptRunner.getHealthReport().contains("ERROR"));

    // Healthy script.
    writeNodeHealthScriptFile(normalScript, true);
    timerTask.run();
    Assert.assertTrue("Node health status reported unhealthy",
        nodeHealthScriptRunner.isHealthy());
    Assert.assertEquals("", nodeHealthScriptRunner.getHealthReport());

    // Timeout script.
    writeNodeHealthScriptFile(timeOutScript, true);
    timerTask.run();
    Assert.assertFalse("Node health status reported healthy even after timeout",
        nodeHealthScriptRunner.isHealthy());
    Assert.assertEquals(
        NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG,
        nodeHealthScriptRunner.getHealthReport());
  }
}

View File

@ -133,6 +133,9 @@ Release 2.8.0 - UNRELEASED
at NM to show these timing information for each active container. at NM to show these timing information for each active container.
(zxu via rkanter) (zxu via rkanter)
YARN-2980. Move health check script related functionality to hadoop-common
(Varun Saxena via aw)
OPTIMIZATIONS OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.yarn.server.nodemanager;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
/** /**
* The class which provides functionality of checking the health of the node and * The class which provides functionality of checking the health of the node and
@ -33,15 +35,17 @@ public class NodeHealthCheckerService extends CompositeService {
static final String SEPARATOR = ";"; static final String SEPARATOR = ";";
public NodeHealthCheckerService() { public NodeHealthCheckerService(NodeHealthScriptRunner scriptRunner,
LocalDirsHandlerService dirHandlerService) {
super(NodeHealthCheckerService.class.getName()); super(NodeHealthCheckerService.class.getName());
dirsHandler = new LocalDirsHandlerService(); nodeHealthScriptRunner = scriptRunner;
dirsHandler = dirHandlerService;
} }
@Override @Override
protected void serviceInit(Configuration conf) throws Exception { protected void serviceInit(Configuration conf) throws Exception {
if (NodeHealthScriptRunner.shouldRun(conf)) { if (NodeHealthScriptRunner.shouldRun(
nodeHealthScriptRunner = new NodeHealthScriptRunner(); conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH))) {
addService(nodeHealthScriptRunner); addService(nodeHealthScriptRunner);
} }
addService(dirsHandler); addService(dirsHandler);

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.ShutdownHookManager;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
@ -205,6 +206,25 @@ public class NodeManager extends CompositeService
} }
} }
/**
 * Builds a {@link NodeHealthScriptRunner} from the NodeManager configuration.
 *
 * @param conf configuration supplying the health check script path, check
 *          interval, script timeout and script arguments
 * @return a runner built from {@code conf}, or {@code null} when
 *         {@link NodeHealthScriptRunner#shouldRun(String)} rejects the
 *         configured script path
 */
public static NodeHealthScriptRunner getNodeHealthScriptRunner(Configuration conf) {
  String nodeHealthScript =
      conf.get(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH);
  if (!NodeHealthScriptRunner.shouldRun(nodeHealthScript)) {
    // Tell the operator why no health script runner is being created.
    LOG.info("Node Manager health check script is not available "
        + "or doesn't have execute permission, so not "
        + "starting the node health script runner.");
    return null;
  }
  long nmCheckintervalTime = conf.getLong(
      YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS,
      YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_INTERVAL_MS);
  long scriptTimeout = conf.getLong(
      YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS,
      YarnConfiguration.DEFAULT_NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS);
  String[] scriptArgs = conf.getStrings(
      YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_OPTS, new String[] {});
  return new NodeHealthScriptRunner(nodeHealthScript,
      nmCheckintervalTime, scriptTimeout, scriptArgs);
}
@Override @Override
protected void serviceInit(Configuration conf) throws Exception { protected void serviceInit(Configuration conf) throws Exception {
@ -240,11 +260,11 @@ public class NodeManager extends CompositeService
// NodeManager level dispatcher // NodeManager level dispatcher
this.dispatcher = new AsyncDispatcher(); this.dispatcher = new AsyncDispatcher();
nodeHealthChecker = new NodeHealthCheckerService();
dirsHandler = new LocalDirsHandlerService(metrics); dirsHandler = new LocalDirsHandlerService(metrics);
nodeHealthChecker =
new NodeHealthCheckerService(
getNodeHealthScriptRunner(conf), dirsHandler);
addService(nodeHealthChecker); addService(nodeHealthChecker);
dirsHandler = nodeHealthChecker.getDiskHandler();
this.context = createNMContext(containerTokenSecretManager, this.context = createNMContext(containerTokenSecretManager,
nmTokenSecretManager, nmStore); nmTokenSecretManager, nmStore);

View File

@ -98,9 +98,10 @@ public class TestEventFlow {
DeletionService del = new DeletionService(exec); DeletionService del = new DeletionService(exec);
Dispatcher dispatcher = new AsyncDispatcher(); Dispatcher dispatcher = new AsyncDispatcher();
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
healthChecker.init(conf); healthChecker.init(conf);
LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
NodeManagerMetrics metrics = NodeManagerMetrics.create(); NodeManagerMetrics metrics = NodeManagerMetrics.create();
NodeStatusUpdater nodeStatusUpdater = NodeStatusUpdater nodeStatusUpdater =
new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) { new NodeStatusUpdaterImpl(context, dispatcher, healthChecker, metrics) {

View File

@ -22,7 +22,6 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.TimerTask;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -30,6 +29,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
@ -40,6 +40,9 @@ import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.spy;
public class TestNodeHealthService { public class TestNodeHealthService {
private static volatile Log LOG = LogFactory private static volatile Log LOG = LogFactory
@ -67,16 +70,6 @@ public class TestNodeHealthService {
} }
} }
private Configuration getConfForNodeHealthScript() {
Configuration conf = new Configuration();
conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH,
nodeHealthscriptFile.getAbsolutePath());
conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500);
conf.setLong(
YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000);
return conf;
}
private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable) private void writeNodeHealthScriptFile(String scriptStr, boolean setExecutable)
throws IOException { throws IOException {
PrintWriter pw = null; PrintWriter pw = null;
@ -92,28 +85,14 @@ public class TestNodeHealthService {
FileUtil.setExecutable(nodeHealthscriptFile, setExecutable); FileUtil.setExecutable(nodeHealthscriptFile, setExecutable);
} }
@Test private Configuration getConfForNodeHealthScript() {
public void testNodeHealthScriptShouldRun() throws IOException { Configuration conf = new Configuration();
// Node health script should not start if there is no property called conf.set(YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_PATH,
// node health script path. nodeHealthscriptFile.getAbsolutePath());
Assert.assertFalse("By default Health script should not have started", conf.setLong(YarnConfiguration.NM_HEALTH_CHECK_INTERVAL_MS, 500);
NodeHealthScriptRunner.shouldRun(new Configuration())); conf.setLong(
Configuration conf = getConfForNodeHealthScript(); YarnConfiguration.NM_HEALTH_CHECK_SCRIPT_TIMEOUT_MS, 1000);
// Node health script should not start if the node health script does not return conf;
// exists
Assert.assertFalse("Node health script should start",
NodeHealthScriptRunner.shouldRun(conf));
// Create script path.
conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
conf.addResource(nodeHealthConfigFile.getName());
writeNodeHealthScriptFile("", false);
// Node health script should not start if the node health script is not
// executable.
Assert.assertFalse("Node health script should start",
NodeHealthScriptRunner.shouldRun(conf));
writeNodeHealthScriptFile("", true);
Assert.assertTrue("Node health script should start",
NodeHealthScriptRunner.shouldRun(conf));
} }
private void setHealthStatus(NodeHealthStatus healthStatus, boolean isHealthy, private void setHealthStatus(NodeHealthStatus healthStatus, boolean isHealthy,
@ -124,27 +103,24 @@ public class TestNodeHealthService {
} }
@Test @Test
public void testNodeHealthScript() throws Exception { public void testNodeHealthService() throws Exception {
RecordFactory factory = RecordFactoryProvider.getRecordFactory(null); RecordFactory factory = RecordFactoryProvider.getRecordFactory(null);
NodeHealthStatus healthStatus = NodeHealthStatus healthStatus =
factory.newRecordInstance(NodeHealthStatus.class); factory.newRecordInstance(NodeHealthStatus.class);
String errorScript = "echo ERROR\n echo \"Tracker not healthy\"";
String normalScript = "echo \"I am all fine\"";
String timeOutScript = Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\""
: "sleep 4\necho \"I am fine\"";
Configuration conf = getConfForNodeHealthScript(); Configuration conf = getConfForNodeHealthScript();
conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.writeXml(new FileOutputStream(nodeHealthConfigFile));
conf.addResource(nodeHealthConfigFile.getName()); conf.addResource(nodeHealthConfigFile.getName());
writeNodeHealthScriptFile("", true);
writeNodeHealthScriptFile(normalScript, true); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService();
nodeHealthChecker.init(conf);
NodeHealthScriptRunner nodeHealthScriptRunner = NodeHealthScriptRunner nodeHealthScriptRunner =
nodeHealthChecker.getNodeHealthScriptRunner(); spy(NodeManager.getNodeHealthScriptRunner(conf));
TimerTask timerTask = nodeHealthScriptRunner.getTimerTask(); NodeHealthCheckerService nodeHealthChecker = new NodeHealthCheckerService(
nodeHealthScriptRunner, dirsHandler);
timerTask.run(); nodeHealthChecker.init(conf);
doReturn(true).when(nodeHealthScriptRunner).isHealthy();
doReturn("").when(nodeHealthScriptRunner).getHealthReport();
setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
nodeHealthChecker.getHealthReport(), nodeHealthChecker.getHealthReport(),
nodeHealthChecker.getLastHealthReportTime()); nodeHealthChecker.getLastHealthReportTime());
@ -155,11 +131,7 @@ public class TestNodeHealthService {
Assert.assertTrue("Node health status reported unhealthy", healthStatus Assert.assertTrue("Node health status reported unhealthy", healthStatus
.getHealthReport().equals(nodeHealthChecker.getHealthReport())); .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
// write out error file. doReturn(false).when(nodeHealthScriptRunner).isHealthy();
// Healthy to unhealthy transition
writeNodeHealthScriptFile(errorScript, true);
// Run timer
timerTask.run();
// update health status // update health status
setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
nodeHealthChecker.getHealthReport(), nodeHealthChecker.getHealthReport(),
@ -170,9 +142,7 @@ public class TestNodeHealthService {
Assert.assertTrue("Node health status reported healthy", healthStatus Assert.assertTrue("Node health status reported healthy", healthStatus
.getHealthReport().equals(nodeHealthChecker.getHealthReport())); .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
// Check unhealthy to healthy transitions. doReturn(true).when(nodeHealthScriptRunner).isHealthy();
writeNodeHealthScriptFile(normalScript, true);
timerTask.run();
setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
nodeHealthChecker.getHealthReport(), nodeHealthChecker.getHealthReport(),
nodeHealthChecker.getLastHealthReportTime()); nodeHealthChecker.getLastHealthReportTime());
@ -184,8 +154,9 @@ public class TestNodeHealthService {
.getHealthReport().equals(nodeHealthChecker.getHealthReport())); .getHealthReport().equals(nodeHealthChecker.getHealthReport()));
// Healthy to timeout transition. // Healthy to timeout transition.
writeNodeHealthScriptFile(timeOutScript, true); doReturn(false).when(nodeHealthScriptRunner).isHealthy();
timerTask.run(); doReturn(NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG)
.when(nodeHealthScriptRunner).getHealthReport();
setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(), setHealthStatus(healthStatus, nodeHealthChecker.isHealthy(),
nodeHealthChecker.getHealthReport(), nodeHealthChecker.getHealthReport(),
nodeHealthChecker.getLastHealthReportTime()); nodeHealthChecker.getLastHealthReportTime());
@ -198,5 +169,4 @@ public class TestNodeHealthService {
+ NodeHealthCheckerService.SEPARATOR + NodeHealthCheckerService.SEPARATOR
+ nodeHealthChecker.getDiskHandler().getDisksHealthReport(false))); + nodeHealthChecker.getDiskHandler().getDisksHealthReport(false)));
} }
} }

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
@ -57,6 +58,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface; import org.apache.hadoop.yarn.server.nodemanager.LocalRMInterface;
import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdaterImpl;
@ -174,9 +176,10 @@ public abstract class BaseContainerManagerTest {
delSrvc.init(conf); delSrvc.init(conf);
exec = createContainerExecutor(); exec = createContainerExecutor();
nodeHealthChecker = new NodeHealthCheckerService(); dirsHandler = new LocalDirsHandlerService();
nodeHealthChecker = new NodeHealthCheckerService(
NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
nodeHealthChecker.init(conf); nodeHealthChecker.init(conf);
dirsHandler = nodeHealthChecker.getDiskHandler();
containerManager = createContainerManager(delSrvc); containerManager = createContainerManager(delSrvc);
((NMContext)context).setContainerManager(containerManager); ((NMContext)context).setContainerManager(containerManager);
nodeStatusUpdater.init(conf); nodeStatusUpdater.init(conf);

View File

@ -40,6 +40,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
@ -74,6 +75,12 @@ import com.google.inject.Module;
public class TestContainerLogsPage { public class TestContainerLogsPage {
/**
 * Creates a health checker service from the script runner that
 * {@link NodeManager#getNodeHealthScriptRunner(Configuration)} returns for
 * {@code conf} and a new {@link LocalDirsHandlerService}.
 *
 * @param conf configuration handed to the script runner factory
 * @return the newly constructed health checker service
 */
private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) {
  return new NodeHealthCheckerService(
      NodeManager.getNodeHealthScriptRunner(conf),
      new LocalDirsHandlerService());
}
@Test(timeout=30000) @Test(timeout=30000)
public void testContainerLogDirs() throws IOException, YarnException { public void testContainerLogDirs() throws IOException, YarnException {
File absLogDir = new File("target", File absLogDir = new File("target",
@ -81,7 +88,7 @@ public class TestContainerLogsPage {
String logdirwithFile = absLogDir.toURI().toString(); String logdirwithFile = absLogDir.toURI().toString();
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set(YarnConfiguration.NM_LOG_DIRS, logdirwithFile); conf.set(YarnConfiguration.NM_LOG_DIRS, logdirwithFile);
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
healthChecker.init(conf); healthChecker.init(conf);
LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler, NMContext nmContext = new NodeManager.NMContext(null, null, dirsHandler,
@ -137,7 +144,7 @@ public class TestContainerLogsPage {
"kerberos"); "kerberos");
UserGroupInformation.setConfiguration(conf); UserGroupInformation.setConfiguration(conf);
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
healthChecker.init(conf); healthChecker.init(conf);
LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
// Add an application and the corresponding containers // Add an application and the corresponding containers

View File

@ -28,6 +28,7 @@ import java.io.Writer;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
@ -78,6 +79,12 @@ public class TestNMWebServer {
FileUtil.fullyDelete(testLogDir); FileUtil.fullyDelete(testLogDir);
} }
/**
 * Creates a health checker service from the script runner that
 * {@link NodeManager#getNodeHealthScriptRunner(Configuration)} returns for
 * {@code conf} and a new {@link LocalDirsHandlerService}.
 *
 * @param conf configuration handed to the script runner factory
 * @return the newly constructed health checker service
 */
private NodeHealthCheckerService createNodeHealthCheckerService(Configuration conf) {
  return new NodeHealthCheckerService(
      NodeManager.getNodeHealthScriptRunner(conf),
      new LocalDirsHandlerService());
}
private int startNMWebAppServer(String webAddr) { private int startNMWebAppServer(String webAddr) {
Context nmContext = new NodeManager.NMContext(null, null, null, null, Context nmContext = new NodeManager.NMContext(null, null, null, null,
null); null);
@ -106,7 +113,7 @@ public class TestNMWebServer {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
healthChecker.init(conf); healthChecker.init(conf);
LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();
conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr); conf.set(YarnConfiguration.NM_WEBAPP_ADDRESS, webAddr);
@ -169,7 +176,7 @@ public class TestNMWebServer {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); NodeHealthCheckerService healthChecker = createNodeHealthCheckerService(conf);
healthChecker.init(conf); healthChecker.init(conf);
LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler(); LocalDirsHandlerService dirsHandler = healthChecker.getDiskHandler();

View File

@ -36,6 +36,7 @@ import org.junit.Assert;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.VersionInfo;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
@ -98,14 +99,16 @@ public class TestNMWebServices extends JerseyTestBase {
TestNMWebServices.class.getSimpleName() + "LogDir"); TestNMWebServices.class.getSimpleName() + "LogDir");
private Injector injector = Guice.createInjector(new ServletModule() { private Injector injector = Guice.createInjector(new ServletModule() {
@Override @Override
protected void configureServlets() { protected void configureServlets() {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); dirsHandler = new LocalDirsHandlerService();
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
healthChecker.init(conf); healthChecker.init(conf);
dirsHandler = healthChecker.getDiskHandler();
aclsManager = new ApplicationACLsManager(conf); aclsManager = new ApplicationACLsManager(conf);
nmContext = new NodeManager.NMContext(null, null, dirsHandler, nmContext = new NodeManager.NMContext(null, null, dirsHandler,
aclsManager, null); aclsManager, null);

View File

@ -33,6 +33,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
@ -91,11 +92,14 @@ public class TestNMWebServicesApps extends JerseyTestBase {
TestNMWebServicesApps.class.getSimpleName() + "LogDir"); TestNMWebServicesApps.class.getSimpleName() + "LogDir");
private Injector injector = Guice.createInjector(new ServletModule() { private Injector injector = Guice.createInjector(new ServletModule() {
@Override @Override
protected void configureServlets() { protected void configureServlets() {
conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
healthChecker.init(conf); healthChecker.init(conf);
dirsHandler = healthChecker.getDiskHandler(); dirsHandler = healthChecker.getDiskHandler();
aclsManager = new ApplicationACLsManager(conf); aclsManager = new ApplicationACLsManager(conf);

View File

@ -34,6 +34,7 @@ import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.util.NodeHealthScriptRunner;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
@ -123,7 +124,9 @@ public class TestNMWebServicesContainers extends JerseyTestBase {
}; };
conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOCAL_DIRS, testRootDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath()); conf.set(YarnConfiguration.NM_LOG_DIRS, testLogDir.getAbsolutePath());
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(); LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
NodeHealthCheckerService healthChecker = new NodeHealthCheckerService(
NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
healthChecker.init(conf); healthChecker.init(conf);
dirsHandler = healthChecker.getDiskHandler(); dirsHandler = healthChecker.getDiskHandler();
aclsManager = new ApplicationACLsManager(conf); aclsManager = new ApplicationACLsManager(conf);