From 31fff48ec4a7bf04a2078921316c58917c96f610 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Mon, 8 Jul 2013 23:40:43 +0000 Subject: [PATCH] YARN-894. NodeHealthScriptRunner timeout checking is inaccurate on Windows. Contributed by Chuan Liu. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1501016 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/nodemanager/NodeHealthScriptRunner.java | 6 ++++++ .../yarn/server/nodemanager/TestNodeHealthService.java | 6 ++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 266b32c989c..69f367f2f6d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -727,6 +727,9 @@ Release 2.1.0-beta - 2013-07-02 YARN-852. TestAggregatedLogFormat.testContainerLogsFileAccess fails on Windows. (Chuan Liu via cnauroth) + YARN-894. NodeHealthScriptRunner timeout checking is inaccurate on Windows. + (Chuan Liu via cnauroth) + YARN-795. Fair scheduler queue metrics should subtract allocated vCores from available vCores. (ywskycn via tucu) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java index 2461d1ccf4f..e3c9862acad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeHealthScriptRunner.java @@ -32,6 +32,7 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -110,6 +111,11 @@ public void run() { } catch (ExitCodeException e) { // ignore the exit code of the script status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; + // On Windows, we will not hit the Stream closed IOException + // thrown by stdout buffered reader for timeout event. + if (Shell.WINDOWS && shexec.isTimedOut()) { + status = HealthCheckerExitStatus.TIMED_OUT; + } } catch (Exception e) { LOG.warn("Caught exception : " + e.getMessage()); if (!shexec.isTimedOut()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java index 0c9b595e493..8103c1643ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeHealthService.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -51,7 +52,7 @@ public class TestNodeHealthService { "modified-mapred-site.xml"); private File nodeHealthscriptFile = new File(testRootDir, - "failingscript.sh"); + Shell.appendScriptExtension("failingscript")); @Before public void setup() { @@ -123,7 +124,8 @@ public void testNodeHealthScript() throws Exception { factory.newRecordInstance(NodeHealthStatus.class); String errorScript = "echo ERROR\n echo \"Tracker not healthy\""; String normalScript = "echo \"I am all fine\""; - String timeOutScript = "sleep 4\n echo\"I am fine\""; + String timeOutScript = Shell.WINDOWS ? "@echo off\nping -n 4 127.0.0.1 >nul\necho \"I am fine\"" + : "sleep 4\necho \"I am fine\""; Configuration conf = getConfForNodeHealthScript(); conf.writeXml(new FileOutputStream(nodeHealthConfigFile)); conf.addResource(nodeHealthConfigFile.getName());