From d39bc903a0069a740744bafe10e506e452ed7018 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Tue, 10 Mar 2015 06:21:59 -0700 Subject: [PATCH] Configurable timeout between YARNRunner terminate the application and forcefully kill. Contributed by Eric Payne. --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../apache/hadoop/mapreduce/MRJobConfig.java | 5 ++++ .../src/main/resources/mapred-default.xml | 8 ++++++ .../org/apache/hadoop/mapred/YARNRunner.java | 5 +++- .../apache/hadoop/mapred/TestYARNRunner.java | 26 +++++++++++++++++++ 5 files changed, 46 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 8f06ac84bb8..5f3304169e6 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -335,6 +335,9 @@ Release 2.7.0 - UNRELEASED MAPREDUCE-6267. Refactor JobSubmitter#copyAndConfigureFiles into it's own class. (Chris Trezzo via kasha) + MAPREDUCE-6263. Configurable timeout between YARNRunner terminate the + application and forcefully kill. (Eric Payne via junping_du) + OPTIMIZATIONS MAPREDUCE-6169. MergeQueue should release reference to the current item diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 55271032571..9f671cd9c3b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -644,6 +644,11 @@ public interface MRJobConfig { public static final int DEFAULT_MR_AM_HISTORY_USE_BATCHED_FLUSH_QUEUE_SIZE_THRESHOLD = 50; + public static final String MR_AM_HARD_KILL_TIMEOUT_MS = + MR_AM_PREFIX + "hard-kill-timeout-ms"; + public static final long DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS = + 10 * 1000l; + /** * The threshold in terms of seconds after which an unsatisfied mapper request * triggers reducer preemption to free space. Default 0 implies that the reduces diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index d8647561aea..11621839896 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1359,6 +1359,14 @@ + + yarn.app.mapreduce.am.hard-kill-timeout-ms + 10000 + + Number of milliseconds to wait before the job client kills the application. + + + CLASSPATH for MR applications. A comma-separated list of CLASSPATH entries. If mapreduce.application.framework is set then this diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 41dc72f2b6e..8e576076125 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -640,7 +640,10 @@ public class YARNRunner implements ClientProtocol { clientCache.getClient(arg0).killJob(arg0); long currentTimeMillis = System.currentTimeMillis(); long timeKillIssued = currentTimeMillis; - while ((currentTimeMillis < timeKillIssued + 10000L) + long killTimeOut = + conf.getLong(MRJobConfig.MR_AM_HARD_KILL_TIMEOUT_MS, + MRJobConfig.DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS); + while ((currentTimeMillis < timeKillIssued + killTimeOut) && !isJobInTerminalState(status)) { try { Thread.sleep(1000L); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java index 420a95f9bbf..c427975e353 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java @@ -201,6 +201,32 @@ public class TestYARNRunner extends TestCase { verify(clientDelegate).killJob(jobId); } + @Test(timeout=60000) + public void testJobKillTimeout() throws Exception { + long timeToWaitBeforeHardKill = + 10000 + MRJobConfig.DEFAULT_MR_AM_HARD_KILL_TIMEOUT_MS; + conf.setLong(MRJobConfig.MR_AM_HARD_KILL_TIMEOUT_MS, + timeToWaitBeforeHardKill); + clientDelegate = mock(ClientServiceDelegate.class); + doAnswer( + new Answer() { + @Override + public ClientServiceDelegate answer(InvocationOnMock invocation) + throws Throwable { + return clientDelegate; + } + } + ).when(clientCache).getClient(any(JobID.class)); + when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new + org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, + State.RUNNING, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); + long startTimeMillis = System.currentTimeMillis(); + yarnRunner.killJob(jobId); + assertTrue("killJob should have waited at least " + timeToWaitBeforeHardKill + + " ms.", System.currentTimeMillis() - startTimeMillis + >= timeToWaitBeforeHardKill); + } + @Test(timeout=20000) public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))).