svn merge -c 1408444 FIXES: MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by default (Ravi Prakash via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1408447 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Joseph Evans 2012-11-12 20:38:55 +00:00
parent cc384335f9
commit 32850b486b
4 changed files with 65 additions and 76 deletions

View File

@ -510,6 +510,9 @@ Release 0.23.5 - UNRELEASED
MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe
via bobby)
MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by
default (Ravi Prakash via bobby)
Release 0.23.4 - UNRELEASED

View File

@ -53,7 +53,7 @@ public class JobEndNotifier implements Configurable {
protected String userUrl;
protected String proxyConf;
protected int numTries; //Number of tries to attempt notification
protected int waitInterval; //Time to wait between retrying notification
protected int waitInterval; //Time (ms) to wait between retrying notification
protected URL urlToNotify; //URL to notify read from the config
protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification
@ -71,10 +71,10 @@ public class JobEndNotifier implements Configurable {
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
);
waitInterval = Math.min(
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5)
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5)
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000)
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000)
);
waitInterval = (waitInterval < 0) ? 5 : waitInterval;
waitInterval = (waitInterval < 0) ? 5000 : waitInterval;
userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);

View File

@ -55,22 +55,22 @@ public class TestJobEndNotifier extends JobEndNotifier {
//Test maximum retry interval is capped by
//MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL
private void testWaitInterval(Configuration conf) {
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000");
setConf(conf);
Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval,
waitInterval == 1);
Assert.assertTrue("Expected waitInterval to be 1000, but was "
+ waitInterval, waitInterval == 1000);
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000");
setConf(conf);
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
waitInterval == 5);
Assert.assertTrue("Expected waitInterval to be 5000, but was "
+ waitInterval, waitInterval == 5000);
//Test negative numbers are set to default
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10");
setConf(conf);
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval,
waitInterval == 5);
Assert.assertTrue("Expected waitInterval to be 5000, but was "
+ waitInterval, waitInterval == 5000);
}
private void testProxyConfiguration(Configuration conf) {
@ -125,17 +125,28 @@ public class TestJobEndNotifier extends JobEndNotifier {
public void testNotifyRetries() throws InterruptedException {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
JobReport jobReport = Mockito.mock(JobReport.class);
long startTime = System.currentTimeMillis();
this.notificationCount = 0;
this.setConf(conf);
this.notify(jobReport);
long endTime = System.currentTimeMillis();
Assert.assertEquals("Only 1 try was expected but was : "
+ this.notificationCount, this.notificationCount, 1);
Assert.assertTrue("Should have taken more than 5 seconds it took "
+ (endTime - startTime), endTime - startTime > 5000);
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
startTime = System.currentTimeMillis();
this.notificationCount = 0;
this.setConf(conf);
this.notify(jobReport);
endTime = System.currentTimeMillis();
Assert.assertEquals("Only 3 retries were expected but was : "
+ this.notificationCount, this.notificationCount, 3);
Assert.assertTrue("Should have taken more than 9 seconds it took "

View File

@ -985,35 +985,6 @@
</description>
</property>
<!-- Job Notification Configuration -->
<!--
<property>
<name>mapreduce.job.end-notification.url</name>
<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>
<description>Indicates url which will be called on completion of job to inform
end status of job.
User can give at most 2 variables with URI : $jobId and $jobStatus.
If they are present in URI, then they will be replaced by their
respective values.
</description>
</property>
-->
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>0</value>
<description>Indicates how many times hadoop should attempt to contact the
notification URL </description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>30000</value>
<description>Indicates time in milliseconds between notification URL retry
calls</description>
</property>
<!-- Proxy Configuration -->
<property>
<name>mapreduce.jobtracker.taskcache.levels</name>
@ -1235,6 +1206,34 @@
</description>
</property>
<!-- Job Notification Configuration -->
<property>
<name>mapreduce.job.end-notification.url</name>
<!--<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>-->
<description>Indicates the URL that will be called on completion of a job to
report the job's end status.
The URL may contain at most 2 variables: $jobId and $jobStatus.
If they are present in the URL, they will be replaced by their
respective values.
</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>0</value>
<description>The number of times the submitter of the job wants to retry job
end notification if it fails. This is capped by
mapreduce.job.end-notification.max.attempts</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>1000</value>
<description>The number of milliseconds the submitter of the job wants to
wait before job end notification is retried if it fails. This is capped by
mapreduce.job.end-notification.max.retry.interval</description>
</property>
<property>
<name>mapreduce.job.end-notification.max.attempts</name>
<value>5</value>
@ -1248,36 +1247,12 @@
<property>
<name>mapreduce.job.end-notification.max.retry.interval</name>
<value>5</value>
<value>5000</value>
<final>true</final>
<description>The maximum amount of time (in seconds) to wait before retrying
job end notification. Cluster administrators can set this to limit how long
the Application Master waits before exiting. Must be marked as final to
prevent users from overriding this.</description>
</property>
<property>
<name>mapreduce.job.end-notification.url</name>
<value></value>
<description>The URL to send job end notification. It may contain sentinels
$jobId and $jobStatus which will be replaced with jobId and jobStatus.
</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>5</value>
<description>The number of times the submitter of the job wants to retry job
end notification if it fails. This is capped by
mapreduce.job.end-notification.max.attempts</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>1</value>
<description>The number of seconds the submitter of the job wants to wait
before job end notification is retried if it fails. This is capped by
mapreduce.job.end-notification.max.retry.interval</description>
<description>The maximum amount of time (in milliseconds) to wait before
retrying job end notification. Cluster administrators can set this to
limit how long the Application Master waits before exiting. Must be marked
as final to prevent users from overriding this.</description>
</property>
<property>