svn merge -c 1408444 FIXES: MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by default (Ravi Prakash via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1408447 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Joseph Evans 2012-11-12 20:38:55 +00:00
parent cc384335f9
commit 32850b486b
4 changed files with 65 additions and 76 deletions

View File

@ -510,6 +510,9 @@ Release 0.23.5 - UNRELEASED
MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe MAPREDUCE-4425. Speculation + Fetch failures can lead to a hung job (jlowe
via bobby) via bobby)
MAPREDUCE-4786. Job End Notification retry interval is 5 milliseconds by
default (Ravi Prakash via bobby)
Release 0.23.4 - UNRELEASED Release 0.23.4 - UNRELEASED

View File

@ -53,7 +53,7 @@ public class JobEndNotifier implements Configurable {
protected String userUrl; protected String userUrl;
protected String proxyConf; protected String proxyConf;
protected int numTries; //Number of tries to attempt notification protected int numTries; //Number of tries to attempt notification
protected int waitInterval; //Time to wait between retrying notification protected int waitInterval; //Time (ms) to wait between retrying notification
protected URL urlToNotify; //URL to notify read from the config protected URL urlToNotify; //URL to notify read from the config
protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification protected Proxy proxyToUse = Proxy.NO_PROXY; //Proxy to use for notification
@ -71,10 +71,10 @@ public class JobEndNotifier implements Configurable {
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1) , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, 1)
); );
waitInterval = Math.min( waitInterval = Math.min(
conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5) conf.getInt(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, 5000)
, conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5) , conf.getInt(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, 5000)
); );
waitInterval = (waitInterval < 0) ? 5 : waitInterval; waitInterval = (waitInterval < 0) ? 5000 : waitInterval;
userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL); userUrl = conf.get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL);

View File

@ -55,22 +55,22 @@ public class TestJobEndNotifier extends JobEndNotifier {
//Test maximum retry interval is capped by //Test maximum retry interval is capped by
//MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL //MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL
private void testWaitInterval(Configuration conf) { private void testWaitInterval(Configuration conf) {
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5"); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "5000");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1"); conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "1000");
setConf(conf); setConf(conf);
Assert.assertTrue("Expected waitInterval to be 1, but was " + waitInterval, Assert.assertTrue("Expected waitInterval to be 1000, but was "
waitInterval == 1); + waitInterval, waitInterval == 1000);
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10"); conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "10000");
setConf(conf); setConf(conf);
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval, Assert.assertTrue("Expected waitInterval to be 5000, but was "
waitInterval == 5); + waitInterval, waitInterval == 5000);
//Test negative numbers are set to default //Test negative numbers are set to default
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10"); conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "-10");
setConf(conf); setConf(conf);
Assert.assertTrue("Expected waitInterval to be 5, but was " + waitInterval, Assert.assertTrue("Expected waitInterval to be 5000, but was "
waitInterval == 5); + waitInterval, waitInterval == 5000);
} }
private void testProxyConfiguration(Configuration conf) { private void testProxyConfiguration(Configuration conf) {
@ -125,17 +125,28 @@ public class TestJobEndNotifier extends JobEndNotifier {
public void testNotifyRetries() throws InterruptedException { public void testNotifyRetries() throws InterruptedException {
Configuration conf = new Configuration(); Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent"); conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
JobReport jobReport = Mockito.mock(JobReport.class); JobReport jobReport = Mockito.mock(JobReport.class);
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
this.notificationCount = 0; this.notificationCount = 0;
this.setConf(conf); this.setConf(conf);
this.notify(jobReport); this.notify(jobReport);
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
Assert.assertEquals("Only 1 try was expected but was : "
+ this.notificationCount, this.notificationCount, 1);
Assert.assertTrue("Should have taken more than 5 seconds it took "
+ (endTime - startTime), endTime - startTime > 5000);
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_ATTEMPTS, "3");
conf.set(MRJobConfig.MR_JOB_END_RETRY_INTERVAL, "3000");
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_MAX_RETRY_INTERVAL, "3000");
startTime = System.currentTimeMillis();
this.notificationCount = 0;
this.setConf(conf);
this.notify(jobReport);
endTime = System.currentTimeMillis();
Assert.assertEquals("Only 3 retries were expected but was : " Assert.assertEquals("Only 3 retries were expected but was : "
+ this.notificationCount, this.notificationCount, 3); + this.notificationCount, this.notificationCount, 3);
Assert.assertTrue("Should have taken more than 9 seconds it took " Assert.assertTrue("Should have taken more than 9 seconds it took "

View File

@ -985,35 +985,6 @@
</description> </description>
</property> </property>
<!-- Job Notification Configuration -->
<!--
<property>
<name>mapreduce.job.end-notification.url</name>
<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>
<description>Indicates url which will be called on completion of job to inform
end status of job.
User can give at most 2 variables with URI : $jobId and $jobStatus.
If they are present in URI, then they will be replaced by their
respective values.
</description>
</property>
-->
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>0</value>
<description>Indicates how many times hadoop should attempt to contact the
notification URL </description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>30000</value>
<description>Indicates time in milliseconds between notification URL retry
calls</description>
</property>
<!-- Proxy Configuration --> <!-- Proxy Configuration -->
<property> <property>
<name>mapreduce.jobtracker.taskcache.levels</name> <name>mapreduce.jobtracker.taskcache.levels</name>
@ -1235,6 +1206,34 @@
</description> </description>
</property> </property>
<!-- Job Notification Configuration -->
<property>
<name>mapreduce.job.end-notification.url</name>
<!--<value>http://localhost:8080/jobstatus.php?jobId=$jobId&amp;jobStatus=$jobStatus</value>-->
<description>Indicates url which will be called on completion of job to inform
end status of job.
The URL may contain at most two variables: $jobId and $jobStatus.
If they are present in the URL, they are replaced by their
respective values.
</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>0</value>
<description>The number of times the submitter of the job wants to retry job
end notification if it fails. This is capped by
mapreduce.job.end-notification.max.attempts</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>1000</value>
<description>The number of milliseconds the submitter of the job wants to
wait before job end notification is retried if it fails. This is capped by
mapreduce.job.end-notification.max.retry.interval</description>
</property>
<property> <property>
<name>mapreduce.job.end-notification.max.attempts</name> <name>mapreduce.job.end-notification.max.attempts</name>
<value>5</value> <value>5</value>
@ -1248,36 +1247,12 @@
<property> <property>
<name>mapreduce.job.end-notification.max.retry.interval</name> <name>mapreduce.job.end-notification.max.retry.interval</name>
<value>5</value> <value>5000</value>
<final>true</final> <final>true</final>
<description>The maximum amount of time (in seconds) to wait before retrying <description>The maximum amount of time (in milliseconds) to wait before
job end notification. Cluster administrators can set this to limit how long retrying job end notification. Cluster administrators can set this to
the Application Master waits before exiting. Must be marked as final to limit how long the Application Master waits before exiting. Must be marked
prevent users from overriding this.</description> as final to prevent users from overriding this.</description>
</property>
<property>
<name>mapreduce.job.end-notification.url</name>
<value></value>
<description>The URL to send job end notification. It may contain sentinels
$jobId and $jobStatus which will be replaced with jobId and jobStatus.
</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.attempts</name>
<value>5</value>
<description>The number of times the submitter of the job wants to retry job
end notification if it fails. This is capped by
mapreduce.job.end-notification.max.attempts</description>
</property>
<property>
<name>mapreduce.job.end-notification.retry.interval</name>
<value>1</value>
<description>The number of seconds the submitter of the job wants to wait
before job end notification is retried if it fails. This is capped by
mapreduce.job.end-notification.max.retry.interval</description>
</property> </property>
<property> <property>