NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line splits were validated against the hunk headers; leading indentation is
reconstructed from the project's 2-space convention and is an assumption --
verify against the target tree or apply with `git apply --ignore-whitespace`.

Summary: log container relaunches without a meaningless retry count when the
retry policy is RETRY_FOREVER, make RetryContext.getRemainingRetries() report
RETRY_FOREVER in that case, and assert the remaining-retry count in the tests.

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index f88dfbfb716..6716dbb02e9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -1653,10 +1653,17 @@ public ContainerState transition(final ContainerImpl container,
 
   private void doRelaunch(final ContainerImpl container,
       int remainingRetryAttempts, final int retryInterval) {
-    LOG.info("Relaunching Container " + container.getContainerId()
-        + ". Remaining retry attempts(after relaunch) : "
-        + remainingRetryAttempts + ". Interval between retries is "
-        + retryInterval + "ms");
+    if (remainingRetryAttempts == ContainerRetryContext.RETRY_FOREVER) {
+      LOG.info("Relaunching Container {}. " +
+              "retry interval {} ms", container.getContainerId(),
+          retryInterval);
+    } else {
+      LOG.info("Relaunching Container {}. " +
+              "remaining retry attempts(after relaunch) {}, " +
+              "retry interval {} ms", container.getContainerId(),
+          remainingRetryAttempts, retryInterval);
+    }
+
     container.wasLaunched = false;
     container.metrics.endRunningContainer();
     if (retryInterval == 0) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java
index 9360669bb66..957764fe4b3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/SlidingWindowRetryPolicy.java
@@ -153,6 +153,10 @@ ContainerRetryContext getContainerRetryContext() {
     }
 
     int getRemainingRetries() {
+      if (containerRetryContext.getMaxRetries() ==
+          ContainerRetryContext.RETRY_FOREVER) {
+        return ContainerRetryContext.RETRY_FOREVER;
+      }
       return remainingRetries;
     }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java
index bacf3bbf182..2aa4605c540 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestSlidingWindowRetryPolicy.java
@@ -43,8 +43,12 @@ public void setup() {
   public void testNeverRetry() {
     ContainerRetryContext retryContext =
         ContainerRetryContext.NEVER_RETRY_CONTEXT;
-    Assert.assertFalse("never retry", retryPolicy.shouldRetry(
-        new SlidingWindowRetryPolicy.RetryContext(retryContext), 12));
+    SlidingWindowRetryPolicy.RetryContext windowContext = new
+        SlidingWindowRetryPolicy.RetryContext(retryContext);
+    Assert.assertFalse("never retry", retryPolicy.shouldRetry(windowContext,
+        12));
+    Assert.assertEquals("remaining retries", 0,
+        windowContext.getRemainingRetries());
   }
 
   @Test
@@ -52,8 +56,13 @@ public void testAlwaysRetry() {
     ContainerRetryContext retryContext = ContainerRetryContext.newInstance(
         ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, -1,
         0, 10);
-    Assert.assertTrue("always retry", retryPolicy.shouldRetry(
-        new SlidingWindowRetryPolicy.RetryContext(retryContext), 12));
+    SlidingWindowRetryPolicy.RetryContext windowContext = new
+        SlidingWindowRetryPolicy.RetryContext(retryContext);
+    Assert.assertTrue("always retry", retryPolicy.shouldRetry(windowContext,
+        12));
+    Assert.assertEquals("remaining retries",
+        ContainerRetryContext.RETRY_FOREVER,
+        windowContext.getRemainingRetries());
   }
 
   @Test
@@ -65,19 +74,28 @@ public void testFailuresValidityInterval() {
     Assert.assertTrue("retry 1",
         retryPolicy.shouldRetry(windowRetryContext, 12));
     retryPolicy.updateRetryContext(windowRetryContext);
+    Assert.assertEquals("remaining retries", 1,
+        windowRetryContext.getRemainingRetries());
 
     clock.setTime(20);
     Assert.assertTrue("retry 2",
         retryPolicy.shouldRetry(windowRetryContext, 12));
     retryPolicy.updateRetryContext(windowRetryContext);
+    Assert.assertEquals("remaining retries", 1,
+        windowRetryContext.getRemainingRetries());
 
     clock.setTime(40);
     Assert.assertTrue("retry 3",
         retryPolicy.shouldRetry(windowRetryContext, 12));
     retryPolicy.updateRetryContext(windowRetryContext);
+    Assert.assertEquals("remaining retries", 1,
+        windowRetryContext.getRemainingRetries());
 
     clock.setTime(45);
     Assert.assertFalse("retry failed",
         retryPolicy.shouldRetry(windowRetryContext, 12));
+    retryPolicy.updateRetryContext(windowRetryContext);
+    Assert.assertEquals("remaining retries", 0,
+        windowRetryContext.getRemainingRetries());
   }
 }