diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java index 6002346fcaf..3dd5a7eaba1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java @@ -31,6 +31,8 @@ public class YarnServiceConf { // Retry settings for container failures public static final String CONTAINER_RETRY_MAX = "yarn.service.container-failure.retry.max"; public static final String CONTAINER_RETRY_INTERVAL = "yarn.service.container-failure.retry-interval-ms"; + public static final String CONTAINER_FAILURES_VALIDITY_INTERVAL = + "yarn.service.container-failure.validity-interval-ms"; public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts"; public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java index f497985072f..dc51b250723 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/containerlaunch/AbstractLauncher.java @@ -169,10 +169,11 @@ public class AbstractLauncher { return containerLaunchContext; } - public void setRetryContext(int maxRetries, int retryInterval) { + public void setRetryContext(int maxRetries, int retryInterval, + long failuresValidityInterval) { ContainerRetryContext retryContext = ContainerRetryContext - .newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, maxRetries, - retryInterval); + .newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, + maxRetries, retryInterval, failuresValidityInterval); containerLaunchContext.setContainerRetryContext(retryContext); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java index 70155915ea6..2f840b1678c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/provider/AbstractProviderService.java @@ -39,6 +39,7 @@ import java.io.IOException; import java.util.Map; import java.util.Map.Entry; +import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURES_VALIDITY_INTERVAL; import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_INTERVAL; import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_MAX; import static 
org.apache.hadoop.yarn.service.utils.ServiceApiUtil.$; @@ -109,6 +110,8 @@ public abstract class AbstractProviderService implements ProviderService, .getInt(CONTAINER_RETRY_MAX, -1, service.getConfiguration(), yarnConf), YarnServiceConf .getInt(CONTAINER_RETRY_INTERVAL, 30000, service.getConfiguration(), - yarnConf)); + yarnConf), + YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, -1, + service.getConfiguration(), yarnConf)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md index 88550757645..7ec2ecb879d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/yarn-service/Configurations.md @@ -113,6 +113,7 @@ Above config make the service AM to be retried at max 10 times. |yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds | |yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever. |yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds | +|yarn.service.container-failure.validity-interval-ms | the failure validity interval in milliseconds. When set to a value greater than 0, failures that happened outside of this interval are not included in the failure count. By default, it is set to -1, which means that all the failures so far will be included in the failure count. | |yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM |yarn.service.am-resource.memory | the memory size in GB for the framework AM. 
By default, it is set to 1024 |yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to `default` queue