YARN-8032. Added ability to configure failure validity interval for YARN service.

Contributed by Chandni Singh
This commit is contained in:
Eric Yang 2018-03-23 15:54:52 -04:00
parent 7616683db5
commit 647058efc0
4 changed files with 11 additions and 4 deletions

View File

@ -31,6 +31,8 @@ public class YarnServiceConf {
// Retry settings for container failures
/**
 * Key for the maximum number of times a failed container is automatically
 * restarted. The visible call site reads it with a default of -1, which the
 * accompanying documentation describes as "retry forever".
 */
public static final String CONTAINER_RETRY_MAX = "yarn.service.container-failure.retry.max";
/**
 * Key for the interval, in milliseconds, between container restart attempts.
 * The visible call site reads it with a default of 30000 (30 seconds).
 */
public static final String CONTAINER_RETRY_INTERVAL = "yarn.service.container-failure.retry-interval-ms";
/**
 * Key for the failure validity interval, in milliseconds. Per the
 * accompanying documentation, when the value is greater than 0, failures
 * that happened outside this interval are not included in the failure count.
 * The visible call site reads it with a default of -1 (all failures so far
 * are counted).
 */
public static final String CONTAINER_FAILURES_VALIDITY_INTERVAL =
"yarn.service.container-failure.validity-interval-ms";
/** Key for the maximum number of attempts for the framework AM. */
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
/**
 * Key for the memory size of the framework AM.
 * NOTE(review): the unit is not established by the code shown here -- confirm
 * against the code that consumes this key.
 */
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";

View File

@ -169,10 +169,11 @@ public class AbstractLauncher {
return containerLaunchContext;
}
/**
 * Builds a {@link ContainerRetryContext} using the
 * {@code ContainerRetryPolicy.RETRY_ON_ALL_ERRORS} policy and installs it on
 * this launcher's {@code containerLaunchContext}.
 *
 * NOTE(review): this listing interleaves the pre-change two-argument form
 * (without {@code failuresValidityInterval}) and the post-change
 * three-argument form of the method; the description below follows the
 * three-argument form.
 *
 * @param maxRetries maximum number of retries passed to the retry context
 * @param retryInterval interval between retries passed to the retry context;
 *     presumably milliseconds, judging by the related configuration key
 *     name -- confirm against the caller
 * @param failuresValidityInterval failures validity interval passed to the
 *     retry context; presumably milliseconds -- confirm against the caller
 */
public void setRetryContext(int maxRetries, int retryInterval) {
public void setRetryContext(int maxRetries, int retryInterval,
long failuresValidityInterval) {
ContainerRetryContext retryContext = ContainerRetryContext
.newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, maxRetries,
retryInterval);
.newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null,
maxRetries, retryInterval, failuresValidityInterval);
containerLaunchContext.setContainerRetryContext(retryContext);
}

View File

@ -39,6 +39,7 @@ import java.io.IOException;
import java.util.Map;
import java.util.Map.Entry;
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURES_VALIDITY_INTERVAL;
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_INTERVAL;
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_MAX;
import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.$;
@ -109,6 +110,8 @@ public abstract class AbstractProviderService implements ProviderService,
.getInt(CONTAINER_RETRY_MAX, -1, service.getConfiguration(),
yarnConf), YarnServiceConf
.getInt(CONTAINER_RETRY_INTERVAL, 30000, service.getConfiguration(),
yarnConf));
yarnConf),
YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, -1,
service.getConfiguration(), yarnConf));
}
}

View File

@ -113,6 +113,7 @@ Above config make the service AM to be retried at max 10 times.
|yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds |
|yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever. |
|yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds |
|yarn.service.container-failure.validity-interval-ms | the failure validity interval in milliseconds. When set to a value greater than 0, failures that happened outside of this interval are not included in the failure count. By default, it is set to -1, which means that all failures so far are included in the failure count. |
|yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM |
|yarn.service.am-resource.memory | the memory size in MB for the framework AM. By default, it is set to 1024 |
|yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to the `default` queue |