YARN-8032. Added ability to configure failure validity interval for YARN service.
Contributed by Chandni Singh
This commit is contained in:
parent
7616683db5
commit
647058efc0
|
@ -31,6 +31,8 @@ public class YarnServiceConf {
|
|||
// Retry settings for container failures
|
||||
// Max number of times a failed container is auto-restarted; callers in this
// change default it to -1, which the docs describe as "retry forever".
public static final String CONTAINER_RETRY_MAX = "yarn.service.container-failure.retry.max";
|
||||
// Interval in milliseconds between container restarts; callers in this
// change default it to 30000 (30 seconds).
public static final String CONTAINER_RETRY_INTERVAL = "yarn.service.container-failure.retry-interval-ms";
|
||||
// Failure validity interval in milliseconds. Per the docs, a value greater
// than 0 excludes failures older than the interval from the failure count;
// the default of -1 counts every failure so far.
public static final String CONTAINER_FAILURES_VALIDITY_INTERVAL =
|
||||
"yarn.service.container-failure.validity-interval-ms";
|
||||
|
||||
// Max number of attempts for the framework AM.
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
|
||||
// Memory size for the framework AM (docs list a default of 1024).
// NOTE(review): the docs say GB, but 1024 suggests MB - confirm the unit.
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";
|
||||
|
|
|
@ -169,10 +169,11 @@ public class AbstractLauncher {
|
|||
return containerLaunchContext;
|
||||
}
|
||||
|
||||
/**
 * Builds a {@link ContainerRetryContext} with the
 * {@code RETRY_ON_ALL_ERRORS} policy and installs it on this launcher's
 * {@code containerLaunchContext}.
 *
 * NOTE: this is a diff view. The two-argument signature and the
 * four-argument {@code newInstance} call are the pre-change lines; the
 * three-argument signature and the five-argument {@code newInstance} call
 * are what this commit introduces.
 *
 * @param maxRetries max number of container retries (the caller in this
 *          change defaults it to -1, documented as "forever")
 * @param retryInterval interval between retries, in milliseconds
 * @param failuresValidityInterval failure validity interval, in
 *          milliseconds (the caller in this change defaults it to -1)
 */
public void setRetryContext(int maxRetries, int retryInterval) {
|
||||
public void setRetryContext(int maxRetries, int retryInterval,
|
||||
long failuresValidityInterval) {
|
||||
ContainerRetryContext retryContext = ContainerRetryContext
|
||||
.newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, maxRetries,
|
||||
retryInterval);
|
||||
.newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null,
|
||||
maxRetries, retryInterval, failuresValidityInterval);
|
||||
// Attach the retry settings to the launch context built by this launcher.
containerLaunchContext.setContainerRetryContext(retryContext);
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURES_VALIDITY_INTERVAL;
|
||||
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_INTERVAL;
|
||||
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_MAX;
|
||||
import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.$;
|
||||
|
@ -109,6 +110,8 @@ public abstract class AbstractProviderService implements ProviderService,
|
|||
.getInt(CONTAINER_RETRY_MAX, -1, service.getConfiguration(),
|
||||
yarnConf), YarnServiceConf
|
||||
.getInt(CONTAINER_RETRY_INTERVAL, 30000, service.getConfiguration(),
|
||||
yarnConf));
|
||||
yarnConf),
|
||||
YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, -1,
|
||||
service.getConfiguration(), yarnConf));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -113,6 +113,7 @@ Above config make the service AM to be retried at max 10 times.
|
|||
|yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds |
|
||||
|yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever. |
|
||||
|yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds |
|
||||
|yarn.service.container-failure.validity-interval-ms | the failure validity interval in milliseconds. When set to a value greater than 0, failures that happened outside of this interval are not counted toward the container failure count. By default, it is set to -1, which means that all failures so far are included in the failure count. |
|
||||
|yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM |
|
||||
|yarn.service.am-resource.memory | the memory size in MB for the framework AM. By default, it is set to 1024 |
|
||||
|yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to `default` queue |
|
||||
|
|
Loading…
Reference in New Issue