YARN-8032. Added ability to configure failure validity interval for YARN service.
Contributed by Chandni Singh
(cherry-picked from 647058efc0)
This commit is contained in:
parent
148322ca72
commit
20453488e8
|
@ -31,6 +31,8 @@ public class YarnServiceConf {
|
||||||
// Retry settings for container failures
|
// Retry settings for container failures
|
||||||
public static final String CONTAINER_RETRY_MAX = "yarn.service.container-failure.retry.max";
|
public static final String CONTAINER_RETRY_MAX = "yarn.service.container-failure.retry.max";
|
||||||
public static final String CONTAINER_RETRY_INTERVAL = "yarn.service.container-failure.retry-interval-ms";
|
public static final String CONTAINER_RETRY_INTERVAL = "yarn.service.container-failure.retry-interval-ms";
|
||||||
|
public static final String CONTAINER_FAILURES_VALIDITY_INTERVAL =
|
||||||
|
"yarn.service.container-failure.validity-interval-ms";
|
||||||
|
|
||||||
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
|
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
|
||||||
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";
|
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";
|
||||||
|
|
|
@ -169,10 +169,11 @@ public class AbstractLauncher {
|
||||||
return containerLaunchContext;
|
return containerLaunchContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setRetryContext(int maxRetries, int retryInterval) {
|
public void setRetryContext(int maxRetries, int retryInterval,
|
||||||
|
long failuresValidityInterval) {
|
||||||
ContainerRetryContext retryContext = ContainerRetryContext
|
ContainerRetryContext retryContext = ContainerRetryContext
|
||||||
.newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null, maxRetries,
|
.newInstance(ContainerRetryPolicy.RETRY_ON_ALL_ERRORS, null,
|
||||||
retryInterval);
|
maxRetries, retryInterval, failuresValidityInterval);
|
||||||
containerLaunchContext.setContainerRetryContext(retryContext);
|
containerLaunchContext.setContainerRetryContext(retryContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,7 @@ import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_FAILURES_VALIDITY_INTERVAL;
|
||||||
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_INTERVAL;
|
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_INTERVAL;
|
||||||
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_MAX;
|
import static org.apache.hadoop.yarn.service.conf.YarnServiceConf.CONTAINER_RETRY_MAX;
|
||||||
import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.$;
|
import static org.apache.hadoop.yarn.service.utils.ServiceApiUtil.$;
|
||||||
|
@ -109,6 +110,8 @@ public abstract class AbstractProviderService implements ProviderService,
|
||||||
.getInt(CONTAINER_RETRY_MAX, -1, service.getConfiguration(),
|
.getInt(CONTAINER_RETRY_MAX, -1, service.getConfiguration(),
|
||||||
yarnConf), YarnServiceConf
|
yarnConf), YarnServiceConf
|
||||||
.getInt(CONTAINER_RETRY_INTERVAL, 30000, service.getConfiguration(),
|
.getInt(CONTAINER_RETRY_INTERVAL, 30000, service.getConfiguration(),
|
||||||
yarnConf));
|
yarnConf),
|
||||||
|
YarnServiceConf.getLong(CONTAINER_FAILURES_VALIDITY_INTERVAL, -1,
|
||||||
|
service.getConfiguration(), yarnConf));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,6 +113,7 @@ Above config make the service AM to be retried at max 10 times.
|
||||||
|yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds |
|
|yarn.service.client-am.retry-interval-ms | the retry interval in milliseconds for the service client to talk to the service AM. By default, it is 2000, i.e. 2 seconds |
|
||||||
|yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever.
|
|yarn.service.container-failure.retry.max | the max number of retries for the container to be auto restarted if it fails. By default, it is set to -1, which means forever.
|
||||||
|yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds |
|
|yarn.service.container-failure.retry-interval-ms| the retry interval in milliseconds for the container to be restarted. By default, it is 30000, i.e. 30 seconds |
|
||||||
|
|yarn.service.container-failure.validity-interval-ms | the failure validity interval in milliseconds. When set to a value greater than 0, failures that happened outside of this interval are not counted towards the failure count. By default, it is set to -1, which means that all failures so far are included in the failure count. |
|
||||||
|yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM
|
|yarn.service.am-restart.max-attempts| the max number of attempts for the framework AM
|
||||||
|yarn.service.am-resource.memory | the memory size in GB for the framework AM. By default, it is set to 1024
|
|yarn.service.am-resource.memory | the memory size in GB for the framework AM. By default, it is set to 1024
|
||||||
|yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to `default` queue
|
|yarn.service.queue | the default queue to which the service will be submitted. By default, it is submitted to `default` queue
|
||||||
|
|
Loading…
Reference in New Issue