yarn.resourcemanager.fail-fast is used inconsistently. Contributed by Yuanbo Liu.
This commit is contained in:
parent
59a3038bc3
commit
d9ba6f3656
|
@ -207,4 +207,14 @@
|
|||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<property>
|
||||
<name>yarn.scheduler.capacity.application.fail-fast</name>
|
||||
<value>false</value>
|
||||
<description>
|
||||
Whether the RM should fail during recovery if a previous application's
|
||||
queue is no longer valid.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
|
|
|
@ -786,7 +786,7 @@ public class CapacityScheduler extends
|
|||
if (queue == null) {
|
||||
//During a restart, this indicates a queue was removed, which is
|
||||
//not presently supported
|
||||
if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
|
||||
if (!getConfiguration().shouldAppFailFast(getConfig())) {
|
||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
||||
new RMAppEvent(applicationId, RMAppEventType.KILL,
|
||||
"Application killed on recovery as it"
|
||||
|
@ -807,7 +807,7 @@ public class CapacityScheduler extends
|
|||
if (!(queue instanceof LeafQueue)) {
|
||||
// During RM restart, this means leaf queue was converted to a parent
|
||||
// queue, which is not supported for running apps.
|
||||
if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
|
||||
if (!getConfiguration().shouldAppFailFast(getConfig())) {
|
||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
||||
new RMAppEvent(applicationId, RMAppEventType.KILL,
|
||||
"Application killed on recovery as it was "
|
||||
|
@ -866,7 +866,7 @@ public class CapacityScheduler extends
|
|||
return autoCreateLeafQueue(placementContext);
|
||||
} catch (YarnException | IOException e) {
|
||||
if (isRecovery) {
|
||||
if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
|
||||
if (!getConfiguration().shouldAppFailFast(getConfig())) {
|
||||
LOG.error("Could not auto-create leaf queue " + queueName +
|
||||
" due to : ", e);
|
||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
||||
|
|
|
@ -249,6 +249,12 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
|
|||
public static final String SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS =
|
||||
SCHEDULE_ASYNCHRONOUSLY_PREFIX + ".maximum-pending-backlogs";
|
||||
|
||||
@Private
|
||||
public static final String APP_FAIL_FAST = PREFIX + "application.fail-fast";
|
||||
|
||||
@Private
|
||||
public static final boolean DEFAULT_APP_FAIL_FAST = false;
|
||||
|
||||
@Private
|
||||
public static final Integer
|
||||
DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS = 100;
|
||||
|
@ -1336,6 +1342,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
|
|||
return getBoolean(LAZY_PREEMPTION_ENABLED, DEFAULT_LAZY_PREEMPTION_ENABLED);
|
||||
}
|
||||
|
||||
/**
 * Reports whether the application fail-fast flag is enabled.
 *
 * <p>The flag is looked up under {@link #APP_FAIL_FAST} and falls back to
 * {@link #DEFAULT_APP_FAIL_FAST} when the key is not set.
 *
 * <p>Note that the value is read from the {@code conf} argument, not from
 * this configuration instance.
 * NOTE(review): confirm callers pass a configuration that actually carries
 * this key; reading from {@code this} may have been the intent.
 *
 * @param conf configuration to read the flag from
 * @return the boolean value of {@code APP_FAIL_FAST} in {@code conf}, or the
 *         default when absent
 */
public boolean shouldAppFailFast(Configuration conf) {
|
||||
return conf.getBoolean(APP_FAIL_FAST, DEFAULT_APP_FAIL_FAST);
|
||||
}
|
||||
|
||||
private static final String PREEMPTION_CONFIG_PREFIX =
|
||||
"yarn.resourcemanager.monitor.capacity.preemption.";
|
||||
|
||||
|
|
|
@ -760,6 +760,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
|
|||
MockMemoryRMStateStore memStore, RMState state) throws Exception {
|
||||
// Restart RM with fail-fast as false. App should be killed.
|
||||
csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, false);
|
||||
csConf.setBoolean(CapacitySchedulerConfiguration.APP_FAIL_FAST, false);
|
||||
rm2 = new MockRM(csConf, memStore);
|
||||
rm2.start();
|
||||
|
||||
|
@ -794,6 +795,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
|
|||
|
||||
// Now restart RM with fail-fast as true. QueueException should be thrown.
|
||||
csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, true);
|
||||
csConf.setBoolean(CapacitySchedulerConfiguration.APP_FAIL_FAST, true);
|
||||
MockRM rm = new MockRM(csConf, memStore2);
|
||||
try {
|
||||
rm.start();
|
||||
|
|
Loading…
Reference in New Issue