yarn.resourcemanager.fail-fast is used inconsistently. Contributed by Yuanbo Liu.

(cherry picked from commit d9ba6f3656)
This commit is contained in:
Junping Du 2018-07-03 14:46:44 +08:00
parent 3b266abdad
commit 3d6ba2dd4e
4 changed files with 25 additions and 3 deletions

View File

@ -207,4 +207,14 @@
</description>
</property>
<property>
<name>yarn.scheduler.capacity.application.fail-fast</name>
<value>false</value>
<description>
Whether the RM should fail during recovery if a previous application's
queue is no longer valid.
</description>
</property>
</configuration>

View File

@ -749,7 +749,7 @@ public class CapacityScheduler extends
if (queue == null) {
//During a restart, this indicates a queue was removed, which is
//not presently supported
if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
if (!getConfiguration().shouldAppFailFast(getConfig())) {
this.rmContext.getDispatcher().getEventHandler().handle(
new RMAppEvent(applicationId, RMAppEventType.KILL,
"Application killed on recovery as it"
@ -770,7 +770,7 @@ public class CapacityScheduler extends
if (!(queue instanceof LeafQueue)) {
// During RM restart, this means leaf queue was converted to a parent
// queue, which is not supported for running apps.
if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
if (!getConfiguration().shouldAppFailFast(getConfig())) {
this.rmContext.getDispatcher().getEventHandler().handle(
new RMAppEvent(applicationId, RMAppEventType.KILL,
"Application killed on recovery as it was "
@ -829,7 +829,7 @@ public class CapacityScheduler extends
return autoCreateLeafQueue(placementContext);
} catch (YarnException | IOException e) {
if (isRecovery) {
if (!YarnConfiguration.shouldRMFailFast(getConfig())) {
if (!getConfiguration().shouldAppFailFast(getConfig())) {
LOG.error("Could not auto-create leaf queue " + queueName +
" due to : ", e);
this.rmContext.getDispatcher().getEventHandler().handle(

View File

@ -248,6 +248,12 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
public static final String SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS =
SCHEDULE_ASYNCHRONOUSLY_PREFIX + ".maximum-pending-backlogs";
/**
 * Configuration key, built as PREFIX + "application.fail-fast", that is
 * looked up by shouldAppFailFast.
 */
@Private
public static final String APP_FAIL_FAST = PREFIX + "application.fail-fast";
/** Fallback passed to the lookup of APP_FAIL_FAST in shouldAppFailFast. */
@Private
public static final boolean DEFAULT_APP_FAIL_FAST = false;
@Private
public static final Integer
DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS = 100;
@ -1287,6 +1293,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
return getBoolean(LAZY_PREEMPTION_ENABLED, DEFAULT_LAZY_PREEMPTION_ENABLED);
}
/**
 * Looks up the application fail-fast flag in the supplied configuration.
 *
 * Note that the lookup is performed on the {@code conf} argument, not on
 * this configuration instance.
 *
 * @param conf configuration in which {@link #APP_FAIL_FAST} is looked up
 * @return the boolean obtained for {@link #APP_FAIL_FAST}, with
 *         {@link #DEFAULT_APP_FAIL_FAST} supplied as the fallback value
 */
public boolean shouldAppFailFast(Configuration conf) {
  final boolean failFast =
      conf.getBoolean(APP_FAIL_FAST, DEFAULT_APP_FAIL_FAST);
  return failFast;
}
private static final String PREEMPTION_CONFIG_PREFIX =
"yarn.resourcemanager.monitor.capacity.preemption.";

View File

@ -760,6 +760,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
MockMemoryRMStateStore memStore, RMState state) throws Exception {
// Restart RM with fail-fast as false. App should be killed.
csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, false);
csConf.setBoolean(CapacitySchedulerConfiguration.APP_FAIL_FAST, false);
rm2 = new MockRM(csConf, memStore);
rm2.start();
@ -794,6 +795,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase
// Now restart RM with fail-fast as true. QueueException should be thrown.
csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, true);
csConf.setBoolean(CapacitySchedulerConfiguration.APP_FAIL_FAST, true);
MockRM rm = new MockRM(csConf, memStore2);
try {
rm.start();