YARN-10526. RMAppManager CS Placement ignores parent path. Contributed by Gergely Pollak

This commit is contained in:
Szilard Nemeth 2020-12-15 17:30:14 +01:00
parent 5bf977e6b1
commit df35c7f519
4 changed files with 114 additions and 7 deletions

View File

@ -500,11 +500,25 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
}
}
//In the case of capacity scheduler the queue name only means the name of
// the leaf queue, but since YARN-9879, internal queue references should
// use full path, so we get the queue and parent name from the placement
// context instead of the submissionContext.
String placementQueueName = submissionContext.getQueue();
if (placementContext != null && scheduler instanceof CapacityScheduler) {
if (placementContext.hasParentQueue()) {
placementQueueName = placementContext.getParentQueue() + "." +
placementContext.getQueue();
} else {
placementQueueName = placementContext.getQueue();
}
}
// Create RMApp
RMAppImpl application =
new RMAppImpl(applicationId, rmContext, this.conf,
submissionContext.getApplicationName(), user,
submissionContext.getQueue(),
placementQueueName,
submissionContext, this.scheduler, this.masterService,
submitTime, submissionContext.getApplicationType(),
submissionContext.getApplicationTags(), amReqs, placementContext,

View File

@ -1835,6 +1835,40 @@ public class CapacityScheduler extends
return assignment;
}
/**
* This method extracts the actual queue name from an app add event.
* Currently unfortunately ApplicationPlacementContext and
* ApplicationSubmissionContext are used in a quite erratic way, this method
* helps to get the proper placement path for the queue if placement context
* is provided
* @param appAddedEvent The application add event with details about the app
* @return The name of the queue the application should be added
*/
private String getAddedAppQueueName(AppAddedSchedulerEvent appAddedEvent) {
//appAddedEvent uses the queue from ApplicationSubmissionContext but in
//the case of CS it may be only a leaf name due to legacy reasons
String ret = appAddedEvent.getQueue();
ApplicationPlacementContext placementContext =
appAddedEvent.getPlacementContext();
//If we have a placement context, it means a mapping rule made a decision
//about the queue placement, so we use those data, it is supposed to be in
//sync with the ApplicationSubmissionContext and appAddedEvent.getQueue, but
//because of the aforementioned legacy reasons these two may only contain
//the leaf queue name.
if (placementContext != null) {
String leafName = placementContext.getQueue();
String parentName = placementContext.getParentQueue();
if (leafName != null) {
//building the proper queue path from the parent and leaf queue name
ret = placementContext.hasParentQueue() ?
(parentName + "." + leafName) : leafName;
}
}
return ret;
}
@Override
public void handle(SchedulerEvent event) {
switch(event.getType()) {
@ -1886,9 +1920,9 @@ public class CapacityScheduler extends
case APP_ADDED:
{
AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
String queueName = resolveReservationQueueName(appAddedEvent.getQueue(),
appAddedEvent.getApplicationId(), appAddedEvent.getReservationID(),
appAddedEvent.getIsAppRecovering());
String queueName = resolveReservationQueueName(
getAddedAppQueueName(appAddedEvent), appAddedEvent.getApplicationId(),
appAddedEvent.getReservationID(), appAddedEvent.getIsAppRecovering());
if (queueName != null) {
if (!appAddedEvent.getIsAppRecovering()) {
addApplication(appAddedEvent.getApplicationId(), queueName,

View File

@ -420,7 +420,7 @@ public class TestCapacitySchedulerAutoCreatedQueueBase {
(C, NODEL_LABEL_SSD);
LOG.info("Setup " + C + " as an auto leaf creation enabled parent queue");
LOG.info("Setup " + D + " as an auto leaf creation enabled parent queue");
conf.setUserLimitFactor(D, 1.0f);
conf.setAutoCreateChildQueueEnabled(D, true);

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabels
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.placement
.ApplicationPlacementContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@ -90,6 +91,7 @@ import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.C
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
@ -501,7 +503,12 @@ public class TestCapacitySchedulerAutoQueueCreation
assertNotNull(newCS.getQueue(USER0));
setupQueueMapping(newCS, USER0, "d", USER0);
//The new placement engine's validation is a bit more
//strict so it would reject the original u:user_0:a.user_0 rule since
//it checks if that paths exists or is a managed parent, but if we use
//a.%user we can trick the engine, since it cannot validate if the actual
//value of the %user will exist or not, it allows the rule
setupQueueMapping(newCS, USER0, "a", "%user");
newCS.updatePlacementRules();
RMContext rmContext = mock(RMContext.class);
@ -509,8 +516,10 @@ public class TestCapacitySchedulerAutoQueueCreation
newCS.setRMContext(rmContext);
ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
//The new engine would return root.a as the parent queue for this
// submission so creating the ApplicationPlacementContext accordingly
SchedulerEvent addAppEvent = new AppAddedSchedulerEvent(appId, USER0,
USER0, new ApplicationPlacementContext(USER0, "d"));
USER0, new ApplicationPlacementContext(USER0, "root.a"));
newCS.handle(addAppEvent);
RMAppEvent event = new RMAppEvent(appId, RMAppEventType.APP_REJECTED,
@ -524,6 +533,56 @@ public class TestCapacitySchedulerAutoQueueCreation
}
}
/**
* This test case checks if a mapping rule can put an application to an auto
* created queue even if an other queue with the same leaf name already
* exists.
*
* In this scenario we use the following queues
* root.a.a1 - already existing queue
* root.c - managed parent queue
*
* And the following mapping rule
* u:%user:root.c.%user - Any submission should go to root.c.USERNAME queue
*
* When user 'a1' submits a new application we expect it to go to 'root.c.a1'
* because of the mapping rule, and the queue should be created.
*
* @throws Exception - When stuff go wrong, obvious reason to fail the test
*/
@Test
public void testAutoQueueCreationWhenQueueExistsWithSameName()
throws Exception {
MockRM newMockRM = setupSchedulerInstance();
CapacityScheduler newCS =
(CapacityScheduler) newMockRM.getResourceScheduler();
try {
setupQueueMapping(newCS, "%user", "root.c", "%user");
newCS.updatePlacementRules();
//making sure the target queue does not exist before submission
assertNull(newCS.getQueue("root.c.a1"));
RMApp app = MockRMAppSubmitter.submit(newMockRM,
MockRMAppSubmissionData.Builder.createWithMemory(512, newMockRM)
.withAppName("testAutoQueueCreationWhenQueueExistsWithSameName")
.withUser("a1")
.withQueue("default")
.build());
RMAppAttempt attempt = MockRM.waitForAttemptScheduled(app, newMockRM);
//checking if the target queue have been created during the submission
assertNotNull(newCS.getQueue("root.c.a1"));
//making sure the application is indeed in the right queue
assertEquals("root.c.a1", app.getQueue());
} finally {
if (newMockRM != null) {
((CapacityScheduler) newMockRM.getResourceScheduler()).stop();
newMockRM.stop();
}
}
}
@Test
public void testAutoCreationFailsWhenParentCapacityExceeded()
throws Exception {