YARN-10869. CS considers only the default maximum-allocation-mb/vcore property as a maximum when it creates dynamic queues (#3504)

Co-authored-by: Benjamin Teke <bteke@cloudera.com>
This commit is contained in:
Benjamin Teke 2021-10-12 18:05:50 +02:00 committed by GitHub
parent 9e2936f8d1
commit 700045896c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 27 deletions

View File

@ -100,7 +100,7 @@ public abstract class AbstractCSQueue implements CSQueue {
String defaultLabelExpression;
private String multiNodeSortingPolicyName = null;
Map<AccessType, AccessControlList> acls =
Map<AccessType, AccessControlList> acls =
new HashMap<AccessType, AccessControlList>();
volatile boolean reservationsContinueLooking;
private volatile boolean preemptionDisabled;
@ -112,7 +112,7 @@ public abstract class AbstractCSQueue implements CSQueue {
volatile ResourceUsage queueUsage;
private final boolean fullPathQueueNamingPolicy = false;
// Track capacities like used-capacity/abs-used-capacity/capacity/abs-capacity,
// etc.
QueueCapacities queueCapacities;
@ -134,7 +134,7 @@ public abstract class AbstractCSQueue implements CSQueue {
protected CapacityConfigType capacityConfigType =
CapacityConfigType.NONE;
private final RecordFactory recordFactory =
private final RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
protected CapacitySchedulerContext csContext;
protected YarnAuthorizationProvider authorizer = null;
@ -250,12 +250,12 @@ public abstract class AbstractCSQueue implements CSQueue {
public QueueState getState() {
return state;
}
/**
 * Returns the metrics object held in this queue's {@code metrics} field.
 * The reference is handed out as-is; no copy is made.
 *
 * @return the {@link CSQueueMetrics} instance of this queue
 */
@Override
public CSQueueMetrics getMetrics() {
return metrics;
}
@Override
public String getQueueShortName() {
return queueName;
@ -283,7 +283,7 @@ public abstract class AbstractCSQueue implements CSQueue {
public void setParent(CSQueue newParentQueue) {
this.parent = newParentQueue;
}
/**
 * Returns the node labels stored in this queue's {@code accessibleLabels}
 * field. The set is returned directly, not as a defensive copy.
 *
 * @return the accessible node labels; NOTE(review): other code in this class
 *         null-checks {@code accessibleLabels}, so callers should presumably
 *         be prepared for {@code null} - confirm
 */
public Set<String> getAccessibleNodeLabels() {
return accessibleLabels;
}
@ -344,7 +344,7 @@ public abstract class AbstractCSQueue implements CSQueue {
public String getDefaultNodeLabelExpression() {
return defaultLabelExpression;
}
void setupQueueConfigs(Resource clusterResource)
throws IOException {
setupQueueConfigs(clusterResource, csContext.getConfiguration());
@ -471,8 +471,14 @@ public abstract class AbstractCSQueue implements CSQueue {
private void setupMaximumAllocation(CapacitySchedulerConfiguration csConf) {
String myQueuePath = getQueuePath();
/* YARN-10869: When using AutoCreatedLeafQueues, the passed configuration
* object is a cloned one containing only the template configs
* (see ManagedParentQueue#getLeafQueueConfigs). To ensure that the actual
* cluster maximum allocation is fetched the original config object should
* be used.
*/
Resource clusterMax = ResourceUtils
.fetchMaximumAllocationFromConfig(csConf);
.fetchMaximumAllocationFromConfig(this.csContext.getConfiguration());
Resource queueMax = csConf.getQueueMaximumAllocation(myQueuePath);
maximumAllocation = Resources.clone(
@ -740,7 +746,7 @@ public abstract class AbstractCSQueue implements CSQueue {
stats.setReservedContainers(getMetrics().getReservedContainers());
return stats;
}
public Map<String, QueueConfigurations> getQueueConfigurations() {
Map<String, QueueConfigurations> queueConfigurations = new HashMap<>();
Set<String> nodeLabels = getNodeLabelsForQueue();
@ -776,12 +782,12 @@ public abstract class AbstractCSQueue implements CSQueue {
public Resource getMaximumAllocation() {
return maximumAllocation;
}
/**
 * Returns the value of this queue's {@code minimumAllocation} field.
 * The {@link Resource} reference is returned directly, without cloning.
 *
 * @return the minimum allocation configured on this queue object
 */
@Private
public Resource getMinimumAllocation() {
return minimumAllocation;
}
void allocateResource(Resource clusterResource,
Resource resource, String nodePartition) {
writeLock.lock();
@ -796,7 +802,7 @@ public abstract class AbstractCSQueue implements CSQueue {
writeLock.unlock();
}
}
protected void releaseResource(Resource clusterResource,
Resource resource, String nodePartition) {
writeLock.lock();
@ -811,12 +817,12 @@ public abstract class AbstractCSQueue implements CSQueue {
writeLock.unlock();
}
}
/**
 * Returns the current value of the volatile
 * {@code reservationsContinueLooking} flag.
 *
 * @return the flag value as last written to the field
 */
@Private
public boolean getReservationContinueLooking() {
return reservationsContinueLooking;
}
@Private
public Map<AccessType, AccessControlList> getACLs() {
readLock.lock();
@ -841,12 +847,12 @@ public abstract class AbstractCSQueue implements CSQueue {
public boolean getIntraQueuePreemptionDisabledInHierarchy() {
return intraQueuePreemptionDisabledInHierarchy;
}
/**
 * Returns the {@link QueueCapacities} object held in this queue's
 * {@code queueCapacities} field (the holder used to track the queue's
 * capacity values). The reference is returned directly, not copied.
 *
 * @return the capacities tracker of this queue
 */
@Private
public QueueCapacities getQueueCapacities() {
return queueCapacities;
}
@Private
public ResourceUsage getQueueResourceUsage() {
return queueUsage;
@ -1018,7 +1024,7 @@ public abstract class AbstractCSQueue implements CSQueue {
// all queues on this label equals to total resource with the label.
return labelManager.getResourceByLabel(nodePartition, clusterResource);
}
return Resources.none();
}
@ -1159,7 +1165,7 @@ public abstract class AbstractCSQueue implements CSQueue {
parent.incPendingResource(nodeLabel, resourceToInc);
}
}
@Override
public void decPendingResource(String nodeLabel, Resource resourceToDec) {
if (nodeLabel == null) {
@ -1171,7 +1177,7 @@ public abstract class AbstractCSQueue implements CSQueue {
parent.decPendingResource(nodeLabel, resourceToDec);
}
}
@Override
public void incUsedResource(String nodeLabel, Resource resourceToInc,
SchedulerApplicationAttempt application) {
@ -1206,14 +1212,14 @@ public abstract class AbstractCSQueue implements CSQueue {
/**
* Return if the queue has pending resource on given nodePartition and
* schedulingMode.
* schedulingMode.
*/
boolean hasPendingResourceRequest(String nodePartition,
boolean hasPendingResourceRequest(String nodePartition,
Resource cluster, SchedulingMode schedulingMode) {
return SchedulerUtils.hasPendingResourceRequest(resourceCalculator,
queueUsage, nodePartition, cluster, schedulingMode);
}
public boolean accessibleToPartition(String nodePartition) {
// if queue's label is *, it can access any node
if (accessibleLabels != null

View File

@ -953,6 +953,11 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
}
}
/**
 * Stores a maximum-allocation value for a queue.
 *
 * <p>The property name is the prefix returned by {@code getQueuePrefix(queue)}
 * followed by {@code MAXIMUM_ALLOCATION}. The value is written exactly as
 * given; this method does not parse or validate it.
 *
 * @param queue queue identifier passed to {@code getQueuePrefix}
 * @param maximumAllocation raw property value to store, for example
 *     {@code "memory-mb=10240,vcores=6"}
 */
public void setQueueMaximumAllocation(String queue, String maximumAllocation) {
  final String propertyName = getQueuePrefix(queue) + MAXIMUM_ALLOCATION;
  set(propertyName, maximumAllocation);
}
public long getQueueMaximumAllocationMb(String queue) {
String queuePrefix = getQueuePrefix(queue);
return getInt(queuePrefix + MAXIMUM_ALLOCATION_MB, (int)UNDEFINED);
@ -2061,6 +2066,15 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
setDefaultNodeLabelExpression(leafQueueConfPrefix, expression);
}
/**
 * Sets the maximum allocation in the auto-created leaf queue template that
 * belongs to the given queue path.
 *
 * <p>The template prefix is obtained from
 * {@code getAutoCreatedQueueTemplateConfPrefix(queuePath)} and then handed to
 * {@link #setQueueMaximumAllocation(String, String)} as its queue argument.
 *
 * @param queuePath path used to derive the template configuration prefix
 * @param expression maximum-allocation value to store, unmodified
 */
@Private
@VisibleForTesting
public void setAutoCreatedLeafQueueConfigMaximumAllocation(String
    queuePath, String expression) {
  setQueueMaximumAllocation(
      getAutoCreatedQueueTemplateConfPrefix(queuePath), expression);
}
public static String getUnits(String resourceValue) {
String units;
for (int i = 0; i < resourceValue.length(); i++) {

View File

@ -385,6 +385,9 @@ public class TestCapacitySchedulerAutoCreatedQueueBase {
conf.setAutoCreatedLeafQueueConfigMaxCapacity(C, 100.0f);
conf.setAutoCreatedLeafQueueConfigUserLimit(C, 100);
conf.setAutoCreatedLeafQueueConfigUserLimitFactor(C, 3.0f);
conf.setAutoCreatedLeafQueueConfigUserLimitFactor(C, 3.0f);
conf.setAutoCreatedLeafQueueConfigMaximumAllocation(C,
"memory-mb=10240,vcores=6");
conf.setAutoCreatedLeafQueueTemplateCapacityByLabel(C, NODEL_LABEL_GPU,
NODE_LABEL_GPU_TEMPLATE_CAPACITY);
@ -540,9 +543,31 @@ public class TestCapacitySchedulerAutoCreatedQueueBase {
schedConf.setInt(YarnConfiguration.RESOURCE_TYPES
+ ".memory-mb.maximum-allocation", 16384);
return new CapacitySchedulerConfiguration(schedConf);
}
/**
 * Reconfigures {@code conf} so that the container size limits are given
 * through the scheduler-level minimum/maximum allocation properties.
 *
 * <p>The per-resource-type limits are removed first via
 * {@code unsetMinMaxAllocation}; afterwards memory is bounded to
 * 1024..18384 MB and vcores to 1..8.
 *
 * @param conf configuration to modify in place
 */
protected void setSchedulerMinMaxAllocation(CapacitySchedulerConfiguration conf) {
  // Clear the resource-types based limits before setting the scheduler ones.
  unsetMinMaxAllocation(conf);

  // Memory bounds, in MB.
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 1024);
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 18384);

  // Virtual core bounds.
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 1);
  conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 8);
}
/**
 * Removes the per-resource-type minimum and maximum allocation properties
 * for vcores and memory-mb from {@code conf}.
 *
 * @param conf configuration to modify in place
 */
private void unsetMinMaxAllocation(CapacitySchedulerConfiguration conf) {
  // Property suffixes appended to YarnConfiguration.RESOURCE_TYPES.
  final String[] limitSuffixes = {
      ".vcores.minimum-allocation",
      ".vcores.maximum-allocation",
      ".memory-mb.minimum-allocation",
      ".memory-mb.maximum-allocation",
  };
  for (String suffix : limitSuffixes) {
    conf.unset(YarnConfiguration.RESOURCE_TYPES + suffix);
  }
}
protected MockRM setupSchedulerInstance() throws Exception {
if (mockRM != null) {
@ -640,10 +665,11 @@ public class TestCapacitySchedulerAutoCreatedQueueBase {
}
protected void validateContainerLimits(
AutoCreatedLeafQueue autoCreatedLeafQueue) {
assertEquals(8,
AutoCreatedLeafQueue autoCreatedLeafQueue, int vCoreLimit,
long memorySize) {
assertEquals(vCoreLimit,
autoCreatedLeafQueue.getMaximumAllocation().getVirtualCores());
assertEquals(16384,
assertEquals(memorySize,
autoCreatedLeafQueue.getMaximumAllocation().getMemorySize());
}

View File

@ -137,7 +137,7 @@ public class TestCapacitySchedulerAutoQueueCreation
expectedChildQueueAbsCapacity, accessibleNodeLabelsOnC);
validateUserAndAppLimits(autoCreatedLeafQueue, 1000, 1000);
validateContainerLimits(autoCreatedLeafQueue);
validateContainerLimits(autoCreatedLeafQueue, 6, 10240);
assertTrue(autoCreatedLeafQueue
.getOrderingPolicy() instanceof FairOrderingPolicy);
@ -166,6 +166,35 @@ public class TestCapacitySchedulerAutoQueueCreation
}
}
/**
 * Checks that an auto-created leaf queue reports a maximum allocation of
 * 8 vcores / 18384 MB when the limits are supplied through the
 * yarn.scheduler.minimum/maximum-allocation-mb/vcores properties and the
 * leaf queue template of C asks for the same values.
 *
 * @throws Exception if scheduler reinitialization, app submission or queue
 *         cleanup fails
 */
@Test(timeout = 20000)
public void testAutoCreateLeafQueueCreationSchedulerMaximumAllocation()
throws Exception {
try {
// Check the minimum/maximum allocation settings via the
// yarn.scheduler.minimum/maximum-allocation-mb/vcore property
setSchedulerMinMaxAllocation(cs.getConfiguration());
// Request the same limits (18384 MB, 8 vcores) in the leaf queue template
// of C.
cs.getConfiguration().setAutoCreatedLeafQueueConfigMaximumAllocation(C,
"memory-mb=18384,vcores=8");
// Reinitialize the scheduler with the configuration modified above.
cs.reinitialize(cs.getConfiguration(), mockRM.getRMContext());
// submit an app
submitApp(mockRM, cs.getQueue(PARENT_QUEUE), USER0, USER0, 1, 1);
// check preconditions
List<ApplicationAttemptId> appsInC = cs.getAppsInQueue(PARENT_QUEUE);
assertEquals(1, appsInC.size());
assertNotNull(cs.getQueue(USER0));
AutoCreatedLeafQueue autoCreatedLeafQueue =
(AutoCreatedLeafQueue) cs.getQueue(USER0);
// The leaf queue must expose the limits configured above.
validateContainerLimits(autoCreatedLeafQueue, 8, 18384);
} finally {
// Always clean up the queues, whether or not the assertions passed.
cleanupQueue(USER0);
cleanupQueue(TEST_GROUPUSER);
}
}
@Test(timeout = 20000)
public void testAutoCreateLeafQueueCreationUsingFullParentPath()
throws Exception {
@ -825,7 +854,7 @@ public class TestCapacitySchedulerAutoQueueCreation
validateCapacities(user3Queue, 0.3f, 0.09f, 0.4f,0.2f);
validateUserAndAppLimits(user3Queue, 900, 900);
validateContainerLimits(user3Queue);
validateContainerLimits(user3Queue, 6, 10240);
GuaranteedOrZeroCapacityOverTimePolicy autoCreatedQueueManagementPolicy =
(GuaranteedOrZeroCapacityOverTimePolicy) ((ManagedParentQueue)