YARN-10802. Change Capacity Scheduler minimum-user-limit-percent to accept decimal values. Contributed by Benjamin Teke

This commit is contained in:
Szilard Nemeth 2021-06-14 22:33:04 +02:00
parent ebee2aed00
commit e31d06032b
10 changed files with 170 additions and 48 deletions

View File

@ -636,8 +636,8 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
absoluteResourceCapacity);
}
public int getUserLimit(String queue) {
int userLimit = getInt(getQueuePrefix(queue) + USER_LIMIT,
public float getUserLimit(String queue) {
float userLimit = getFloat(getQueuePrefix(queue) + USER_LIMIT,
DEFAULT_USER_LIMIT);
return userLimit;
}
@ -686,8 +686,8 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
return orderingPolicy;
}
public void setUserLimit(String queue, int userLimit) {
setInt(getQueuePrefix(queue) + USER_LIMIT, userLimit);
public void setUserLimit(String queue, float userLimit) {
setFloat(getQueuePrefix(queue) + USER_LIMIT, userLimit);
LOG.debug("here setUserLimit: queuePrefix={}, userLimit={}",
getQueuePrefix(queue), getUserLimit(queue));
}

View File

@ -255,7 +255,7 @@ public class LeafQueue extends AbstractCSQueue {
conf.getDefaultApplicationPriorityConfPerQueue(getQueuePath()));
// Validate leaf queue's user's weights.
int queueUL = Math.min(100, conf.getUserLimit(getQueuePath()));
float queueUL = Math.min(100.0f, conf.getUserLimit(getQueuePath()));
for (Entry<String, Float> e : getUserWeights().entrySet()) {
float val = e.getValue().floatValue();
if (val < 0.0f || val > (100.0f / queueUL)) {
@ -367,17 +367,17 @@ public class LeafQueue extends AbstractCSQueue {
}
/**
* Set user limit - used only for testing.
* Set user limit.
* @param userLimit new user limit
*/
@VisibleForTesting
void setUserLimit(int userLimit) {
void setUserLimit(float userLimit) {
usersManager.setUserLimit(userLimit);
usersManager.userLimitNeedsRecompute();
}
/**
* Set user limit factor - used only for testing.
* Set user limit factor.
* @param userLimitFactor new user limit factor
*/
@VisibleForTesting
@ -444,7 +444,7 @@ public class LeafQueue extends AbstractCSQueue {
}
@Private
public int getUserLimit() {
public float getUserLimit() {
return usersManager.getUserLimit();
}

View File

@ -38,7 +38,7 @@ public class PlanQueue extends AbstractManagedParentQueue {
private int maxAppsForReservation;
private int maxAppsPerUserForReservation;
private int userLimit;
private float userLimit;
private float userLimitFactor;
protected CapacitySchedulerContext schedulerContext;
private boolean showReservationsAsQueues;
@ -60,15 +60,16 @@ public class PlanQueue extends AbstractManagedParentQueue {
DEFAULT_MAXIMUM_SYSTEM_APPLICATIIONS * super
.getAbsoluteCapacity());
}
int userLimit = conf.getUserLimit(queuePath);
float userLimitFactor = conf.getUserLimitFactor(queuePath);
int maxAppsPerUserForReservation =
(int) (maxAppsForReservation * (userLimit / 100.0f) * userLimitFactor);
if (userLimitFactor == -1) {
maxAppsPerUserForReservation = maxAppsForReservation;
float configuredUserLimit = conf.getUserLimit(queuePath);
float configuredUserLimitFactor = conf.getUserLimitFactor(queuePath);
int configuredMaxAppsPerUserForReservation =
(int) (maxAppsForReservation * (configuredUserLimit / 100.0f) *
configuredUserLimitFactor);
if (configuredUserLimitFactor == -1) {
configuredMaxAppsPerUserForReservation = maxAppsForReservation;
}
updateQuotas(userLimit, userLimitFactor, maxAppsForReservation,
maxAppsPerUserForReservation);
updateQuotas(configuredUserLimit, configuredUserLimitFactor,
maxAppsForReservation, configuredMaxAppsPerUserForReservation);
StringBuffer queueInfo = new StringBuffer();
queueInfo.append("Created Plan Queue: ").append(queueName)
@ -76,9 +77,10 @@ public class PlanQueue extends AbstractManagedParentQueue {
.append("]\nwith max capacity: [").append(super.getMaximumCapacity())
.append("\nwith max reservation apps: [").append(maxAppsForReservation)
.append("]\nwith max reservation apps per user: [")
.append(maxAppsPerUserForReservation).append("]\nwith user limit: [")
.append(userLimit).append("]\nwith user limit factor: [")
.append(userLimitFactor).append("].");
.append(configuredMaxAppsPerUserForReservation)
.append("]\nwith user limit: [")
.append(configuredUserLimit).append("]\nwith user limit factor: [")
.append(configuredUserLimitFactor).append("].");
LOG.info(queueInfo.toString());
}
@ -123,12 +125,12 @@ public class PlanQueue extends AbstractManagedParentQueue {
}
}
private void updateQuotas(int userLimit, float userLimitFactor,
int maxAppsForReservation, int maxAppsPerUserForReservation) {
this.userLimit = userLimit;
this.userLimitFactor = userLimitFactor;
this.maxAppsForReservation = maxAppsForReservation;
this.maxAppsPerUserForReservation = maxAppsPerUserForReservation;
private void updateQuotas(float newUserLimit, float newUserLimitFactor,
int newMaxAppsForReservation, int newMaxAppsPerUserForReservation) {
this.userLimit = newUserLimit;
this.userLimitFactor = newUserLimitFactor;
this.maxAppsForReservation = newMaxAppsForReservation;
this.maxAppsPerUserForReservation = newMaxAppsPerUserForReservation;
}
/**
@ -155,7 +157,7 @@ public class PlanQueue extends AbstractManagedParentQueue {
*
* @return userLimit
*/
public int getUserLimitForReservation() {
public float getUserLimitForReservation() {
return userLimit;
}

View File

@ -73,7 +73,7 @@ public class ReservationQueue extends AbstractAutoCreatedLeafQueue {
}
}
private void updateQuotas(int userLimit, float userLimitFactor,
private void updateQuotas(float userLimit, float userLimitFactor,
int maxAppsForReservation, int maxAppsPerUserForReservation) {
setUserLimit(userLimit);
setUserLimitFactor(userLimitFactor);

View File

@ -77,7 +77,7 @@ public class UsersManager implements AbstractUsersManager {
private Map<String, Map<SchedulingMode, Long>> localVersionOfAllUsersState =
new HashMap<String, Map<SchedulingMode, Long>>();
private volatile int userLimit;
private volatile float userLimit;
private volatile float userLimitFactor;
private WriteLock writeLock;
@ -320,7 +320,7 @@ public class UsersManager implements AbstractUsersManager {
* Get configured user-limit.
* @return user limit
*/
public int getUserLimit() {
public float getUserLimit() {
return userLimit;
}
@ -328,7 +328,7 @@ public class UsersManager implements AbstractUsersManager {
* Set configured user-limit.
* @param userLimit user limit
*/
public void setUserLimit(int userLimit) {
public void setUserLimit(float userLimit) {
this.userLimit = userLimit;
}

View File

@ -197,14 +197,21 @@ class CapacitySchedulerPage extends RmView {
private void renderCommonLeafQueueInfo(ResponseInfo ri) {
ri.
__("Num Schedulable Applications:", Integer.toString(lqinfo.getNumActiveApplications())).
__("Num Non-Schedulable Applications:", Integer.toString(lqinfo.getNumPendingApplications())).
__("Num Containers:", Integer.toString(lqinfo.getNumContainers())).
__("Max Applications:", Integer.toString(lqinfo.getMaxApplications())).
__("Max Applications Per User:", Integer.toString(lqinfo.getMaxApplicationsPerUser())).
__("Configured Minimum User Limit Percent:", Integer.toString(lqinfo.getUserLimit()) + "%").
__("Num Schedulable Applications:",
Integer.toString(lqinfo.getNumActiveApplications())).
__("Num Non-Schedulable Applications:",
Integer.toString(lqinfo.getNumPendingApplications())).
__("Num Containers:",
Integer.toString(lqinfo.getNumContainers())).
__("Max Applications:",
Integer.toString(lqinfo.getMaxApplications())).
__("Max Applications Per User:",
Integer.toString(lqinfo.getMaxApplicationsPerUser())).
__("Configured Minimum User Limit Percent:",
lqinfo.getUserLimit() + "%").
__("Configured User Limit Factor:", lqinfo.getUserLimitFactor()).
__("Accessible Node Labels:", StringUtils.join(",", lqinfo.getNodeLabels())).
__("Accessible Node Labels:",
StringUtils.join(",", lqinfo.getNodeLabels())).
__("Ordering Policy: ", lqinfo.getOrderingPolicyDisplayName()).
__("Preemption:",
lqinfo.getPreemptionDisabled() ? "disabled" : "enabled").

View File

@ -43,7 +43,7 @@ public class CapacitySchedulerLeafQueueInfo extends CapacitySchedulerQueueInfo {
protected int numContainers;
protected int maxApplications;
protected int maxApplicationsPerUser;
protected int userLimit;
protected float userLimit;
protected UsersInfo users; // To add another level in the XML
protected float userLimitFactor;
protected float configuredMaxAMResourceLimit;
@ -130,7 +130,7 @@ public class CapacitySchedulerLeafQueueInfo extends CapacitySchedulerQueueInfo {
return maxApplicationsPerUser;
}
public int getUserLimit() {
public float getUserLimit() {
return userLimit;
}

View File

@ -1731,6 +1731,111 @@ public class TestLeafQueue {
1, a.getAbstractUsersManager().getNumActiveUsers());
}
/**
 * Exercises a leaf queue configured with a fractional (non-integer) user
 * limit of 50.1 and a user limit factor of 2.
 *
 * Two applications owned by two different users are submitted to queue A,
 * containers are assigned three times, and after each assignment the test
 * checks the queue's used memory and each application's consumption.
 * Finally it checks that only one active user is left.
 *
 * @throws Exception on any unexpected failure raised by the fixtures
 */
@Test
public void testDecimalUserLimits() throws Exception {
// Mock the queue
LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
// Unset maxCapacity by setting it to 1.0f
// NOTE(review): presumably 1.0f means 100% here - confirm against
// LeafQueue#setMaxCapacity.
a.setMaxCapacity(1.0f);
// Stub the cluster resource reported by the scheduler context.
when(csContext.getClusterResource())
.thenReturn(Resources.createResource(16 * GB, 32));
// Users
final String user0 = "user_0";
final String user1 = "user_1";
// Submit applications: one application attempt per user, both to queue a.
final ApplicationAttemptId appAttemptId0 =
TestUtils.getMockApplicationAttemptId(0, 0);
FiCaSchedulerApp app0 =
new FiCaSchedulerApp(appAttemptId0, user0, a,
a.getAbstractUsersManager(), spyRMContext);
a.submitApplicationAttempt(app0, user0);
final ApplicationAttemptId appAttemptId1 =
TestUtils.getMockApplicationAttemptId(1, 0);
FiCaSchedulerApp app1 =
new FiCaSchedulerApp(appAttemptId1, user1, a,
a.getAbstractUsersManager(), spyRMContext);
a.submitApplicationAttempt(app1, user1); // different user
// Setup some nodes: two mock nodes created with 8*GB each.
String host0 = "127.0.0.1";
FiCaSchedulerNode node0 =
TestUtils.getMockNode(host0, DEFAULT_RACK, 0, 8*GB);
String host1 = "127.0.0.2";
FiCaSchedulerNode node1 =
TestUtils.getMockNode(host1, DEFAULT_RACK, 0, 8*GB);
final int numNodes = 2;
// Cluster resource derived from the node count (2 * 8*GB of memory).
Resource clusterResource =
Resources.createResource(numNodes * (8*GB), numNodes * 16);
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
root.updateClusterResource(clusterResource,
new ResourceLimits(clusterResource));
// Setup resource-requests: app0 asks for 3*GB containers, app1 for 1*GB
// containers.
// NOTE(review): the third argument (2) is presumably the number of
// containers requested - confirm against TestUtils#createResourceRequest.
Priority priority = TestUtils.createMockPriority(1);
app0.updateResourceRequests(Collections.singletonList(
TestUtils.createResourceRequest(ResourceRequest.ANY, 3*GB, 2, true,
priority, recordFactory)));
app1.updateResourceRequests(Collections.singletonList(
TestUtils.createResourceRequest(ResourceRequest.ANY, 1*GB, 2, true,
priority, recordFactory)));
// Lookup maps handed to applyCSAssignment below.
Map<ApplicationAttemptId, FiCaSchedulerApp> apps = ImmutableMap.of(
app0.getApplicationAttemptId(), app0, app1.getApplicationAttemptId(),
app1);
Map<NodeId, FiCaSchedulerNode> nodes = ImmutableMap.of(node0.getNodeID(),
node0, node1.getNodeID(), node1);
// Start testing...
// Set a decimal user-limit (50.1) and a user limit factor of 2, then
// refresh the queues with the cluster resource.
a.setUserLimit(50.1f);
a.setUserLimitFactor(2);
root.updateClusterResource(clusterResource,
new ResourceLimits(clusterResource));
// There are two active users
assertEquals(2, a.getAbstractUsersManager().getNumActiveUsers());
// First assignment on node0: 1 container (3*GB) goes to user_0
applyCSAssignment(clusterResource,
a.assignContainers(clusterResource, node0,
new ResourceLimits(clusterResource),
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(3*GB, a.getUsedResources().getMemorySize());
assertEquals(3*GB, app0.getCurrentConsumption().getMemorySize());
assertEquals(0, app1.getCurrentConsumption().getMemorySize());
// Second assignment on node0. Since the user limit is 50.1% it has not
// been reached yet, so app_0 gets another container (6*GB in total).
applyCSAssignment(clusterResource,
a.assignContainers(clusterResource, node0,
new ResourceLimits(clusterResource),
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(6*GB, a.getUsedResources().getMemorySize());
assertEquals(6*GB, app0.getCurrentConsumption().getMemorySize());
assertEquals(0, app1.getCurrentConsumption().getMemorySize());
// Third assignment, this time on node1: app_0 stays at 6*GB and app_1
// receives its first container (1*GB), bringing the queue to 7*GB.
applyCSAssignment(clusterResource,
a.assignContainers(clusterResource, node1,
new ResourceLimits(clusterResource),
SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY), a, nodes, apps);
assertEquals(7*GB, a.getUsedResources().getMemorySize());
assertEquals(6*GB, app0.getCurrentConsumption().getMemorySize());
assertEquals(GB, app1.getCurrentConsumption().getMemorySize());
// app_0 has no outstanding resource requests left, so only one user
// is still active.
assertEquals("There should only be 1 active user!",
1, a.getAbstractUsersManager().getNumActiveUsers());
}
@Test
public void testUserSpecificUserLimits() throws Exception {
// Mock the queue

View File

@ -1164,19 +1164,27 @@ public class TestParentQueue {
assertEquals(b.getMaxApplications(), b.getMaxApplicationsPerUser());
// Extra cases for testing maxApplicationsPerUser
int halfPercent = 50;
int oneAndQuarterPercent = 125;
float halfPercent = 50f;
float oneAndQuarterPercent = 125f;
float thirdPercent = 33.3f;
a.getUsersManager().setUserLimit(halfPercent);
b.getUsersManager().setUserLimit(oneAndQuarterPercent);
root.updateClusterResource(clusterResource,
new ResourceLimits(clusterResource));
assertEquals(a.getMaxApplications() * halfPercent / 100,
assertEquals((int) (a.getMaxApplications() * halfPercent / 100),
a.getMaxApplicationsPerUser());
// Q_B's limit per user shouldn't be greater
// than the whole queue's application limit
assertEquals(b.getMaxApplications(), b.getMaxApplicationsPerUser());
b.getUsersManager().setUserLimit(thirdPercent);
root.updateClusterResource(clusterResource,
new ResourceLimits(clusterResource));
assertEquals((int) (b.getMaxApplications() * thirdPercent / 100),
b.getMaxApplicationsPerUser());
float userLimitFactorQueueA = 0.9f;
float userLimitFactorQueueB = 1.1f;
a.getUsersManager().setUserLimit(halfPercent);

View File

@ -100,7 +100,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTestBase {
int numContainers;
int maxApplications;
int maxApplicationsPerUser;
int userLimit;
float userLimit;
float userLimitFactor;
long defaultApplicationLifetime;
long maxApplicationLifetime;
@ -352,7 +352,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTestBase {
WebServicesTestUtils.getXmlInt(qElem, "maxApplications");
lqi.maxApplicationsPerUser =
WebServicesTestUtils.getXmlInt(qElem, "maxApplicationsPerUser");
lqi.userLimit = WebServicesTestUtils.getXmlInt(qElem, "userLimit");
lqi.userLimit = WebServicesTestUtils.getXmlFloat(qElem, "userLimit");
lqi.userLimitFactor =
WebServicesTestUtils.getXmlFloat(qElem, "userLimitFactor");
lqi.defaultApplicationLifetime =
@ -477,7 +477,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTestBase {
lqi.numContainers = info.getInt("numContainers");
lqi.maxApplications = info.getInt("maxApplications");
lqi.maxApplicationsPerUser = info.getInt("maxApplicationsPerUser");
lqi.userLimit = info.getInt("userLimit");
lqi.userLimit = (float) info.getDouble("userLimit");
lqi.userLimitFactor = (float) info.getDouble("userLimitFactor");
lqi.defaultApplicationLifetime =
info.getLong("defaultApplicationLifetime");
@ -553,7 +553,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTestBase {
(float)info.maxApplicationsPerUser, info.userLimitFactor);
assertEquals("userLimit doesn't match", csConf.getUserLimit(q),
info.userLimit);
info.userLimit, 1e-3f);
assertEquals("userLimitFactor doesn't match",
csConf.getUserLimitFactor(q), info.userLimitFactor, 1e-3f);