YARN-9930. Support max running app logic for CapacityScheduler. Contributed by Peter Bacsko
This commit is contained in:
parent
9821b94c94
commit
469841446f
|
@ -146,6 +146,7 @@ public abstract class AbstractCSQueue implements CSQueue {
|
||||||
|
|
||||||
volatile Priority priority = Priority.newInstance(0);
|
volatile Priority priority = Priority.newInstance(0);
|
||||||
private Map<String, Float> userWeights = new HashMap<String, Float>();
|
private Map<String, Float> userWeights = new HashMap<String, Float>();
|
||||||
|
private int maxParallelApps;
|
||||||
|
|
||||||
public AbstractCSQueue(CapacitySchedulerContext cs,
|
public AbstractCSQueue(CapacitySchedulerContext cs,
|
||||||
String queueName, CSQueue parent, CSQueue old) throws IOException {
|
String queueName, CSQueue parent, CSQueue old) throws IOException {
|
||||||
|
@ -390,6 +391,11 @@ public abstract class AbstractCSQueue implements CSQueue {
|
||||||
// and queue setting
|
// and queue setting
|
||||||
setupMaximumAllocation(configuration);
|
setupMaximumAllocation(configuration);
|
||||||
|
|
||||||
|
// Max parallel apps
|
||||||
|
int queueMaxParallelApps =
|
||||||
|
configuration.getMaxParallelAppsForQueue(getQueuePath());
|
||||||
|
setMaxParallelApps(queueMaxParallelApps);
|
||||||
|
|
||||||
// initialized the queue state based on previous state, configured state
|
// initialized the queue state based on previous state, configured state
|
||||||
// and its parent state.
|
// and its parent state.
|
||||||
QueueState previous = getState();
|
QueueState previous = getState();
|
||||||
|
@ -1431,4 +1437,14 @@ public abstract class AbstractCSQueue implements CSQueue {
|
||||||
public boolean getDefaultAppLifetimeWasSpecifiedInConfig() {
|
public boolean getDefaultAppLifetimeWasSpecifiedInConfig() {
|
||||||
return defaultAppLifetimeWasSpecifiedInConfig;
|
return defaultAppLifetimeWasSpecifiedInConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setMaxParallelApps(int maxParallelApps) {
|
||||||
|
this.maxParallelApps = maxParallelApps;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getMaxParallelApps() {
|
||||||
|
return maxParallelApps;
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract int getNumRunnableApps();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,436 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import com.google.common.collect.ArrayListMultimap;
|
||||||
|
import com.google.common.collect.ListMultimap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handles tracking and enforcement for user and queue maxRunningApps
|
||||||
|
* constraints.
|
||||||
|
*/
|
||||||
|
public class CSMaxRunningAppsEnforcer {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(
|
||||||
|
CSMaxRunningAppsEnforcer.class);
|
||||||
|
|
||||||
|
private final CapacityScheduler scheduler;
|
||||||
|
|
||||||
|
// Tracks the number of running applications by user.
|
||||||
|
private final Map<String, Integer> usersNumRunnableApps;
|
||||||
|
|
||||||
|
private final ListMultimap<String, FiCaSchedulerApp> usersNonRunnableApps;
|
||||||
|
|
||||||
|
public CSMaxRunningAppsEnforcer(CapacityScheduler scheduler) {
|
||||||
|
this.scheduler = scheduler;
|
||||||
|
this.usersNumRunnableApps = new HashMap<String, Integer>();
|
||||||
|
this.usersNonRunnableApps = ArrayListMultimap.create();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether making the application runnable would exceed any
|
||||||
|
* maxRunningApps limits. Also sets the "runnable" flag on the
|
||||||
|
* attempt.
|
||||||
|
*
|
||||||
|
* @param attempt the app attempt being checked
|
||||||
|
* @return true if the application is runnable; false otherwise
|
||||||
|
*/
|
||||||
|
public boolean checkRunnabilityWithUpdate(
|
||||||
|
FiCaSchedulerApp attempt) {
|
||||||
|
boolean attemptCanRun = !exceedUserMaxParallelApps(attempt.getUser())
|
||||||
|
&& !exceedQueueMaxParallelApps(attempt.getCSLeafQueue());
|
||||||
|
|
||||||
|
attempt.setRunnable(attemptCanRun);
|
||||||
|
|
||||||
|
return attemptCanRun;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether the number of user runnable apps exceeds the limitation.
|
||||||
|
*
|
||||||
|
* @param user the user name
|
||||||
|
* @return true if the number hits the limit; false otherwise
|
||||||
|
*/
|
||||||
|
private boolean exceedUserMaxParallelApps(String user) {
|
||||||
|
Integer userNumRunnable = usersNumRunnableApps.get(user);
|
||||||
|
if (userNumRunnable == null) {
|
||||||
|
userNumRunnable = 0;
|
||||||
|
}
|
||||||
|
if (userNumRunnable >= getUserMaxParallelApps(user)) {
|
||||||
|
LOG.info("Maximum runnable apps exceeded for user {}", user);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively checks whether the number of queue runnable apps exceeds the
|
||||||
|
* limitation.
|
||||||
|
*
|
||||||
|
* @param queue the current queue
|
||||||
|
* @return true if the number hits the limit; false otherwise
|
||||||
|
*/
|
||||||
|
private boolean exceedQueueMaxParallelApps(AbstractCSQueue queue) {
|
||||||
|
// Check queue and all parent queues
|
||||||
|
while (queue != null) {
|
||||||
|
if (queue.getNumRunnableApps() >= queue.getMaxParallelApps()) {
|
||||||
|
LOG.info("Maximum runnable apps exceeded for queue {}",
|
||||||
|
queue.getQueuePath());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
queue = (AbstractCSQueue) queue.getParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void trackApp(FiCaSchedulerApp app) {
|
||||||
|
if (app.isRunnable()) {
|
||||||
|
trackRunnableApp(app);
|
||||||
|
} else {
|
||||||
|
trackNonRunnableApp(app);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Tracks the given new runnable app for purposes of maintaining max running
|
||||||
|
* app limits.
|
||||||
|
*/
|
||||||
|
private void trackRunnableApp(FiCaSchedulerApp app) {
|
||||||
|
String user = app.getUser();
|
||||||
|
AbstractCSQueue queue = (AbstractCSQueue) app.getQueue();
|
||||||
|
// Increment running counts for all parent queues
|
||||||
|
ParentQueue parent = (ParentQueue) queue.getParent();
|
||||||
|
while (parent != null) {
|
||||||
|
parent.incrementRunnableApps();
|
||||||
|
parent = (ParentQueue) parent.getParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer userNumRunnable = usersNumRunnableApps.get(user);
|
||||||
|
usersNumRunnableApps.put(user, (userNumRunnable == null ? 0
|
||||||
|
: userNumRunnable) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tracks the given new non runnable app so that it can be made runnable when
|
||||||
|
* it would not violate max running app limits.
|
||||||
|
*/
|
||||||
|
private void trackNonRunnableApp(FiCaSchedulerApp app) {
|
||||||
|
String user = app.getUser();
|
||||||
|
usersNonRunnableApps.put(user, app);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is called after reloading the allocation configuration when the
|
||||||
|
* scheduler is reinitialized
|
||||||
|
*
|
||||||
|
* Checks to see whether any non-runnable applications become runnable
|
||||||
|
* now that the max running apps of given queue has been changed
|
||||||
|
*
|
||||||
|
* Runs in O(n) where n is the number of apps that are non-runnable and in
|
||||||
|
* the queues that went from having no slack to having slack.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public void updateRunnabilityOnReload() {
|
||||||
|
ParentQueue rootQueue = (ParentQueue) scheduler.getRootQueue();
|
||||||
|
List<List<FiCaSchedulerApp>> appsNowMaybeRunnable =
|
||||||
|
new ArrayList<List<FiCaSchedulerApp>>();
|
||||||
|
|
||||||
|
gatherPossiblyRunnableAppLists(rootQueue, appsNowMaybeRunnable);
|
||||||
|
|
||||||
|
updateAppsRunnability(appsNowMaybeRunnable, Integer.MAX_VALUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks to see whether any other applications runnable now that the given
|
||||||
|
* application has been removed from the given queue. And makes them so.
|
||||||
|
*
|
||||||
|
* Runs in O(n log(n)) where n is the number of queues that are under the
|
||||||
|
* highest queue that went from having no slack to having slack.
|
||||||
|
*/
|
||||||
|
public void updateRunnabilityOnAppRemoval(FiCaSchedulerApp app) {
|
||||||
|
// childqueueX might have no pending apps itself, but if a queue higher up
|
||||||
|
// in the hierarchy parentqueueY has a maxRunningApps set, an app completion
|
||||||
|
// in childqueueX could allow an app in some other distant child of
|
||||||
|
// parentqueueY to become runnable.
|
||||||
|
// An app removal will only possibly allow another app to become runnable if
|
||||||
|
// the queue was already at its max before the removal.
|
||||||
|
// Thus we find the ancestor queue highest in the tree for which the app
|
||||||
|
// that was at its maxRunningApps before the removal.
|
||||||
|
LeafQueue queue = app.getCSLeafQueue();
|
||||||
|
AbstractCSQueue highestQueueWithAppsNowRunnable =
|
||||||
|
(queue.getNumRunnableApps() == queue.getMaxParallelApps() - 1)
|
||||||
|
? queue : null;
|
||||||
|
|
||||||
|
ParentQueue parent = (ParentQueue) queue.getParent();
|
||||||
|
while (parent != null) {
|
||||||
|
if (parent.getNumRunnableApps() == parent.getMaxParallelApps() - 1) {
|
||||||
|
highestQueueWithAppsNowRunnable = parent;
|
||||||
|
}
|
||||||
|
parent = (ParentQueue) parent.getParent();
|
||||||
|
}
|
||||||
|
|
||||||
|
List<List<FiCaSchedulerApp>> appsNowMaybeRunnable =
|
||||||
|
new ArrayList<List<FiCaSchedulerApp>>();
|
||||||
|
|
||||||
|
// Compile lists of apps which may now be runnable
|
||||||
|
// We gather lists instead of building a set of all non-runnable apps so
|
||||||
|
// that this whole operation can be O(number of queues) instead of
|
||||||
|
// O(number of apps)
|
||||||
|
if (highestQueueWithAppsNowRunnable != null) {
|
||||||
|
gatherPossiblyRunnableAppLists(highestQueueWithAppsNowRunnable,
|
||||||
|
appsNowMaybeRunnable);
|
||||||
|
}
|
||||||
|
String user = app.getUser();
|
||||||
|
Integer userNumRunning = usersNumRunnableApps.get(user);
|
||||||
|
if (userNumRunning == null) {
|
||||||
|
userNumRunning = 0;
|
||||||
|
}
|
||||||
|
if (userNumRunning == getUserMaxParallelApps(user) - 1) {
|
||||||
|
List<FiCaSchedulerApp> userWaitingApps = usersNonRunnableApps.get(user);
|
||||||
|
if (userWaitingApps != null) {
|
||||||
|
appsNowMaybeRunnable.add(userWaitingApps);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
updateAppsRunnability(appsNowMaybeRunnable,
|
||||||
|
appsNowMaybeRunnable.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks to see whether applications are runnable now by iterating
|
||||||
|
* through each one of them and check if the queue and user have slack.
|
||||||
|
*
|
||||||
|
* if we know how many apps can be runnable, there is no need to iterate
|
||||||
|
* through all apps, maxRunnableApps is used to break out of the iteration.
|
||||||
|
*/
|
||||||
|
private void updateAppsRunnability(List<List<FiCaSchedulerApp>>
|
||||||
|
appsNowMaybeRunnable, int maxRunnableApps) {
|
||||||
|
// Scan through and check whether this means that any apps are now runnable
|
||||||
|
Iterator<FiCaSchedulerApp> iter = new MultiListStartTimeIterator(
|
||||||
|
appsNowMaybeRunnable);
|
||||||
|
FiCaSchedulerApp prev = null;
|
||||||
|
List<FiCaSchedulerApp> noLongerPendingApps = new ArrayList<>();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
FiCaSchedulerApp next = iter.next();
|
||||||
|
if (next == prev) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (checkRunnabilityWithUpdate(next)) {
|
||||||
|
LeafQueue nextQueue = next.getCSLeafQueue();
|
||||||
|
LOG.info("{} is now runnable in {}",
|
||||||
|
next.getApplicationAttemptId(), nextQueue);
|
||||||
|
trackRunnableApp(next);
|
||||||
|
FiCaSchedulerApp appSched = next;
|
||||||
|
nextQueue.submitApplicationAttempt(next, next.getUser());
|
||||||
|
noLongerPendingApps.add(appSched);
|
||||||
|
|
||||||
|
if (noLongerPendingApps.size() >= maxRunnableApps) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prev = next;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We remove the apps from their pending lists afterwards so that we don't
|
||||||
|
// pull them out from under the iterator. If they are not in these lists
|
||||||
|
// in the first place, there is a bug.
|
||||||
|
for (FiCaSchedulerApp appSched : noLongerPendingApps) {
|
||||||
|
if (!(appSched.getCSLeafQueue().removeNonRunnableApp(appSched))) {
|
||||||
|
LOG.error("Can't make app runnable that does not already exist in queue"
|
||||||
|
+ " as non-runnable: {}. This should never happen.",
|
||||||
|
appSched.getApplicationAttemptId());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!usersNonRunnableApps.remove(appSched.getUser(), appSched)) {
|
||||||
|
LOG.error("Waiting app {} expected to be in "
|
||||||
|
+ "usersNonRunnableApps, but was not. This should never happen.",
|
||||||
|
appSched.getApplicationAttemptId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void untrackApp(FiCaSchedulerApp app) {
|
||||||
|
if (app.isRunnable()) {
|
||||||
|
untrackRunnableApp(app);
|
||||||
|
} else {
|
||||||
|
untrackNonRunnableApp(app);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates the relevant tracking variables after a runnable app with the given
|
||||||
|
* queue and user has been removed.
|
||||||
|
*/
|
||||||
|
private void untrackRunnableApp(FiCaSchedulerApp app) {
|
||||||
|
// Update usersRunnableApps
|
||||||
|
String user = app.getUser();
|
||||||
|
int newUserNumRunning = usersNumRunnableApps.get(user) - 1;
|
||||||
|
if (newUserNumRunning == 0) {
|
||||||
|
usersNumRunnableApps.remove(user);
|
||||||
|
} else {
|
||||||
|
usersNumRunnableApps.put(user, newUserNumRunning);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update runnable app bookkeeping for queues
|
||||||
|
AbstractCSQueue queue = (AbstractCSQueue) app.getQueue();
|
||||||
|
ParentQueue parent = (ParentQueue) queue.getParent();
|
||||||
|
while (parent != null) {
|
||||||
|
parent.decrementRunnableApps();
|
||||||
|
parent = (ParentQueue) parent.getParent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops tracking the given non-runnable app.
|
||||||
|
*/
|
||||||
|
private void untrackNonRunnableApp(FiCaSchedulerApp app) {
|
||||||
|
usersNonRunnableApps.remove(app.getUser(), app);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Traverses the queue hierarchy under the given queue to gather all lists
|
||||||
|
* of non-runnable applications.
|
||||||
|
*/
|
||||||
|
private void gatherPossiblyRunnableAppLists(AbstractCSQueue queue,
|
||||||
|
List<List<FiCaSchedulerApp>> appLists) {
|
||||||
|
if (queue.getNumRunnableApps() < queue.getMaxParallelApps()) {
|
||||||
|
if (queue instanceof LeafQueue) {
|
||||||
|
appLists.add(
|
||||||
|
((LeafQueue)queue).getCopyOfNonRunnableAppSchedulables());
|
||||||
|
} else {
|
||||||
|
for (CSQueue child : queue.getChildQueues()) {
|
||||||
|
gatherPossiblyRunnableAppLists((AbstractCSQueue) child, appLists);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getUserMaxParallelApps(String user) {
|
||||||
|
CapacitySchedulerConfiguration conf = scheduler.getConfiguration();
|
||||||
|
if (conf == null) {
|
||||||
|
return Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
int userMaxParallelApps = conf.getMaxParallelAppsForUser(user);
|
||||||
|
|
||||||
|
return userMaxParallelApps;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes a list of lists, each of which is ordered by start time, and returns
|
||||||
|
* their elements in order of start time.
|
||||||
|
*
|
||||||
|
* We maintain positions in each of the lists. Each next() call advances
|
||||||
|
* the position in one of the lists. We maintain a heap that orders lists
|
||||||
|
* by the start time of the app in the current position in that list.
|
||||||
|
* This allows us to pick which list to advance in O(log(num lists)) instead
|
||||||
|
* of O(num lists) time.
|
||||||
|
*/
|
||||||
|
static class MultiListStartTimeIterator implements
|
||||||
|
Iterator<FiCaSchedulerApp> {
|
||||||
|
|
||||||
|
private List<FiCaSchedulerApp>[] appLists;
|
||||||
|
private int[] curPositionsInAppLists;
|
||||||
|
private PriorityQueue<IndexAndTime> appListsByCurStartTime;
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
MultiListStartTimeIterator(List<List<FiCaSchedulerApp>> appListList) {
|
||||||
|
appLists = appListList.toArray(new List[appListList.size()]);
|
||||||
|
curPositionsInAppLists = new int[appLists.length];
|
||||||
|
appListsByCurStartTime = new PriorityQueue<IndexAndTime>();
|
||||||
|
for (int i = 0; i < appLists.length; i++) {
|
||||||
|
long time = appLists[i].isEmpty() ? Long.MAX_VALUE : appLists[i].get(0)
|
||||||
|
.getStartTime();
|
||||||
|
appListsByCurStartTime.add(new IndexAndTime(i, time));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
return !appListsByCurStartTime.isEmpty()
|
||||||
|
&& appListsByCurStartTime.peek().time != Long.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FiCaSchedulerApp next() {
|
||||||
|
IndexAndTime indexAndTime = appListsByCurStartTime.remove();
|
||||||
|
int nextListIndex = indexAndTime.index;
|
||||||
|
FiCaSchedulerApp next = appLists[nextListIndex]
|
||||||
|
.get(curPositionsInAppLists[nextListIndex]);
|
||||||
|
curPositionsInAppLists[nextListIndex]++;
|
||||||
|
|
||||||
|
if (curPositionsInAppLists[nextListIndex] <
|
||||||
|
appLists[nextListIndex].size()) {
|
||||||
|
indexAndTime.time = appLists[nextListIndex]
|
||||||
|
.get(curPositionsInAppLists[nextListIndex]).getStartTime();
|
||||||
|
} else {
|
||||||
|
indexAndTime.time = Long.MAX_VALUE;
|
||||||
|
}
|
||||||
|
appListsByCurStartTime.add(indexAndTime);
|
||||||
|
|
||||||
|
return next;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException("Remove not supported");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class IndexAndTime implements Comparable<IndexAndTime> {
|
||||||
|
private int index;
|
||||||
|
private long time;
|
||||||
|
|
||||||
|
IndexAndTime(int index, long time) {
|
||||||
|
this.index = index;
|
||||||
|
this.time = time;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(IndexAndTime o) {
|
||||||
|
return time < o.time ? -1 : (time > o.time ? 1 : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (!(o instanceof IndexAndTime)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IndexAndTime other = (IndexAndTime)o;
|
||||||
|
return other.time == time;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return (int)time;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -242,8 +242,11 @@ public class CapacityScheduler extends
|
||||||
private static final long DEFAULT_ASYNC_SCHEDULER_INTERVAL = 5;
|
private static final long DEFAULT_ASYNC_SCHEDULER_INTERVAL = 5;
|
||||||
private long asyncMaxPendingBacklogs;
|
private long asyncMaxPendingBacklogs;
|
||||||
|
|
||||||
|
private CSMaxRunningAppsEnforcer maxRunningEnforcer;
|
||||||
|
|
||||||
public CapacityScheduler() {
|
public CapacityScheduler() {
|
||||||
super(CapacityScheduler.class.getName());
|
super(CapacityScheduler.class.getName());
|
||||||
|
this.maxRunningEnforcer = new CSMaxRunningAppsEnforcer(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -483,6 +486,7 @@ public class CapacityScheduler extends
|
||||||
|
|
||||||
super.reinitialize(newConf, rmContext);
|
super.reinitialize(newConf, rmContext);
|
||||||
}
|
}
|
||||||
|
maxRunningEnforcer.updateRunnabilityOnReload();
|
||||||
} finally {
|
} finally {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
}
|
}
|
||||||
|
@ -1083,6 +1087,9 @@ public class CapacityScheduler extends
|
||||||
// SchedulerApplication#setCurrentAppAttempt.
|
// SchedulerApplication#setCurrentAppAttempt.
|
||||||
attempt.setPriority(application.getPriority());
|
attempt.setPriority(application.getPriority());
|
||||||
|
|
||||||
|
maxRunningEnforcer.checkRunnabilityWithUpdate(attempt);
|
||||||
|
maxRunningEnforcer.trackApp(attempt);
|
||||||
|
|
||||||
queue.submitApplicationAttempt(attempt, application.getUser());
|
queue.submitApplicationAttempt(attempt, application.getUser());
|
||||||
LOG.info("Added Application Attempt " + applicationAttemptId
|
LOG.info("Added Application Attempt " + applicationAttemptId
|
||||||
+ " to scheduler from user " + application.getUser() + " in queue "
|
+ " to scheduler from user " + application.getUser() + " in queue "
|
||||||
|
@ -1178,6 +1185,11 @@ public class CapacityScheduler extends
|
||||||
+ csQueue.getQueuePath());
|
+ csQueue.getQueuePath());
|
||||||
} else {
|
} else {
|
||||||
csQueue.finishApplicationAttempt(attempt, csQueue.getQueuePath());
|
csQueue.finishApplicationAttempt(attempt, csQueue.getQueuePath());
|
||||||
|
|
||||||
|
maxRunningEnforcer.untrackApp(attempt);
|
||||||
|
if (attempt.isRunnable()) {
|
||||||
|
maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
|
@ -3253,4 +3265,9 @@ public class CapacityScheduler extends
|
||||||
public int getNumAsyncSchedulerThreads() {
|
public int getNumAsyncSchedulerThreads() {
|
||||||
return asyncSchedulerThreads == null ? 0 : asyncSchedulerThreads.size();
|
return asyncSchedulerThreads == null ? 0 : asyncSchedulerThreads.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setMaxRunningAppsEnforcer(CSMaxRunningAppsEnforcer enforcer) {
|
||||||
|
this.maxRunningEnforcer = enforcer;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -378,6 +378,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
|
||||||
|
|
||||||
public static final Pattern RESOURCE_PATTERN = Pattern.compile(PATTERN_FOR_ABSOLUTE_RESOURCE);
|
public static final Pattern RESOURCE_PATTERN = Pattern.compile(PATTERN_FOR_ABSOLUTE_RESOURCE);
|
||||||
|
|
||||||
|
public static final String MAX_PARALLEL_APPLICATIONS = "max-parallel-apps";
|
||||||
|
|
||||||
|
public static final int DEFAULT_MAX_PARALLEL_APPLICATIONS = Integer.MAX_VALUE;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Different resource types supported.
|
* Different resource types supported.
|
||||||
*/
|
*/
|
||||||
|
@ -413,6 +417,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
|
||||||
return queueName;
|
return queueName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static String getUserPrefix(String user) {
|
||||||
|
return PREFIX + "user." + user + DOT;
|
||||||
|
}
|
||||||
|
|
||||||
private String getNodeLabelPrefix(String queue, String label) {
|
private String getNodeLabelPrefix(String queue, String label) {
|
||||||
if (label.equals(CommonNodeLabelsManager.NO_LABEL)) {
|
if (label.equals(CommonNodeLabelsManager.NO_LABEL)) {
|
||||||
return getQueuePrefix(queue);
|
return getQueuePrefix(queue);
|
||||||
|
@ -1392,6 +1400,31 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
|
||||||
return conf.getBoolean(APP_FAIL_FAST, DEFAULT_APP_FAIL_FAST);
|
return conf.getBoolean(APP_FAIL_FAST, DEFAULT_APP_FAIL_FAST);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Integer getMaxParallelAppsForQueue(String queue) {
|
||||||
|
int defaultMaxParallelAppsForQueue =
|
||||||
|
getInt(PREFIX + MAX_PARALLEL_APPLICATIONS,
|
||||||
|
DEFAULT_MAX_PARALLEL_APPLICATIONS);
|
||||||
|
|
||||||
|
String maxParallelAppsForQueue = get(getQueuePrefix(queue)
|
||||||
|
+ MAX_PARALLEL_APPLICATIONS);
|
||||||
|
|
||||||
|
return (maxParallelAppsForQueue != null) ?
|
||||||
|
Integer.parseInt(maxParallelAppsForQueue)
|
||||||
|
: defaultMaxParallelAppsForQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getMaxParallelAppsForUser(String user) {
|
||||||
|
int defaultMaxParallelAppsForUser =
|
||||||
|
getInt(PREFIX + "user." + MAX_PARALLEL_APPLICATIONS,
|
||||||
|
DEFAULT_MAX_PARALLEL_APPLICATIONS);
|
||||||
|
String maxParallelAppsForUser = get(getUserPrefix(user)
|
||||||
|
+ MAX_PARALLEL_APPLICATIONS);
|
||||||
|
|
||||||
|
return (maxParallelAppsForUser != null) ?
|
||||||
|
Integer.parseInt(maxParallelAppsForUser)
|
||||||
|
: defaultMaxParallelAppsForUser;
|
||||||
|
}
|
||||||
|
|
||||||
private static final String PREEMPTION_CONFIG_PREFIX =
|
private static final String PREEMPTION_CONFIG_PREFIX =
|
||||||
"yarn.resourcemanager.monitor.capacity.preemption.";
|
"yarn.resourcemanager.monitor.capacity.preemption.";
|
||||||
|
|
||||||
|
|
|
@ -129,6 +129,9 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
List<AppPriorityACLGroup> priorityAcls =
|
List<AppPriorityACLGroup> priorityAcls =
|
||||||
new ArrayList<AppPriorityACLGroup>();
|
new ArrayList<AppPriorityACLGroup>();
|
||||||
|
|
||||||
|
private final List<FiCaSchedulerApp> runnableApps = new ArrayList<>();
|
||||||
|
private final List<FiCaSchedulerApp> nonRunnableApps = new ArrayList<>();
|
||||||
|
|
||||||
@SuppressWarnings({ "unchecked", "rawtypes" })
|
@SuppressWarnings({ "unchecked", "rawtypes" })
|
||||||
public LeafQueue(CapacitySchedulerContext cs,
|
public LeafQueue(CapacitySchedulerContext cs,
|
||||||
String queueName, CSQueue parent, CSQueue old) throws IOException {
|
String queueName, CSQueue parent, CSQueue old) throws IOException {
|
||||||
|
@ -159,6 +162,7 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
setupQueueConfigs(clusterResource, csContext.getConfiguration());
|
setupQueueConfigs(clusterResource, csContext.getConfiguration());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("checkstyle:nowhitespaceafter")
|
||||||
protected void setupQueueConfigs(Resource clusterResource,
|
protected void setupQueueConfigs(Resource clusterResource,
|
||||||
CapacitySchedulerConfiguration conf) throws
|
CapacitySchedulerConfiguration conf) throws
|
||||||
IOException {
|
IOException {
|
||||||
|
@ -289,7 +293,9 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
+ " (int)(configuredMaximumSystemApplications * absoluteCapacity)]"
|
+ " (int)(configuredMaximumSystemApplications * absoluteCapacity)]"
|
||||||
+ "\n" + "maxApplicationsPerUser = " + maxApplicationsPerUser
|
+ "\n" + "maxApplicationsPerUser = " + maxApplicationsPerUser
|
||||||
+ " [= (int)(maxApplications * (userLimit / 100.0f) * "
|
+ " [= (int)(maxApplications * (userLimit / 100.0f) * "
|
||||||
+ "userLimitFactor) ]" + "\n" + "usedCapacity = "
|
+ "userLimitFactor) ]" + "\n"
|
||||||
|
+ "maxParallelApps = " + getMaxParallelApps() + "\n"
|
||||||
|
+ "usedCapacity = " +
|
||||||
+ queueCapacities.getUsedCapacity() + " [= usedResourcesMemory / "
|
+ queueCapacities.getUsedCapacity() + " [= usedResourcesMemory / "
|
||||||
+ "(clusterResourceMemory * absoluteCapacity)]" + "\n"
|
+ "(clusterResourceMemory * absoluteCapacity)]" + "\n"
|
||||||
+ "absoluteUsedCapacity = " + absoluteUsedCapacity
|
+ "absoluteUsedCapacity = " + absoluteUsedCapacity
|
||||||
|
@ -386,7 +392,8 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
public int getNumApplications() {
|
public int getNumApplications() {
|
||||||
readLock.lock();
|
readLock.lock();
|
||||||
try {
|
try {
|
||||||
return getNumPendingApplications() + getNumActiveApplications();
|
return getNumPendingApplications() + getNumActiveApplications() +
|
||||||
|
getNumNonRunnableApps();
|
||||||
} finally {
|
} finally {
|
||||||
readLock.unlock();
|
readLock.unlock();
|
||||||
}
|
}
|
||||||
|
@ -892,11 +899,23 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
User user) {
|
User user) {
|
||||||
writeLock.lock();
|
writeLock.lock();
|
||||||
try {
|
try {
|
||||||
|
applicationAttemptMap.put(application.getApplicationAttemptId(),
|
||||||
|
application);
|
||||||
|
|
||||||
|
if (application.isRunnable()) {
|
||||||
|
runnableApps.add(application);
|
||||||
|
LOG.debug("Adding runnable application: {}",
|
||||||
|
application.getApplicationAttemptId());
|
||||||
|
} else {
|
||||||
|
nonRunnableApps.add(application);
|
||||||
|
LOG.info("Application attempt {} is not runnable,"
|
||||||
|
+ " parallel limit reached", application.getApplicationAttemptId());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Accept
|
// Accept
|
||||||
user.submitApplication();
|
user.submitApplication();
|
||||||
getPendingAppsOrderingPolicy().addSchedulableEntity(application);
|
getPendingAppsOrderingPolicy().addSchedulableEntity(application);
|
||||||
applicationAttemptMap.put(application.getApplicationAttemptId(),
|
|
||||||
application);
|
|
||||||
|
|
||||||
// Activate applications
|
// Activate applications
|
||||||
if (Resources.greaterThan(resourceCalculator, lastClusterResource,
|
if (Resources.greaterThan(resourceCalculator, lastClusterResource,
|
||||||
|
@ -917,7 +936,9 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
.getPendingApplications() + " #user-active-applications: " + user
|
.getPendingApplications() + " #user-active-applications: " + user
|
||||||
.getActiveApplications() + " #queue-pending-applications: "
|
.getActiveApplications() + " #queue-pending-applications: "
|
||||||
+ getNumPendingApplications() + " #queue-active-applications: "
|
+ getNumPendingApplications() + " #queue-active-applications: "
|
||||||
+ getNumActiveApplications());
|
+ getNumActiveApplications()
|
||||||
|
+ " #queue-nonrunnable-applications: "
|
||||||
|
+ getNumNonRunnableApps());
|
||||||
} finally {
|
} finally {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
}
|
}
|
||||||
|
@ -950,6 +971,15 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
// which is caused by wrong invoking order, will fix UT separately
|
// which is caused by wrong invoking order, will fix UT separately
|
||||||
User user = usersManager.getUserAndAddIfAbsent(userName);
|
User user = usersManager.getUserAndAddIfAbsent(userName);
|
||||||
|
|
||||||
|
boolean runnable = runnableApps.remove(application);
|
||||||
|
if (!runnable) {
|
||||||
|
// removeNonRunnableApp acquires the write lock again, which is fine
|
||||||
|
if (!removeNonRunnableApp(application)) {
|
||||||
|
LOG.error("Given app to remove " + application +
|
||||||
|
" does not exist in queue " + getQueuePath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
String partitionName = application.getAppAMNodePartitionName();
|
String partitionName = application.getAppAMNodePartitionName();
|
||||||
boolean wasActive = orderingPolicy.removeSchedulableEntity(application);
|
boolean wasActive = orderingPolicy.removeSchedulableEntity(application);
|
||||||
if (!wasActive) {
|
if (!wasActive) {
|
||||||
|
@ -2229,4 +2259,43 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
usedSeconds);
|
usedSeconds);
|
||||||
metrics.updatePreemptedForCustomResources(containerResource);
|
metrics.updatePreemptedForCustomResources(containerResource);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
int getNumRunnableApps() {
|
||||||
|
readLock.lock();
|
||||||
|
try {
|
||||||
|
return runnableApps.size();
|
||||||
|
} finally {
|
||||||
|
readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int getNumNonRunnableApps() {
|
||||||
|
readLock.lock();
|
||||||
|
try {
|
||||||
|
return nonRunnableApps.size();
|
||||||
|
} finally {
|
||||||
|
readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean removeNonRunnableApp(FiCaSchedulerApp app) {
|
||||||
|
writeLock.lock();
|
||||||
|
try {
|
||||||
|
return nonRunnableApps.remove(app);
|
||||||
|
} finally {
|
||||||
|
writeLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<FiCaSchedulerApp> getCopyOfNonRunnableAppSchedulables() {
|
||||||
|
List<FiCaSchedulerApp> appsToReturn = new ArrayList<>();
|
||||||
|
readLock.lock();
|
||||||
|
try {
|
||||||
|
appsToReturn.addAll(nonRunnableApps);
|
||||||
|
} finally {
|
||||||
|
readLock.unlock();
|
||||||
|
}
|
||||||
|
return appsToReturn;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -93,6 +93,8 @@ public class ParentQueue extends AbstractCSQueue {
|
||||||
|
|
||||||
private long lastSkipQueueDebugLoggingTimestamp = -1;
|
private long lastSkipQueueDebugLoggingTimestamp = -1;
|
||||||
|
|
||||||
|
private int runnableApps;
|
||||||
|
|
||||||
public ParentQueue(CapacitySchedulerContext cs,
|
public ParentQueue(CapacitySchedulerContext cs,
|
||||||
String queueName, CSQueue parent, CSQueue old) throws IOException {
|
String queueName, CSQueue parent, CSQueue old) throws IOException {
|
||||||
super(cs, queueName, parent, old);
|
super(cs, queueName, parent, old);
|
||||||
|
@ -1383,4 +1385,32 @@ public class ParentQueue extends AbstractCSQueue {
|
||||||
public QueueOrderingPolicy getQueueOrderingPolicy() {
|
public QueueOrderingPolicy getQueueOrderingPolicy() {
|
||||||
return queueOrderingPolicy;
|
return queueOrderingPolicy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
int getNumRunnableApps() {
|
||||||
|
readLock.lock();
|
||||||
|
try {
|
||||||
|
return runnableApps;
|
||||||
|
} finally {
|
||||||
|
readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void incrementRunnableApps() {
|
||||||
|
writeLock.lock();
|
||||||
|
try {
|
||||||
|
runnableApps++;
|
||||||
|
} finally {
|
||||||
|
writeLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void decrementRunnableApps() {
|
||||||
|
writeLock.lock();
|
||||||
|
try {
|
||||||
|
runnableApps--;
|
||||||
|
} finally {
|
||||||
|
writeLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,6 +112,8 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
|
||||||
|
|
||||||
private AbstractContainerAllocator containerAllocator;
|
private AbstractContainerAllocator containerAllocator;
|
||||||
|
|
||||||
|
private boolean runnable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* to hold the message if its app doesn't not get container from a node
|
* to hold the message if its app doesn't not get container from a node
|
||||||
*/
|
*/
|
||||||
|
@ -139,6 +141,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
|
||||||
RMContext rmContext, Priority appPriority, boolean isAttemptRecovering,
|
RMContext rmContext, Priority appPriority, boolean isAttemptRecovering,
|
||||||
ActivitiesManager activitiesManager) {
|
ActivitiesManager activitiesManager) {
|
||||||
super(applicationAttemptId, user, queue, abstractUsersManager, rmContext);
|
super(applicationAttemptId, user, queue, abstractUsersManager, rmContext);
|
||||||
|
this.runnable = true;
|
||||||
|
|
||||||
RMApp rmApp = rmContext.getRMApps().get(getApplicationId());
|
RMApp rmApp = rmContext.getRMApps().get(getApplicationId());
|
||||||
|
|
||||||
|
@ -1219,4 +1222,22 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
|
||||||
writeLock.unlock();
|
writeLock.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setRunnable(boolean runnable) {
|
||||||
|
writeLock.lock();
|
||||||
|
try {
|
||||||
|
this.runnable = runnable;
|
||||||
|
} finally {
|
||||||
|
writeLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isRunnable() {
|
||||||
|
readLock.lock();
|
||||||
|
try {
|
||||||
|
return runnable;
|
||||||
|
} finally {
|
||||||
|
readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSMaxRunningAppsEnforcer;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
|
||||||
|
@ -175,6 +176,9 @@ public class TestReservationSystem extends
|
||||||
|
|
||||||
CapacityScheduler cs = Mockito.spy(new CapacityScheduler());
|
CapacityScheduler cs = Mockito.spy(new CapacityScheduler());
|
||||||
cs.setConf(conf);
|
cs.setConf(conf);
|
||||||
|
CSMaxRunningAppsEnforcer enforcer =
|
||||||
|
Mockito.mock(CSMaxRunningAppsEnforcer.class);
|
||||||
|
cs.setMaxRunningAppsEnforcer(enforcer);
|
||||||
|
|
||||||
mockRMContext = ReservationSystemTestUtil.createRMContext(conf);
|
mockRMContext = ReservationSystemTestUtil.createRMContext(conf);
|
||||||
|
|
||||||
|
|
|
@ -184,6 +184,7 @@ public class TestApplicationLimits {
|
||||||
doReturn(amResource).when(application).getAMResource(
|
doReturn(amResource).when(application).getAMResource(
|
||||||
CommonNodeLabelsManager.NO_LABEL);
|
CommonNodeLabelsManager.NO_LABEL);
|
||||||
when(application.compareInputOrderTo(any(FiCaSchedulerApp.class))).thenCallRealMethod();
|
when(application.compareInputOrderTo(any(FiCaSchedulerApp.class))).thenCallRealMethod();
|
||||||
|
when(application.isRunnable()).thenReturn(true);
|
||||||
return application;
|
return application;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,278 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.PREFIX;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyString;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivitiesManager;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.security.AppPriorityACLsManager;
|
||||||
|
import org.apache.hadoop.yarn.util.ControlledClock;
|
||||||
|
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestCSMaxRunningAppsEnforcer {
|
||||||
|
private CapacitySchedulerQueueManager queueManager;
|
||||||
|
private CSMaxRunningAppsEnforcer maxAppsEnforcer;
|
||||||
|
private int appNum;
|
||||||
|
private ControlledClock clock;
|
||||||
|
private RMContext rmContext;
|
||||||
|
private CapacityScheduler scheduler;
|
||||||
|
private ActivitiesManager activitiesManager;
|
||||||
|
private CapacitySchedulerConfiguration csConfig;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() throws IOException {
|
||||||
|
csConfig = new CapacitySchedulerConfiguration();
|
||||||
|
rmContext = mock(RMContext.class);
|
||||||
|
when(rmContext.getYarnConfiguration()).thenReturn(csConfig);
|
||||||
|
when(rmContext.getRMApps()).thenReturn(new ConcurrentHashMap<>());
|
||||||
|
clock = new ControlledClock();
|
||||||
|
scheduler = mock(CapacityScheduler.class);
|
||||||
|
when(rmContext.getScheduler()).thenReturn(scheduler);
|
||||||
|
when(scheduler.getConf()).thenReturn(csConfig);
|
||||||
|
when(scheduler.getConfig()).thenReturn(csConfig);
|
||||||
|
when(scheduler.getConfiguration()).thenReturn(csConfig);
|
||||||
|
when(scheduler.getResourceCalculator()).thenReturn(
|
||||||
|
new DefaultResourceCalculator());
|
||||||
|
when(scheduler.getRMContext()).thenReturn(rmContext);
|
||||||
|
when(scheduler.getClusterResource())
|
||||||
|
.thenReturn(Resource.newInstance(16384, 8));
|
||||||
|
when(scheduler.getMinimumAllocation())
|
||||||
|
.thenReturn(Resource.newInstance(1024, 1));
|
||||||
|
when(scheduler.getMinimumResourceCapability())
|
||||||
|
.thenReturn(Resource.newInstance(1024, 1));
|
||||||
|
activitiesManager = mock(ActivitiesManager.class);
|
||||||
|
maxAppsEnforcer = new CSMaxRunningAppsEnforcer(scheduler);
|
||||||
|
appNum = 0;
|
||||||
|
setupQueues(csConfig);
|
||||||
|
RMNodeLabelsManager labelManager = mock(RMNodeLabelsManager.class);
|
||||||
|
AppPriorityACLsManager appPriorityACLManager =
|
||||||
|
mock(AppPriorityACLsManager.class);
|
||||||
|
when(rmContext.getNodeLabelManager()).thenReturn(labelManager);
|
||||||
|
when(labelManager.getResourceByLabel(anyString(), any(Resource.class)))
|
||||||
|
.thenReturn(Resource.newInstance(16384, 8));
|
||||||
|
queueManager = new CapacitySchedulerQueueManager(csConfig, labelManager,
|
||||||
|
appPriorityACLManager);
|
||||||
|
queueManager.setCapacitySchedulerContext(scheduler);
|
||||||
|
queueManager.initializeQueues(csConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupQueues(CapacitySchedulerConfiguration config) {
|
||||||
|
config.setQueues(CapacitySchedulerConfiguration.ROOT,
|
||||||
|
new String[] {"queue1", "queue2"});
|
||||||
|
config.setQueues("root.queue1", new String[] {"subqueue1", "subqueue2"});
|
||||||
|
config.setQueues("root.queue1.subqueue1", new String[] {"leaf1"});
|
||||||
|
config.setQueues("root.queue1.subqueue2", new String[] {"leaf2"});
|
||||||
|
config.setFloat(PREFIX + "root.capacity", 100.0f);
|
||||||
|
config.setFloat(PREFIX + "root.queue1.capacity", 50.0f);
|
||||||
|
config.setFloat(PREFIX + "root.queue2.capacity", 50.0f);
|
||||||
|
config.setFloat(PREFIX + "root.queue1.subqueue1.capacity", 50.0f);
|
||||||
|
config.setFloat(PREFIX + "root.queue1.subqueue2.capacity", 50.0f);
|
||||||
|
config.setFloat(PREFIX + "root.queue1.subqueue1.leaf1.capacity", 100.0f);
|
||||||
|
config.setFloat(PREFIX + "root.queue1.subqueue2.leaf2.capacity", 100.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
private FiCaSchedulerApp addApp(LeafQueue queue, String user) {
|
||||||
|
ApplicationId appId = ApplicationId.newInstance(0, appNum++);
|
||||||
|
ApplicationAttemptId attId = ApplicationAttemptId.newInstance(appId, 0);
|
||||||
|
|
||||||
|
FiCaSchedulerApp attempt = new FiCaSchedulerApp(attId,
|
||||||
|
user, queue, queue.getAbstractUsersManager(),
|
||||||
|
rmContext, Priority.newInstance(0), false,
|
||||||
|
activitiesManager) {
|
||||||
|
|
||||||
|
private final long startTime = clock.getTime();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getStartTime() {
|
||||||
|
return startTime;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
maxAppsEnforcer.checkRunnabilityWithUpdate(attempt);
|
||||||
|
maxAppsEnforcer.trackApp(attempt);
|
||||||
|
|
||||||
|
queue.submitApplicationAttempt(attempt, attempt.getUser());
|
||||||
|
|
||||||
|
return attempt;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeApp(FiCaSchedulerApp attempt) {
|
||||||
|
LeafQueue queue = attempt.getCSLeafQueue();
|
||||||
|
queue.finishApplicationAttempt(attempt, queue.getQueuePath());
|
||||||
|
maxAppsEnforcer.untrackApp(attempt);
|
||||||
|
maxAppsEnforcer.updateRunnabilityOnAppRemoval(attempt);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveDoesNotEnableAnyApp() {
|
||||||
|
ParentQueue root =
|
||||||
|
(ParentQueue) queueManager.getRootQueue();
|
||||||
|
LeafQueue leaf1 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue1.leaf1");
|
||||||
|
LeafQueue leaf2 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue2.leaf2");
|
||||||
|
root.setMaxParallelApps(2);
|
||||||
|
leaf1.setMaxParallelApps(1);
|
||||||
|
leaf2.setMaxParallelApps(1);
|
||||||
|
|
||||||
|
FiCaSchedulerApp app1 = addApp(leaf1, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
assertEquals(1, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumNonRunnableApps());
|
||||||
|
|
||||||
|
removeApp(app1);
|
||||||
|
assertEquals(0, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumNonRunnableApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveEnablesAppOnCousinQueue() {
|
||||||
|
LeafQueue leaf1 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue1.leaf1");
|
||||||
|
LeafQueue leaf2 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue2.leaf2");
|
||||||
|
ParentQueue queue1 = (ParentQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1");
|
||||||
|
queue1.setMaxParallelApps(2);
|
||||||
|
|
||||||
|
FiCaSchedulerApp app1 = addApp(leaf1, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
assertEquals(1, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumNonRunnableApps());
|
||||||
|
|
||||||
|
removeApp(app1);
|
||||||
|
assertEquals(0, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(2, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(0, leaf2.getNumNonRunnableApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveEnablesOneByQueueOneByUser() {
|
||||||
|
LeafQueue leaf1 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue1.leaf1");
|
||||||
|
LeafQueue leaf2 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue2.leaf2");
|
||||||
|
leaf1.setMaxParallelApps(2);
|
||||||
|
//userMaxApps.put("user1", 1);
|
||||||
|
csConfig.setInt(PREFIX + "user.user1.max-parallel-apps", 1);
|
||||||
|
|
||||||
|
FiCaSchedulerApp app1 = addApp(leaf1, "user1");
|
||||||
|
addApp(leaf1, "user2");
|
||||||
|
addApp(leaf1, "user3");
|
||||||
|
addApp(leaf2, "user1");
|
||||||
|
assertEquals(2, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf1.getNumNonRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumNonRunnableApps());
|
||||||
|
|
||||||
|
removeApp(app1);
|
||||||
|
assertEquals(2, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(0, leaf1.getNumNonRunnableApps());
|
||||||
|
assertEquals(0, leaf2.getNumNonRunnableApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveEnablingOrderedByStartTime() {
|
||||||
|
LeafQueue leaf1 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue1.leaf1");
|
||||||
|
LeafQueue leaf2 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue2.leaf2");
|
||||||
|
ParentQueue queue1 = (ParentQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1");
|
||||||
|
queue1.setMaxParallelApps(2);
|
||||||
|
FiCaSchedulerApp app1 = addApp(leaf1, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
clock.tickSec(20);
|
||||||
|
addApp(leaf1, "user");
|
||||||
|
assertEquals(1, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf1.getNumNonRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumNonRunnableApps());
|
||||||
|
removeApp(app1);
|
||||||
|
assertEquals(0, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(2, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(0, leaf2.getNumNonRunnableApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultipleAppsWaitingOnCousinQueue() {
|
||||||
|
LeafQueue leaf1 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue1.leaf1");
|
||||||
|
LeafQueue leaf2 = (LeafQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1.subqueue2.leaf2");
|
||||||
|
ParentQueue queue1 = (ParentQueue) queueManager
|
||||||
|
.getQueueByFullName("root.queue1");
|
||||||
|
queue1.setMaxParallelApps(2);
|
||||||
|
FiCaSchedulerApp app1 = addApp(leaf1, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
addApp(leaf2, "user");
|
||||||
|
assertEquals(1, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(2, leaf2.getNumNonRunnableApps());
|
||||||
|
removeApp(app1);
|
||||||
|
assertEquals(0, leaf1.getNumRunnableApps());
|
||||||
|
assertEquals(2, leaf2.getNumRunnableApps());
|
||||||
|
assertEquals(1, leaf2.getNumNonRunnableApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiListStartTimeIteratorEmptyAppLists() {
|
||||||
|
List<List<FiCaSchedulerApp>> lists =
|
||||||
|
new ArrayList<List<FiCaSchedulerApp>>();
|
||||||
|
lists.add(Arrays.asList(mockAppAttempt(1)));
|
||||||
|
lists.add(Arrays.asList(mockAppAttempt(2)));
|
||||||
|
Iterator<FiCaSchedulerApp> iter =
|
||||||
|
new CSMaxRunningAppsEnforcer.MultiListStartTimeIterator(lists);
|
||||||
|
assertEquals(1, iter.next().getStartTime());
|
||||||
|
assertEquals(2, iter.next().getStartTime());
|
||||||
|
}
|
||||||
|
|
||||||
|
private FiCaSchedulerApp mockAppAttempt(long startTime) {
|
||||||
|
FiCaSchedulerApp schedApp = mock(FiCaSchedulerApp.class);
|
||||||
|
when(schedApp.getStartTime()).thenReturn(startTime);
|
||||||
|
return schedApp;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,312 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration.PREFIX;
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||||
|
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockRMAppSubmissionData;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockRMAppSubmitter;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||||
|
import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
|
||||||
|
public class TestCapacitySchedulerMaxParallelApps {
|
||||||
|
private CapacitySchedulerConfiguration conf;
|
||||||
|
private MockRM rm;
|
||||||
|
private MockNM nm1;
|
||||||
|
|
||||||
|
private RMApp app1;
|
||||||
|
private MockAM am1;
|
||||||
|
private RMApp app2;
|
||||||
|
private MockAM am2;
|
||||||
|
private RMApp app3;
|
||||||
|
private RMAppAttempt attempt3;
|
||||||
|
private RMApp app4;
|
||||||
|
private RMAppAttempt attempt4;
|
||||||
|
|
||||||
|
private ParentQueue rootQueue;
|
||||||
|
private LeafQueue defaultQueue;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() {
|
||||||
|
CapacitySchedulerConfiguration config =
|
||||||
|
new CapacitySchedulerConfiguration();
|
||||||
|
config.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS,
|
||||||
|
DominantResourceCalculator.class.getName());
|
||||||
|
|
||||||
|
conf = new CapacitySchedulerConfiguration(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void after() {
|
||||||
|
if (rm != null) {
|
||||||
|
rm.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testMaxParallelAppsExceedsQueueSetting() throws Exception {
|
||||||
|
conf.setInt("yarn.scheduler.capacity.root.default.max-parallel-apps", 2);
|
||||||
|
executeCommonStepsAndChecks();
|
||||||
|
testWhenSettingsExceeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testMaxParallelAppsExceedsDefaultQueueSetting()
|
||||||
|
throws Exception {
|
||||||
|
conf.setInt("yarn.scheduler.capacity.max-parallel-apps", 2);
|
||||||
|
executeCommonStepsAndChecks();
|
||||||
|
testWhenSettingsExceeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testMaxParallelAppsExceedsUserSetting() throws Exception {
|
||||||
|
conf.setInt("yarn.scheduler.capacity.user.testuser.max-parallel-apps", 2);
|
||||||
|
executeCommonStepsAndChecks();
|
||||||
|
testWhenSettingsExceeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testMaxParallelAppsExceedsDefaultUserSetting() throws Exception {
|
||||||
|
conf.setInt("yarn.scheduler.capacity.user.max-parallel-apps", 2);
|
||||||
|
executeCommonStepsAndChecks();
|
||||||
|
testWhenSettingsExceeded();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testMaxParallelAppsWhenReloadingConfig() throws Exception {
|
||||||
|
conf.setInt("yarn.scheduler.capacity.root.default.max-parallel-apps", 2);
|
||||||
|
|
||||||
|
executeCommonStepsAndChecks();
|
||||||
|
|
||||||
|
RMContext rmContext = rm.getRMContext();
|
||||||
|
// Disable parallel apps setting + max out AM percent
|
||||||
|
conf.unset("yarn.scheduler.capacity.root.default.max-parallel-apps");
|
||||||
|
conf.setFloat(PREFIX + "maximum-am-resource-percent", 1.0f);
|
||||||
|
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
|
||||||
|
cs.reinitialize(conf, rmContext);
|
||||||
|
|
||||||
|
// Both app #3 and app #4 should transition to RUNNABLE
|
||||||
|
launchAMandWaitForRunning(app3, attempt3, nm1);
|
||||||
|
launchAMandWaitForRunning(app4, attempt4, nm1);
|
||||||
|
verifyRunningAndAcceptedApps(4, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 30000)
|
||||||
|
public void testMaxAppsReachedWithNonRunnableApps() throws Exception {
|
||||||
|
conf.setInt("yarn.scheduler.capacity.root.default.max-parallel-apps", 2);
|
||||||
|
conf.setInt("yarn.scheduler.capacity.root.default.maximum-applications", 4);
|
||||||
|
executeCommonStepsAndChecks();
|
||||||
|
|
||||||
|
RMApp app5 = MockRMAppSubmitter.submit(rm,
|
||||||
|
MockRMAppSubmissionData.Builder.createWithMemory(512, rm)
|
||||||
|
.withAppName("app5")
|
||||||
|
.withUser("testuser")
|
||||||
|
.withQueue("default")
|
||||||
|
.withWaitForAppAcceptedState(false)
|
||||||
|
.build());
|
||||||
|
|
||||||
|
rm.waitForState(app5.getApplicationId(), RMAppState.FAILED);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void executeCommonStepsAndChecks() throws Exception {
|
||||||
|
rm = new MockRM(conf);
|
||||||
|
rm.start();
|
||||||
|
|
||||||
|
nm1 = rm.registerNode("h1:1234", 4096, 8);
|
||||||
|
rm.registerNode("h2:1234", 4096, 8);
|
||||||
|
rm.registerNode("h3:1234", 4096, 8);
|
||||||
|
|
||||||
|
rm.drainEvents();
|
||||||
|
|
||||||
|
app1 = MockRMAppSubmitter.submit(rm,
|
||||||
|
MockRMAppSubmissionData.Builder.createWithMemory(512, rm)
|
||||||
|
.withAppName("app1")
|
||||||
|
.withUser("testuser")
|
||||||
|
.withQueue("default")
|
||||||
|
.build());
|
||||||
|
|
||||||
|
am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
|
||||||
|
|
||||||
|
app2 = MockRMAppSubmitter.submit(rm,
|
||||||
|
MockRMAppSubmissionData.Builder.createWithMemory(512, rm)
|
||||||
|
.withAppName("app2")
|
||||||
|
.withUser("testuser")
|
||||||
|
.withQueue("default")
|
||||||
|
.build());
|
||||||
|
am2 = MockRM.launchAndRegisterAM(app2, rm, nm1);
|
||||||
|
|
||||||
|
app3 = MockRMAppSubmitter.submit(rm,
|
||||||
|
MockRMAppSubmissionData.Builder.createWithMemory(512, rm)
|
||||||
|
.withAppName("app3")
|
||||||
|
.withUser("testuser")
|
||||||
|
.withQueue("default")
|
||||||
|
.build());
|
||||||
|
attempt3 = MockRM.waitForAttemptScheduled(app3, rm);
|
||||||
|
|
||||||
|
app4 = MockRMAppSubmitter.submit(rm,
|
||||||
|
MockRMAppSubmissionData.Builder.createWithMemory(512, rm)
|
||||||
|
.withAppName("app4")
|
||||||
|
.withUser("testuser")
|
||||||
|
.withQueue("default")
|
||||||
|
.build());
|
||||||
|
attempt4 = MockRM.waitForAttemptScheduled(app4, rm);
|
||||||
|
|
||||||
|
// Check that app attempt #3 and #4 are non-runnable
|
||||||
|
rootQueue = getRootQueue();
|
||||||
|
defaultQueue = getDefaultQueue();
|
||||||
|
Set<ApplicationAttemptId> nonRunnables =
|
||||||
|
Sets.newHashSet(
|
||||||
|
attempt3.getAppAttemptId(),
|
||||||
|
attempt4.getAppAttemptId());
|
||||||
|
verifyRunnableAppsInParent(rootQueue, 2);
|
||||||
|
verifyRunnableAppsInLeaf(defaultQueue, 2, nonRunnables);
|
||||||
|
verifyRunningAndAcceptedApps(2, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testWhenSettingsExceeded() throws Exception {
|
||||||
|
// Stop app #1
|
||||||
|
unregisterAMandWaitForFinish(app1, am1, nm1);
|
||||||
|
|
||||||
|
// Launch app #3
|
||||||
|
launchAMandWaitForRunning(app3, attempt3, nm1);
|
||||||
|
|
||||||
|
// Check that attempt #4 is still non-runnable
|
||||||
|
verifyRunnableAppsInParent(rootQueue, 2);
|
||||||
|
verifyRunnableAppsInLeaf(defaultQueue, 2,
|
||||||
|
Collections.singleton(attempt4.getAppAttemptId()));
|
||||||
|
verifyRunningAndAcceptedApps(2, 1);
|
||||||
|
|
||||||
|
// Stop app #2
|
||||||
|
unregisterAMandWaitForFinish(app2, am2, nm1);
|
||||||
|
|
||||||
|
// Launch app #4
|
||||||
|
launchAMandWaitForRunning(app4, attempt4, nm1);
|
||||||
|
verifyRunnableAppsInParent(rootQueue, 2);
|
||||||
|
verifyRunnableAppsInLeaf(defaultQueue, 2,
|
||||||
|
Collections.emptySet());
|
||||||
|
verifyRunningAndAcceptedApps(2, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("checkstyle:hiddenfield")
|
||||||
|
private LeafQueue getDefaultQueue() {
|
||||||
|
CSQueue defaultQueue =
|
||||||
|
((CapacityScheduler) rm.getResourceScheduler()).getQueue("default");
|
||||||
|
|
||||||
|
return (LeafQueue) defaultQueue;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ParentQueue getRootQueue() {
|
||||||
|
CSQueue root =
|
||||||
|
((CapacityScheduler) rm.getResourceScheduler()).getQueue("root");
|
||||||
|
|
||||||
|
return (ParentQueue) root;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyRunnableAppsInParent(ParentQueue queue,
|
||||||
|
int expectedRunnable) {
|
||||||
|
assertEquals("Num of runnable apps", expectedRunnable,
|
||||||
|
queue.getNumRunnableApps());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyRunnableAppsInLeaf(LeafQueue queue, int expectedRunnable,
|
||||||
|
Set<ApplicationAttemptId> nonRunnableIds) {
|
||||||
|
assertEquals("Num of runnable apps", expectedRunnable,
|
||||||
|
queue.getNumRunnableApps());
|
||||||
|
|
||||||
|
queue.getCopyOfNonRunnableAppSchedulables()
|
||||||
|
.stream()
|
||||||
|
.map(fca -> fca.getApplicationAttemptId())
|
||||||
|
.forEach(id -> assertTrue(id + " not found as non-runnable",
|
||||||
|
nonRunnableIds.contains(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyRunningAndAcceptedApps(int expectedRunning,
|
||||||
|
int expectedAccepted) throws YarnException {
|
||||||
|
GetApplicationsRequest request = GetApplicationsRequest.newInstance();
|
||||||
|
|
||||||
|
GetApplicationsResponse resp =
|
||||||
|
rm.getClientRMService().getApplications(request);
|
||||||
|
|
||||||
|
List<ApplicationReport> apps = resp.getApplicationList();
|
||||||
|
|
||||||
|
long runningCount = apps
|
||||||
|
.stream()
|
||||||
|
.filter(report ->
|
||||||
|
report.getYarnApplicationState() == YarnApplicationState.RUNNING)
|
||||||
|
.count();
|
||||||
|
|
||||||
|
long acceptedCount = apps
|
||||||
|
.stream()
|
||||||
|
.filter(report ->
|
||||||
|
report.getYarnApplicationState() == YarnApplicationState.ACCEPTED)
|
||||||
|
.count();
|
||||||
|
|
||||||
|
assertEquals("Running apps count", expectedRunning, runningCount);
|
||||||
|
assertEquals("Accepted apps count", expectedAccepted, acceptedCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void unregisterAMandWaitForFinish(RMApp app, MockAM am, MockNM nm)
|
||||||
|
throws Exception {
|
||||||
|
am.unregisterAppAttempt();
|
||||||
|
nm.nodeHeartbeat(app.getCurrentAppAttempt().getAppAttemptId(), 1,
|
||||||
|
ContainerState.COMPLETE);
|
||||||
|
rm.waitForState(app.getCurrentAppAttempt().getAppAttemptId(),
|
||||||
|
RMAppAttemptState.FINISHED);
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("rawtypes")
|
||||||
|
private MockAM launchAMandWaitForRunning(RMApp app, RMAppAttempt attempt,
|
||||||
|
MockNM nm) throws Exception {
|
||||||
|
nm.nodeHeartbeat(true);
|
||||||
|
((AbstractYarnScheduler)rm.getResourceScheduler()).update();
|
||||||
|
rm.drainEvents();
|
||||||
|
nm.nodeHeartbeat(true);
|
||||||
|
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
|
||||||
|
am.registerAppAttempt();
|
||||||
|
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
|
||||||
|
|
||||||
|
return am;
|
||||||
|
}
|
||||||
|
}
|
|
@ -456,6 +456,8 @@ public class TestLeafQueue {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAppAttemptMetrics() throws Exception {
|
public void testAppAttemptMetrics() throws Exception {
|
||||||
|
CSMaxRunningAppsEnforcer enforcer = mock(CSMaxRunningAppsEnforcer.class);
|
||||||
|
cs.setMaxRunningAppsEnforcer(enforcer);
|
||||||
|
|
||||||
// Manipulate queue 'a'
|
// Manipulate queue 'a'
|
||||||
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(B));
|
LeafQueue a = stubLeafQueue((LeafQueue) queues.get(B));
|
||||||
|
|
|
@ -218,6 +218,7 @@ public class TestQueueState {
|
||||||
CommonNodeLabelsManager.NO_LABEL);
|
CommonNodeLabelsManager.NO_LABEL);
|
||||||
when(application.compareInputOrderTo(any(FiCaSchedulerApp.class)))
|
when(application.compareInputOrderTo(any(FiCaSchedulerApp.class)))
|
||||||
.thenCallRealMethod();
|
.thenCallRealMethod();
|
||||||
|
when(application.isRunnable()).thenReturn(true);
|
||||||
return application;
|
return application;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -157,6 +157,7 @@ public class TestQueueStateManager {
|
||||||
CommonNodeLabelsManager.NO_LABEL);
|
CommonNodeLabelsManager.NO_LABEL);
|
||||||
when(application.compareInputOrderTo(any(FiCaSchedulerApp.class)))
|
when(application.compareInputOrderTo(any(FiCaSchedulerApp.class)))
|
||||||
.thenCallRealMethod();
|
.thenCallRealMethod();
|
||||||
|
when(application.isRunnable()).thenReturn(true);
|
||||||
return application;
|
return application;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue