YARN-4390. Do surgical preemption based on reserved container in CapacityScheduler. Contributed by Wangda Tan
parent 35cf503149
commit bb62e05925
SchedulingMonitor.java:

@@ -21,6 +21,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.service.AbstractService;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
 
@@ -84,10 +85,17 @@ public class SchedulingMonitor extends AbstractService {
   @Override
   public void run() {
     while (!stopped && !Thread.currentThread().isInterrupted()) {
-      //invoke the preemption policy at a regular pace
-      //the policy will generate preemption or kill events
-      //managed by the dispatcher
-      invokePolicy();
+      try {
+        //invoke the preemption policy at a regular pace
+        //the policy will generate preemption or kill events
+        //managed by the dispatcher
+        invokePolicy();
+      } catch (YarnRuntimeException e) {
+        LOG.error("YarnRuntimeException raised while executing preemption"
+            + " checker, skip this run..., exception=", e);
+      }
+
+      // Wait before next run
       try {
         Thread.sleep(monitorInterval);
       } catch (InterruptedException e) {
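
The guarded loop above is easiest to see in isolation. Below is a minimal, self-contained sketch (toy stand-ins for the policy and interval, not YARN classes) of why the added catch matters: an uncaught runtime exception from one edit pass would otherwise end the monitor thread and preemption checking would silently stop.

class MonitorLoopSketch {
  public static void main(String[] args) throws InterruptedException {
    Runnable policy = () -> { throw new RuntimeException("bad edit pass"); };
    Thread monitor = new Thread(() -> {
      for (int run = 0; run < 3; run++) {
        try {
          policy.run(); // stands in for invokePolicy()
        } catch (RuntimeException e) {
          System.out.println("run " + run + " skipped: " + e.getMessage());
        }
        try {
          Thread.sleep(10); // stands in for the monitorInterval wait
        } catch (InterruptedException e) {
          return;
        }
      }
    });
    monitor.start();
    monitor.join(); // prints three "skipped" lines instead of dying on run 0
  }
}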
CapacitySchedulerPreemptionUtils.java:

@@ -21,9 +21,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
@@ -40,9 +42,9 @@ public class CapacitySchedulerPreemptionUtils {
 
       // Only add resToObtainByPartition when actuallyToBePreempted resource >= 0
       if (Resources.greaterThan(context.getResourceCalculator(),
-          clusterResource, qT.actuallyToBePreempted, Resources.none())) {
+          clusterResource, qT.getActuallyToBePreempted(), Resources.none())) {
         resToObtainByPartition.put(qT.partition,
-            Resources.clone(qT.actuallyToBePreempted));
+            Resources.clone(qT.getActuallyToBePreempted()));
       }
     }
 
@@ -62,4 +64,33 @@ public class CapacitySchedulerPreemptionUtils {
     }
     return containers.contains(container);
   }
+
+  public static void deductPreemptableResourcesBasedSelectedCandidates(
+      CapacitySchedulerPreemptionContext context,
+      Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates) {
+    for (Set<RMContainer> containers : selectedCandidates.values()) {
+      for (RMContainer c : containers) {
+        SchedulerNode schedulerNode = context.getScheduler().getSchedulerNode(
+            c.getAllocatedNode());
+        if (null == schedulerNode) {
+          continue;
+        }
+
+        String partition = schedulerNode.getPartition();
+        String queue = c.getQueueName();
+        TempQueuePerPartition tq = context.getQueueByPartition(queue,
+            partition);
+
+        Resource res = c.getReservedResource();
+        if (null == res) {
+          res = c.getAllocatedResource();
+        }
+
+        if (null != res) {
+          tq.deductActuallyToBePreempted(context.getResourceCalculator(),
+              tq.totalPartitionResource, res);
+        }
+      }
+    }
+  }
 }
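
The new deductPreemptableResourcesBasedSelectedCandidates helper keeps selectors from double-counting: whatever an earlier selector already chose is subtracted from each queue's remaining to-preempt amount. A minimal sketch of that bookkeeping, with Resource reduced to a plain per-queue megabyte figure (all names and numbers here are illustrative, not the real YARN types):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Each queue tracks how much it still needs to preempt; containers chosen by
// an earlier selector reduce that figure (clamped at zero, as the real code
// clamps with Resources.none()), so a later selector never asks twice.
class DeductSketch {
  public static void main(String[] args) {
    Map<String, Long> actuallyToBePreemptedMb = new HashMap<>();
    actuallyToBePreemptedMb.put("root.a", 20480L); // should give back 20 GB

    Map<String, List<Long>> selectedCandidates = new HashMap<>();
    selectedCandidates.put("root.a", Arrays.asList(8192L, 4096L)); // 12 GB chosen

    for (Map.Entry<String, List<Long>> e : selectedCandidates.entrySet()) {
      for (long containerMb : e.getValue()) {
        actuallyToBePreemptedMb.compute(e.getKey(),
            (q, v) -> Math.max((v == null ? 0 : v) - containerMb, 0));
      }
    }
    System.out.println(actuallyToBePreemptedMb); // {root.a=8192}
  }
}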
FifoCandidatesSelector.java:

@@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.util.resource.Resources;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
-import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -55,7 +54,7 @@ public class FifoCandidatesSelector
     super(preemptionContext);
 
     preemptableAmountCalculator = new PreemptableResourceCalculator(
-        preemptionContext);
+        preemptionContext, false);
   }
 
   @Override
@@ -66,8 +65,13 @@ public class FifoCandidatesSelector
     preemptableAmountCalculator.computeIdealAllocation(clusterResource,
         totalPreemptionAllowed);
 
-    Map<ApplicationAttemptId, Set<RMContainer>> preemptMap =
-        new HashMap<>();
+    // Previous selectors (with higher priority) could have already
+    // selected containers. We need to deduct preemptable resources
+    // based on already selected candidates.
+    CapacitySchedulerPreemptionUtils
+        .deductPreemptableResourcesBasedSelectedCandidates(preemptionContext,
+            selectedCandidates);
+
     List<RMContainer> skippedAMContainerlist = new ArrayList<>();
 
     // Loop all leaf queues
@@ -109,7 +113,7 @@ public class FifoCandidatesSelector
               continue;
             }
             boolean preempted = tryPreemptContainerAndDeductResToObtain(
-                resToObtainByPartition, c, clusterResource, preemptMap,
+                resToObtainByPartition, c, clusterResource, selectedCandidates,
                 totalPreemptionAllowed);
             if (!preempted) {
               continue;
@@ -132,7 +136,7 @@ public class FifoCandidatesSelector
         }
 
         preemptFrom(fc, clusterResource, resToObtainByPartition,
-            skippedAMContainerlist, skippedAMSize, preemptMap,
+            skippedAMContainerlist, skippedAMSize, selectedCandidates,
             totalPreemptionAllowed);
       }
 
@@ -144,13 +148,13 @@ public class FifoCandidatesSelector
               leafQueue.getAbsoluteCapacity()),
           leafQueue.getMaxAMResourcePerQueuePercent());
 
-      preemptAMContainers(clusterResource, preemptMap, skippedAMContainerlist,
+      preemptAMContainers(clusterResource, selectedCandidates, skippedAMContainerlist,
           resToObtainByPartition, skippedAMSize, maxAMCapacityForThisQueue,
           totalPreemptionAllowed);
       }
     }
 
-    return preemptMap;
+    return selectedCandidates;
   }
 
   /**
@@ -236,9 +240,9 @@ public class FifoCandidatesSelector
       resourceToObtainByPartitions.remove(nodePartition);
     }
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Marked container=" + rmContainer.getContainerId()
-          + " in partition=" + nodePartition
-          + " to be preemption candidates");
+      LOG.debug(this.getClass().getName() + " Marked container=" + rmContainer
+          .getContainerId() + " from partition=" + nodePartition + " queue="
+          + rmContainer.getQueueName() + " to be preemption candidates");
     }
     // Add to preemptMap
     addToPreemptMap(preemptMap, attemptId, rmContainer);
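
The switch from a locally created preemptMap to the shared selectedCandidates parameter is what makes the selectors composable: each selector receives the map accumulated so far, may add to it, and returns it. A minimal sketch of that chaining, with illustrative string keys standing in for ApplicationAttemptId and RMContainer (not the real YARN interface):

import java.util.*;

interface CandidatesSelector {
  Map<String, Set<String>> selectCandidates(Map<String, Set<String>> selected);
}

class ChainSketch {
  public static void main(String[] args) {
    List<CandidatesSelector> chain = Arrays.asList(
        selected -> { // stands in for ReservedContainerCandidatesSelector
          selected.computeIfAbsent("app_1", k -> new HashSet<>()).add("container_7");
          return selected;
        },
        selected -> { // stands in for FifoCandidatesSelector
          selected.computeIfAbsent("app_2", k -> new HashSet<>()).add("container_3");
          return selected;
        });

    Map<String, Set<String>> toPreempt = new HashMap<>();
    for (CandidatesSelector s : chain) {
      toPreempt = s.selectCandidates(toPreempt); // each pass extends the map
    }
    System.out.println(toPreempt); // both apps' containers end up selected
  }
}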
PreemptableResourceCalculator.java:

@@ -37,7 +37,7 @@ import java.util.Set;
 
 /**
  * Calculate how much resources need to be preempted for each queue,
- * will be used by {@link FifoCandidatesSelector}
+ * will be used by {@link PreemptionCandidatesSelector}
  */
 public class PreemptableResourceCalculator {
   private static final Log LOG =
@@ -45,6 +45,7 @@ public class PreemptableResourceCalculator {
 
   private final CapacitySchedulerPreemptionContext context;
   private final ResourceCalculator rc;
+  private boolean isReservedPreemptionCandidatesSelector;
 
   static class TQComparator implements Comparator<TempQueuePerPartition> {
     private ResourceCalculator rc;
@@ -71,18 +72,31 @@ public class PreemptableResourceCalculator {
     // capacity and therefore considered last for resources.
     private double getIdealPctOfGuaranteed(TempQueuePerPartition q) {
       double pctOver = Integer.MAX_VALUE;
-      if (q != null && Resources.greaterThan(
-          rc, clusterRes, q.guaranteed, Resources.none())) {
-        pctOver =
-            Resources.divide(rc, clusterRes, q.idealAssigned, q.guaranteed);
+      if (q != null && Resources.greaterThan(rc, clusterRes,
+          q.getGuaranteed(),
+          Resources.none())) {
+        pctOver = Resources.divide(rc, clusterRes, q.idealAssigned,
+            q.getGuaranteed());
       }
       return (pctOver);
     }
   }
 
-  public PreemptableResourceCalculator(CapacitySchedulerPreemptionContext preemptionContext) {
+  /**
+   * PreemptableResourceCalculator constructor
+   *
+   * @param preemptionContext
+   * @param isReservedPreemptionCandidatesSelector this will be set by
+   * different implementation of candidate selectors, please refer to
+   * TempQueuePerPartition#offer for details.
+   */
+  public PreemptableResourceCalculator(
+      CapacitySchedulerPreemptionContext preemptionContext,
+      boolean isReservedPreemptionCandidatesSelector) {
     context = preemptionContext;
     rc = preemptionContext.getResourceCalculator();
+    this.isReservedPreemptionCandidatesSelector =
+        isReservedPreemptionCandidatesSelector;
   }
 
   /**
@@ -101,11 +115,11 @@ public class PreemptableResourceCalculator {
       }
     } else {
       for (TempQueuePerPartition q : queues) {
-        Resources.addTo(activeCap, q.guaranteed);
+        Resources.addTo(activeCap, q.getGuaranteed());
       }
       for (TempQueuePerPartition q : queues) {
         q.normalizedGuarantee = Resources.divide(rc, clusterResource,
-            q.guaranteed, activeCap);
+            q.getGuaranteed(), activeCap);
       }
     }
   }
@@ -114,7 +128,8 @@ public class PreemptableResourceCalculator {
   // return the list of all queues that have the same idealAssigned
   // percentage of guaranteed.
   protected Collection<TempQueuePerPartition> getMostUnderservedQueues(
-      PriorityQueue<TempQueuePerPartition> orderedByNeed, TQComparator tqComparator) {
+      PriorityQueue<TempQueuePerPartition> orderedByNeed,
+      TQComparator tqComparator) {
     ArrayList<TempQueuePerPartition> underserved = new ArrayList<>();
     while (!orderedByNeed.isEmpty()) {
       TempQueuePerPartition q1 = orderedByNeed.remove();
@@ -155,15 +170,19 @@ public class PreemptableResourceCalculator {
         tqComparator);
     for (Iterator<TempQueuePerPartition> i = qAlloc.iterator(); i.hasNext();) {
       TempQueuePerPartition q = i.next();
-      if (Resources.greaterThan(rc, tot_guarant, q.current, q.guaranteed)) {
-        q.idealAssigned = Resources.add(q.guaranteed, q.untouchableExtra);
+      Resource used = q.getUsed();
+
+      if (Resources.greaterThan(rc, tot_guarant, used,
+          q.getGuaranteed())) {
+        q.idealAssigned = Resources.add(
+            q.getGuaranteed(), q.untouchableExtra);
       } else {
-        q.idealAssigned = Resources.clone(q.current);
+        q.idealAssigned = Resources.clone(used);
       }
       Resources.subtractFrom(unassigned, q.idealAssigned);
-      // If idealAssigned < (current + pending), q needs more resources, so
+      // If idealAssigned < (allocated + used + pending), q needs more resources, so
       // add it to the list of underserved queues, ordered by need.
-      Resource curPlusPend = Resources.add(q.current, q.pending);
+      Resource curPlusPend = Resources.add(q.getUsed(), q.pending);
       if (Resources.lessThan(rc, tot_guarant, q.idealAssigned, curPlusPend)) {
         orderedByNeed.add(q);
       }
@@ -190,7 +209,8 @@ public class PreemptableResourceCalculator {
       TempQueuePerPartition sub = i.next();
       Resource wQavail = Resources.multiplyAndNormalizeUp(rc,
           unassigned, sub.normalizedGuarantee, Resource.newInstance(1, 1));
-      Resource wQidle = sub.offer(wQavail, rc, tot_guarant);
+      Resource wQidle = sub.offer(wQavail, rc, tot_guarant,
+          isReservedPreemptionCandidatesSelector);
       Resource wQdone = Resources.subtract(wQavail, wQidle);
 
       if (Resources.greaterThan(rc, tot_guarant,
@@ -234,8 +254,8 @@ public class PreemptableResourceCalculator {
     Set<TempQueuePerPartition> zeroGuarQueues = new HashSet<>();
 
     for (TempQueuePerPartition q : qAlloc) {
-      if (Resources
-          .greaterThan(rc, tot_guarant, q.guaranteed, Resources.none())) {
+      if (Resources.greaterThan(rc, tot_guarant,
+          q.getGuaranteed(), Resources.none())) {
         nonZeroGuarQueues.add(q);
       } else {
         zeroGuarQueues.add(q);
@@ -258,19 +278,22 @@ public class PreemptableResourceCalculator {
     // how much preemption is required overall
     Resource totPreemptionNeeded = Resource.newInstance(0, 0);
     for (TempQueuePerPartition t:queues) {
-      if (Resources.greaterThan(rc, tot_guarant, t.current, t.idealAssigned)) {
-        Resources.addTo(totPreemptionNeeded,
-            Resources.subtract(t.current, t.idealAssigned));
+      if (Resources.greaterThan(rc, tot_guarant,
+          t.getUsed(), t.idealAssigned)) {
+        Resources.addTo(totPreemptionNeeded, Resources
+            .subtract(t.getUsed(), t.idealAssigned));
       }
     }
 
-    // if we need to preempt more than is allowed, compute a factor (0<f<1)
-    // that is used to scale down how much we ask back from each queue
+    /**
+     * if we need to preempt more than is allowed, compute a factor (0<f<1)
+     * that is used to scale down how much we ask back from each queue
+     */
     float scalingFactor = 1.0F;
-    if (Resources.greaterThan(rc, tot_guarant,
-        totPreemptionNeeded, totalPreemptionAllowed)) {
-      scalingFactor = Resources.divide(rc, tot_guarant,
-          totalPreemptionAllowed, totPreemptionNeeded);
+    if (Resources.greaterThan(rc,
+        tot_guarant, totPreemptionNeeded, totalPreemptionAllowed)) {
+      scalingFactor = Resources.divide(rc, tot_guarant, totalPreemptionAllowed,
+          totPreemptionNeeded);
     }
 
     // assign to each queue the amount of actual preemption based on local
@@ -278,12 +301,6 @@ public class PreemptableResourceCalculator {
     for (TempQueuePerPartition t : queues) {
      t.assignPreemption(scalingFactor, rc, tot_guarant);
    }
-    if (LOG.isDebugEnabled()) {
-      for (TempQueuePerPartition t : queues) {
-        LOG.debug(t);
-      }
-    }
-
   }
 
   /**
@@ -329,12 +346,31 @@ public class PreemptableResourceCalculator {
     for (TempQueuePerPartition qT : context.getQueuePartitions(queueName)) {
       // we act only if we are violating balance by more than
       // maxIgnoredOverCapacity
-      if (Resources.greaterThan(rc, clusterResource, qT.current,
-          Resources.multiply(qT.guaranteed, 1.0 + context.getMaxIgnoreOverCapacity()))) {
-        // we introduce a dampening factor naturalTerminationFactor that
-        // accounts for natural termination of containers
-        Resource resToObtain = Resources.multiply(qT.toBePreempted,
-            context.getNaturalTerminationFactor());
+      if (Resources.greaterThan(rc, clusterResource,
+          qT.getUsed(), Resources
+              .multiply(qT.getGuaranteed(),
+                  1.0 + context.getMaxIgnoreOverCapacity()))) {
+        /*
+         * We introduce a dampening factor naturalTerminationFactor that
+         * accounts for natural termination of containers.
+         *
+         * This is added to control pace of preemption, let's say:
+         * If preemption policy calculated a queue *should be* preempted 20 GB
+         * And the nature_termination_factor set to 0.1. As a result, preemption
+         * policy will select 20 GB * 0.1 = 2GB containers to be preempted.
+         *
+         * However, it doesn't work for YARN-4390:
+         * For example, if a queue needs to be preempted 20GB for *one single*
+         * large container, preempt 10% of such resource isn't useful.
+         * So to make it simple, only apply nature_termination_factor when
+         * selector is not reservedPreemptionCandidatesSelector.
+         */
+        Resource resToObtain = qT.toBePreempted;
+        if (!isReservedPreemptionCandidatesSelector) {
+          resToObtain = Resources.multiply(qT.toBePreempted,
+              context.getNaturalTerminationFactor());
+        }
+
         // Only add resToObtain when it >= 0
         if (Resources.greaterThan(rc, clusterResource, resToObtain,
             Resources.none())) {
@@ -343,22 +379,39 @@ public class PreemptableResourceCalculator {
               + " resource-to-obtain=" + resToObtain);
         }
       }
-        qT.actuallyToBePreempted = Resources.clone(resToObtain);
+        qT.setActuallyToBePreempted(Resources.clone(resToObtain));
       } else {
-        qT.actuallyToBePreempted = Resources.none();
+        qT.setActuallyToBePreempted(Resources.none());
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(qT);
       }
     }
   }
 
+  private void updatePreemptableExtras(TempQueuePerPartition cur) {
+    if (cur.children == null || cur.children.isEmpty()) {
+      cur.updatePreemptableExtras(rc);
+    } else {
+      for (TempQueuePerPartition child : cur.children) {
+        updatePreemptableExtras(child);
+      }
+      cur.updatePreemptableExtras(rc);
+    }
+  }
+
   public void computeIdealAllocation(Resource clusterResource,
       Resource totalPreemptionAllowed) {
     for (String partition : context.getAllPartitions()) {
-      TempQueuePerPartition tRoot =
-          context.getQueueByPartition(CapacitySchedulerConfiguration.ROOT, partition);
+      TempQueuePerPartition tRoot = context.getQueueByPartition(
+          CapacitySchedulerConfiguration.ROOT, partition);
+      updatePreemptableExtras(tRoot);
+
       // compute the ideal distribution of resources among queues
       // updates cloned queues state accordingly
-      tRoot.idealAssigned = tRoot.guaranteed;
+      tRoot.idealAssigned = tRoot.getGuaranteed();
       recursivelyComputeIdealAssignment(tRoot, totalPreemptionAllowed);
     }
 
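
The comment block above gives the 20 GB x 0.1 = 2 GB example; the sketch below just puts the two dampening steps side by side with invented numbers, showing that the reserved-container selector takes the full scaled amount while the Fifo selector additionally applies the natural-termination factor. This is a simplification of the real per-queue computation, not the actual YARN code path:

class DampeningSketch {
  public static void main(String[] args) {
    long toBePreemptedMb = 20480;  // queue should give back 20 GB
    long totalAllowedMb = 10240;   // this preemption round allows only 10 GB
    double naturalTerminationFactor = 0.1;

    // Scale down if the round allows less than the computed need (0 < f < 1).
    double scalingFactor = Math.min(1.0, (double) totalAllowedMb / toBePreemptedMb);
    long scaledMb = (long) (toBePreemptedMb * scalingFactor); // 10240

    for (boolean reservedSelector : new boolean[]{true, false}) {
      long resToObtainMb = reservedSelector
          ? scaledMb // full amount: needed to free one large reserved container
          : (long) (scaledMb * naturalTerminationFactor); // 1024: paced preemption
      System.out.println("reservedSelector=" + reservedSelector
          + " resToObtainMb=" + resToObtainMb);
    }
  }
}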
ProportionalCapacityPreemptionPolicy.java:

@@ -34,7 +34,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResour
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptableQueue;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerPreemptEvent;
@@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
 import java.io.IOException;
+import java.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -173,6 +174,15 @@ public class ProportionalCapacityPreemptionPolicy
     rc = scheduler.getResourceCalculator();
     nlm = scheduler.getRMContext().getNodeLabelManager();
 
+    // Do we need to specially consider reserved containers?
+    boolean selectCandidatesForResevedContainers = csConfig.getBoolean(
+        CapacitySchedulerConfiguration.PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS,
+        CapacitySchedulerConfiguration.DEFAULT_PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS);
+    if (selectCandidatesForResevedContainers) {
+      candidatesSelectionPolicies.add(
+          new ReservedContainerCandidatesSelector(this));
+    }
+
     // initialize candidates preemption selection policies
     candidatesSelectionPolicies.add(
         new FifoCandidatesSelector(this));
@@ -185,9 +195,15 @@ public class ProportionalCapacityPreemptionPolicy
 
   @Override
   public synchronized void editSchedule() {
+    long startTs = clock.getTime();
+
     CSQueue root = scheduler.getRootQueue();
     Resource clusterResources = Resources.clone(scheduler.getClusterResource());
     containerBasedPreemptOrKill(root, clusterResources);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Total time used=" + (clock.getTime() - startTs) + " ms.");
+    }
   }
 
   @SuppressWarnings("unchecked")
@@ -298,8 +314,8 @@ public class ProportionalCapacityPreemptionPolicy
     queueToPartitions.clear();
 
     for (String partitionToLookAt : allPartitions) {
-      cloneQueues(root,
-          nlm.getResourceByLabel(partitionToLookAt, clusterResources),
+      cloneQueues(root, Resources
+          .clone(nlm.getResourceByLabel(partitionToLookAt, clusterResources)),
           partitionToLookAt);
     }
   }
@@ -314,9 +330,15 @@ public class ProportionalCapacityPreemptionPolicy
 
     // based on ideal allocation select containers to be preemptionCandidates from each
     // queue and each application
-    Map<ApplicationAttemptId, Set<RMContainer>> toPreempt = null;
+    Map<ApplicationAttemptId, Set<RMContainer>> toPreempt =
+        new HashMap<>();
     for (PreemptionCandidatesSelector selector :
        candidatesSelectionPolicies) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(MessageFormat
+            .format("Trying to use {0} to select preemption candidates",
+                selector.getClass().getName()));
+      }
       toPreempt = selector.selectCandidates(toPreempt,
           clusterResources, totalPreemptionAllowed);
     }
@@ -382,14 +404,15 @@ public class ProportionalCapacityPreemptionPolicy
     float absMaxCap = qc.getAbsoluteMaximumCapacity(partitionToLookAt);
     boolean preemptionDisabled = curQueue.getPreemptionDisabled();
 
-    Resource current = curQueue.getQueueResourceUsage().getUsed(
-        partitionToLookAt);
-    Resource guaranteed = Resources.multiply(partitionResource, absCap);
-    Resource maxCapacity = Resources.multiply(partitionResource, absMaxCap);
+    Resource current = Resources.clone(
+        curQueue.getQueueResourceUsage().getUsed(partitionToLookAt));
     Resource killable = Resources.none();
+
+    Resource reserved = Resources.clone(
+        curQueue.getQueueResourceUsage().getReserved(partitionToLookAt));
     if (null != preemptableQueues.get(queueName)) {
-      killable = preemptableQueues.get(queueName)
-          .getKillableResource(partitionToLookAt);
+      killable = Resources.clone(preemptableQueues.get(queueName)
+          .getKillableResource(partitionToLookAt));
     }
 
     // when partition is a non-exclusive partition, the actual maxCapacity
@@ -397,53 +420,24 @@ public class ProportionalCapacityPreemptionPolicy
     try {
       if (!scheduler.getRMContext().getNodeLabelManager()
          .isExclusiveNodeLabel(partitionToLookAt)) {
-        maxCapacity =
-            Resources.max(rc, partitionResource, maxCapacity, current);
+        absMaxCap = 1.0f;
      }
    } catch (IOException e) {
      // This may cause by partition removed when running capacity monitor,
      // just ignore the error, this will be corrected when doing next check.
    }
 
-    Resource extra = Resource.newInstance(0, 0);
-    if (Resources.greaterThan(rc, partitionResource, current, guaranteed)) {
-      extra = Resources.subtract(current, guaranteed);
-    }
-    if (curQueue instanceof LeafQueue) {
-      LeafQueue l = (LeafQueue) curQueue;
-      Resource pending =
-          l.getTotalPendingResourcesConsideringUserLimit(
-              partitionResource, partitionToLookAt);
-      ret = new TempQueuePerPartition(queueName, current, pending, guaranteed,
-          maxCapacity, preemptionDisabled, partitionToLookAt, killable);
-      if (preemptionDisabled) {
-        ret.untouchableExtra = extra;
-      } else {
-        ret.preemptableExtra = extra;
-      }
-      ret.setLeafQueue(l);
-    } else {
-      Resource pending = Resource.newInstance(0, 0);
-      ret =
-          new TempQueuePerPartition(curQueue.getQueueName(), current, pending,
-              guaranteed, maxCapacity, false, partitionToLookAt, killable);
-      Resource childrensPreemptable = Resource.newInstance(0, 0);
+    ret = new TempQueuePerPartition(queueName, current, preemptionDisabled,
+        partitionToLookAt, killable, absCap, absMaxCap, partitionResource,
+        reserved, curQueue);
+
+    if (curQueue instanceof ParentQueue) {
+      // Recursively add children
       for (CSQueue c : curQueue.getChildQueues()) {
-        TempQueuePerPartition subq =
-            cloneQueues(c, partitionResource, partitionToLookAt);
-        Resources.addTo(childrensPreemptable, subq.preemptableExtra);
+        TempQueuePerPartition subq = cloneQueues(c, partitionResource,
+            partitionToLookAt);
         ret.addChild(subq);
       }
-      // untouchableExtra = max(extra - childrenPreemptable, 0)
-      if (Resources.greaterThanOrEqual(
-          rc, partitionResource, childrensPreemptable, extra)) {
-        ret.untouchableExtra = Resource.newInstance(0, 0);
-      } else {
-        ret.untouchableExtra =
-            Resources.subtract(extra, childrensPreemptable);
-      }
-      ret.preemptableExtra = Resources.min(
-          rc, partitionResource, childrensPreemptable, extra);
     }
     addTempQueuePartition(ret);
@@ -486,7 +480,8 @@ public class ProportionalCapacityPreemptionPolicy
       String partition) {
     Map<String, TempQueuePerPartition> partitionToQueues;
     if (null == (partitionToQueues = queueToPartitions.get(queueName))) {
-      return null;
+      throw new YarnRuntimeException("This shouldn't happen, cannot find "
+          + "TempQueuePerPartition for queueName=" + queueName);
     }
     return partitionToQueues.get(partition);
   }
@@ -497,7 +492,8 @@ public class ProportionalCapacityPreemptionPolicy
   @Override
   public Collection<TempQueuePerPartition> getQueuePartitions(String queueName) {
     if (!queueToPartitions.containsKey(queueName)) {
-      return null;
+      throw new YarnRuntimeException("This shouldn't happen, cannot find "
+          + "TempQueuePerPartition collection for queueName=" + queueName);
     }
     return queueToPartitions.get(queueName).values();
   }
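
Registration order matters here: when the new CapacityScheduler flag is on, the reserved-container selector is added ahead of FifoCandidatesSelector, and the Fifo pass then deducts whatever the first pass already selected. A trivial sketch of that ordering (simplified to strings; the real list holds PreemptionCandidatesSelector instances):

import java.util.ArrayList;
import java.util.List;

class RegistrationSketch {
  public static void main(String[] args) {
    boolean selectCandidatesForReservedContainers = true; // from csConfig
    List<String> policies = new ArrayList<>();
    if (selectCandidatesForReservedContainers) {
      // runs first, gets first pick of preemptable quota
      policies.add("ReservedContainerCandidatesSelector");
    }
    policies.add("FifoCandidatesSelector"); // works on the deducted remainder
    System.out.println(policies);
  }
}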
ReservedContainerCandidatesSelector.java (new file):

@@ -0,0 +1,316 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class ReservedContainerCandidatesSelector
+    extends PreemptionCandidatesSelector {
+  private static final Log LOG =
+      LogFactory.getLog(ReservedContainerCandidatesSelector.class);
+
+  private PreemptableResourceCalculator preemptableAmountCalculator;
+
+  /**
+   * A temporary data structure to remember what to preempt on a node
+   */
+  private static class NodeForPreemption {
+    private float preemptionCost;
+    private FiCaSchedulerNode schedulerNode;
+    private List<RMContainer> selectedContainers;
+
+    public NodeForPreemption(float preemptionCost,
+        FiCaSchedulerNode schedulerNode, List<RMContainer> selectedContainers) {
+      this.preemptionCost = preemptionCost;
+      this.schedulerNode = schedulerNode;
+      this.selectedContainers = selectedContainers;
+    }
+  }
+
+  ReservedContainerCandidatesSelector(
+      CapacitySchedulerPreemptionContext preemptionContext) {
+    super(preemptionContext);
+    preemptableAmountCalculator = new PreemptableResourceCalculator(
+        preemptionContext, true);
+  }
+
+  @Override
+  public Map<ApplicationAttemptId, Set<RMContainer>> selectCandidates(
+      Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
+      Resource clusterResource,
+      Resource totalPreemptedResourceAllowed) {
+    // Calculate how much resources we need to preempt
+    preemptableAmountCalculator.computeIdealAllocation(clusterResource,
+        totalPreemptedResourceAllowed);
+
+    // Get queue to preemptable resource by partition
+    Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition =
+        new HashMap<>();
+    for (String leafQueue : preemptionContext.getLeafQueueNames()) {
+      queueToPreemptableResourceByPartition.put(leafQueue,
+          CapacitySchedulerPreemptionUtils
+              .getResToObtainByPartitionForLeafQueue(preemptionContext,
+                  leafQueue, clusterResource));
+    }
+
+    // Get list of nodes for preemption, ordered by preemption cost
+    List<NodeForPreemption> nodesForPreemption = getNodesForPreemption(
+        clusterResource, queueToPreemptableResourceByPartition,
+        selectedCandidates, totalPreemptedResourceAllowed);
+
+    for (NodeForPreemption nfp : nodesForPreemption) {
+      RMContainer reservedContainer = nfp.schedulerNode.getReservedContainer();
+      if (null == reservedContainer) {
+        continue;
+      }
+
+      NodeForPreemption preemptionResult = getPreemptionCandidatesOnNode(
+          nfp.schedulerNode, clusterResource,
+          queueToPreemptableResourceByPartition, selectedCandidates,
+          totalPreemptedResourceAllowed, false);
+      if (null != preemptionResult) {
+        for (RMContainer c : preemptionResult.selectedContainers) {
+          ApplicationAttemptId appId = c.getApplicationAttemptId();
+          Set<RMContainer> containers = selectedCandidates.get(appId);
+          if (null == containers) {
+            containers = new HashSet<>();
+            selectedCandidates.put(appId, containers);
+          }
+
+          containers.add(c);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug(this.getClass().getName() + " Marked container=" + c
+                .getContainerId() + " from queue=" + c.getQueueName()
+                + " to be preemption candidates");
+          }
+        }
+      }
+    }
+
+    return selectedCandidates;
+  }
+
+  private Resource getPreemptableResource(String queueName,
+      String partitionName,
+      Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition) {
+    Map<String, Resource> partitionToPreemptable =
+        queueToPreemptableResourceByPartition.get(queueName);
+    if (null == partitionToPreemptable) {
+      return null;
+    }
+
+    Resource preemptable = partitionToPreemptable.get(partitionName);
+    return preemptable;
+  }
+
+  private boolean tryToPreemptFromQueue(Resource cluster, String queueName,
+      String partitionName,
+      Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
+      Resource required, Resource totalPreemptionAllowed, boolean readOnly) {
+    Resource preemptable = getPreemptableResource(queueName, partitionName,
+        queueToPreemptableResourceByPartition);
+    if (null == preemptable) {
+      return false;
+    }
+
+    if (!Resources.fitsIn(rc, cluster, required, preemptable)) {
+      return false;
+    }
+
+    if (!Resources.fitsIn(rc, cluster, required, totalPreemptionAllowed)) {
+      return false;
+    }
+
+    if (!readOnly) {
+      Resources.subtractFrom(preemptable, required);
+      Resources.subtractFrom(totalPreemptionAllowed, required);
+    }
+    return true;
+  }
+
+
+
+  /**
+   * Try to check if we can preempt resources for reserved container in given node
+   * @param node
+   * @param cluster
+   * @param queueToPreemptableResourceByPartition it's a map of
+   *                 <queueName, <partition, preemptable-resource>>
+   * @param readOnly do we want to modify preemptable resource after we selected
+   *                 candidates
+   * @return NodeForPreemption if it's possible to preempt containers on the node
+   *         to satisfy reserved resource
+   */
+  private NodeForPreemption getPreemptionCandidatesOnNode(
+      FiCaSchedulerNode node, Resource cluster,
+      Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
+      Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
+      Resource totalPreemptionAllowed, boolean readOnly) {
+    RMContainer reservedContainer = node.getReservedContainer();
+    Resource available = Resources.clone(node.getUnallocatedResource());
+    Resource totalSelected = Resources.createResource(0);
+    List<RMContainer> sortedRunningContainers =
+        node.getCopiedListOfRunningContainers();
+    List<RMContainer> selectedContainers = new ArrayList<>();
+    Map<ContainerId, RMContainer> killableContainers =
+        node.getKillableContainers();
+
+    // Sort running container by launch time, we preferred to preempt recent
+    // launched preempt container
+    Collections.sort(sortedRunningContainers, new Comparator<RMContainer>() {
+      @Override public int compare(RMContainer o1, RMContainer o2) {
+        return -1 * o1.getContainerId().compareTo(o2.getContainerId());
+      }
+    });
+
+    // First check: can we preempt containers to allocate the
+    // reservedContainer?
+    boolean canAllocateReservedContainer = false;
+
+    // At least, we can get available + killable resources from this node
+    Resource cur = Resources.add(available, node.getTotalKillableResources());
+    String partition = node.getPartition();
+
+    // Avoid preempt any container if required <= available + killable
+    if (Resources.fitsIn(rc, cluster, reservedContainer.getReservedResource(),
+        cur)) {
+      return null;
+    }
+
+    // Extra cost of am container preemption
+    float amPreemptionCost = 0f;
+
+    for (RMContainer c : sortedRunningContainers) {
+      String containerQueueName = c.getQueueName();
+
+      // Skip container if it is already marked killable
+      if (killableContainers.containsKey(c.getContainerId())) {
+        continue;
+      }
+
+      // An alternative approach is add a "penalty cost" if AM container is
+      // selected. Here for safety, avoid preempt AM container in any cases
+      if (c.isAMContainer()) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Skip selecting AM container on host=" + node.getNodeID()
+              + " AM container=" + c.getContainerId());
+        }
+        continue;
+      }
+
+      // Can we preempt container c?
+      // Check if we have quota to preempt this container
+      boolean canPreempt = tryToPreemptFromQueue(cluster, containerQueueName,
+          partition, queueToPreemptableResourceByPartition,
+          c.getAllocatedResource(), totalPreemptionAllowed, readOnly);
+
+      // If we can, add to selected container, and change resource accordingly.
+      if (canPreempt) {
+        if (!CapacitySchedulerPreemptionUtils.isContainerAlreadySelected(c,
+            selectedCandidates)) {
+          if (!readOnly) {
+            selectedContainers.add(c);
+          }
+          Resources.addTo(totalSelected, c.getAllocatedResource());
+        }
+        Resources.addTo(cur, c.getAllocatedResource());
+        if (Resources.fitsIn(rc, cluster,
+            reservedContainer.getReservedResource(), cur)) {
+          canAllocateReservedContainer = true;
+          break;
+        }
+      }
+    }
+
+    if (!canAllocateReservedContainer) {
+      if (!readOnly) {
+        // Revert queue preemption quotas
+        for (RMContainer c : selectedContainers) {
+          Resource res = getPreemptableResource(c.getQueueName(), partition,
+              queueToPreemptableResourceByPartition);
+          if (null == res) {
+            // This shouldn't happen in normal cases, one possible cause is
+            // container moved to different queue while executing preemption logic.
+            // Ignore such failures.
+            continue;
+          }
+          Resources.addTo(res, c.getAllocatedResource());
+        }
+      }
+      return null;
+    }
+
+    float ratio = Resources.ratio(rc, totalSelected,
+        reservedContainer.getReservedResource());
+
+    // Compute preemption score
+    NodeForPreemption nfp = new NodeForPreemption(ratio + amPreemptionCost,
+        node, selectedContainers);
+
+    return nfp;
+  }
+
+  private List<NodeForPreemption> getNodesForPreemption(Resource cluster,
+      Map<String, Map<String, Resource>> queueToPreemptableResourceByPartition,
+      Map<ApplicationAttemptId, Set<RMContainer>> selectedCandidates,
+      Resource totalPreemptionAllowed) {
+    List<NodeForPreemption> nfps = new ArrayList<>();
+
+    // get nodes have reserved container
+    for (FiCaSchedulerNode node : preemptionContext.getScheduler()
+        .getAllNodes()) {
+      if (node.getReservedContainer() != null) {
+        NodeForPreemption nfp = getPreemptionCandidatesOnNode(node, cluster,
+            queueToPreemptableResourceByPartition, selectedCandidates,
+            totalPreemptionAllowed, true);
+        if (null != nfp) {
+          // Null means we cannot preempt containers on the node to satisfy
+          // reserved container
+          nfps.add(nfp);
+        }
+      }
+    }
+
+    // Return sorted node-for-preemptions (by cost)
+    Collections.sort(nfps, new Comparator<NodeForPreemption>() {
+      @Override
+      public int compare(NodeForPreemption o1, NodeForPreemption o2) {
+        return Float.compare(o1.preemptionCost, o2.preemptionCost);
+      }
+    });
+
+    return nfps;
+  }
+}
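
The selector above works in two passes over getPreemptionCandidatesOnNode: a readOnly scoring pass that ranks nodes by how much must be preempted relative to the reservation (so scoring never consumes any queue's preemptable quota), then a mutating pass in cost order. A numeric sketch of the scoring with invented figures, reduced to a single memory dimension:

class NodeScoreSketch {
  public static void main(String[] args) {
    long reservedMb = 8192;                 // reserved container waiting on the node
    long cur = 2048;                        // available + already-killable
    long[] runningMb = {4096, 2048, 1024};  // running containers, newest first

    long selected = 0;
    boolean canAllocate = cur >= reservedMb;
    for (int i = 0; i < runningMb.length && !canAllocate; i++) {
      selected += runningMb[i];             // tentatively preempt this container
      cur += runningMb[i];
      canAllocate = cur >= reservedMb;      // stop once the reservation fits
    }

    if (canAllocate) {
      // lower cost = cheaper to satisfy the reservation = processed earlier
      double cost = (double) selected / reservedMb; // 6144 / 8192 = 0.75
      System.out.println("cost=" + cost);
    } else {
      System.out.println("node cannot satisfy the reservation; quotas reverted");
    }
  }
}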
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
|
package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
|
||||||
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
|
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
@ -32,20 +33,22 @@ import java.util.ArrayList;
|
||||||
public class TempQueuePerPartition {
|
public class TempQueuePerPartition {
|
||||||
// Following fields are copied from scheduler
|
// Following fields are copied from scheduler
|
||||||
final String queueName;
|
final String queueName;
|
||||||
final Resource current;
|
|
||||||
final Resource pending;
|
|
||||||
final Resource guaranteed;
|
|
||||||
final Resource maxCapacity;
|
|
||||||
final Resource killable;
|
|
||||||
final String partition;
|
final String partition;
|
||||||
|
final Resource pending;
|
||||||
|
|
||||||
|
private final Resource current;
|
||||||
|
private final Resource killable;
|
||||||
|
private final Resource reserved;
|
||||||
|
private final float absCapacity;
|
||||||
|
private final float absMaxCapacity;
|
||||||
|
final Resource totalPartitionResource;
|
||||||
|
|
||||||
// Following fields are setted and used by candidate selection policies
|
// Following fields are setted and used by candidate selection policies
|
||||||
Resource idealAssigned;
|
Resource idealAssigned;
|
||||||
Resource toBePreempted;
|
Resource toBePreempted;
|
||||||
Resource untouchableExtra;
|
Resource untouchableExtra;
|
||||||
Resource preemptableExtra;
|
Resource preemptableExtra;
|
||||||
// For logging purpose
|
private Resource actuallyToBePreempted;
|
||||||
Resource actuallyToBePreempted;
|
|
||||||
|
|
||||||
double normalizedGuarantee;
|
double normalizedGuarantee;
|
||||||
|
|
||||||
|
@ -53,14 +56,22 @@ public class TempQueuePerPartition {
|
||||||
LeafQueue leafQueue;
|
LeafQueue leafQueue;
|
||||||
boolean preemptionDisabled;
|
boolean preemptionDisabled;
|
||||||
|
|
||||||
TempQueuePerPartition(String queueName, Resource current, Resource pending,
|
TempQueuePerPartition(String queueName, Resource current,
|
||||||
Resource guaranteed, Resource maxCapacity, boolean preemptionDisabled,
|
boolean preemptionDisabled, String partition, Resource killable,
|
||||||
String partition, Resource killable) {
|
float absCapacity, float absMaxCapacity, Resource totalPartitionResource,
|
||||||
|
Resource reserved, CSQueue queue) {
|
||||||
this.queueName = queueName;
|
this.queueName = queueName;
|
||||||
this.current = current;
|
this.current = current;
|
||||||
this.pending = pending;
|
|
||||||
this.guaranteed = guaranteed;
|
if (queue instanceof LeafQueue) {
|
||||||
this.maxCapacity = maxCapacity;
|
LeafQueue l = (LeafQueue) queue;
|
||||||
|
pending = l.getTotalPendingResourcesConsideringUserLimit(
|
||||||
|
totalPartitionResource, partition);
|
||||||
|
leafQueue = l;
|
||||||
|
} else {
|
||||||
|
pending = Resources.createResource(0);
|
||||||
|
}
|
||||||
|
|
||||||
this.idealAssigned = Resource.newInstance(0, 0);
|
this.idealAssigned = Resource.newInstance(0, 0);
|
||||||
this.actuallyToBePreempted = Resource.newInstance(0, 0);
|
this.actuallyToBePreempted = Resource.newInstance(0, 0);
|
||||||
this.toBePreempted = Resource.newInstance(0, 0);
|
this.toBePreempted = Resource.newInstance(0, 0);
|
||||||
|
@ -71,6 +82,10 @@ public class TempQueuePerPartition {
|
||||||
this.preemptionDisabled = preemptionDisabled;
|
this.preemptionDisabled = preemptionDisabled;
|
||||||
this.partition = partition;
|
this.partition = partition;
|
||||||
this.killable = killable;
|
this.killable = killable;
|
||||||
|
this.absCapacity = absCapacity;
|
||||||
|
this.absMaxCapacity = absMaxCapacity;
|
||||||
|
this.totalPartitionResource = totalPartitionResource;
|
||||||
|
this.reserved = reserved;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLeafQueue(LeafQueue l) {
|
public void setLeafQueue(LeafQueue l) {
|
||||||
|
@ -92,31 +107,101 @@ public class TempQueuePerPartition {
|
||||||
return children;
|
return children;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Resource getUsed() {
|
||||||
|
return current;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Resource getUsedDeductReservd() {
|
||||||
|
return Resources.subtract(current, reserved);
|
||||||
|
}
|
||||||
|
|
||||||
// This function "accepts" all the resources it can (pending) and return
|
// This function "accepts" all the resources it can (pending) and return
|
||||||
// the unused ones
|
// the unused ones
|
||||||
Resource offer(Resource avail, ResourceCalculator rc,
|
Resource offer(Resource avail, ResourceCalculator rc,
|
||||||
Resource clusterResource) {
|
Resource clusterResource, boolean considersReservedResource) {
|
||||||
Resource absMaxCapIdealAssignedDelta = Resources.componentwiseMax(
|
Resource absMaxCapIdealAssignedDelta = Resources.componentwiseMax(
|
||||||
Resources.subtract(maxCapacity, idealAssigned),
|
Resources.subtract(getMax(), idealAssigned),
|
||||||
Resource.newInstance(0, 0));
|
Resource.newInstance(0, 0));
|
||||||
// remain = avail - min(avail, (max - assigned), (current + pending - assigned))
|
// remain = avail - min(avail, (max - assigned), (current + pending - assigned))
|
||||||
Resource accepted =
|
Resource accepted = Resources.min(rc, clusterResource,
|
||||||
Resources.min(rc, clusterResource,
|
absMaxCapIdealAssignedDelta, Resources.min(rc, clusterResource, avail,
|
||||||
absMaxCapIdealAssignedDelta,
|
Resources
|
||||||
Resources.min(rc, clusterResource, avail, Resources.subtract(
|
/*
|
||||||
Resources.add(current, pending), idealAssigned)));
|
* When we're using FifoPreemptionSelector
|
||||||
|
* (considerReservedResource = false).
|
||||||
|
*
|
||||||
|
* We should deduct reserved resource to avoid excessive preemption:
|
||||||
|
*
|
||||||
|
* For example, if an under-utilized queue has used = reserved = 20.
|
||||||
|
* Preemption policy will try to preempt 20 containers
|
||||||
|
* (which is not satisfied) from different hosts.
|
||||||
|
*
|
||||||
|
* In FifoPreemptionSelector, there's no guarantee that preempted
|
||||||
|
* resource can be used by pending request, so policy will preempt
|
||||||
|
* resources repeatly.
|
||||||
|
*/
|
||||||
|
.subtract(Resources.add(
|
||||||
|
(considersReservedResource ? getUsed() :
|
||||||
|
getUsedDeductReservd()),
|
||||||
|
pending), idealAssigned)));
|
||||||
Resource remain = Resources.subtract(avail, accepted);
|
Resource remain = Resources.subtract(avail, accepted);
|
||||||
Resources.addTo(idealAssigned, accepted);
|
Resources.addTo(idealAssigned, accepted);
|
||||||
return remain;
|
return remain;
|
||||||
}
|
}
|
||||||
|
|
||||||
+  public Resource getGuaranteed() {
+    return Resources.multiply(totalPartitionResource, absCapacity);
+  }
+
+  public Resource getMax() {
+    return Resources.multiply(totalPartitionResource, absMaxCapacity);
+  }
+
+  public void updatePreemptableExtras(ResourceCalculator rc) {
+    // Reset untouchableExtra and preemptableExtra
+    untouchableExtra = Resources.none();
+    preemptableExtra = Resources.none();
+
+    Resource extra = Resources.subtract(getUsed(),
+        getGuaranteed());
+    if (Resources.lessThan(rc, totalPartitionResource, extra,
+        Resources.none())) {
+      extra = Resources.none();
+    }
+
+    if (null == children || children.isEmpty()) {
+      // If it is a leaf queue
+      if (preemptionDisabled) {
+        untouchableExtra = extra;
+      } else {
+        preemptableExtra = extra;
+      }
+    } else {
+      // If it is a parent queue
+      Resource childrensPreemptable = Resource.newInstance(0, 0);
+      for (TempQueuePerPartition child : children) {
+        Resources.addTo(childrensPreemptable, child.preemptableExtra);
+      }
+      // untouchableExtra = max(extra - childrenPreemptable, 0)
+      if (Resources.greaterThanOrEqual(rc, totalPartitionResource,
+          childrensPreemptable, extra)) {
+        untouchableExtra = Resource.newInstance(0, 0);
+      } else {
+        untouchableExtra = Resources.subtract(extra, childrensPreemptable);
+      }
+      preemptableExtra = Resources.min(rc, totalPartitionResource,
+          childrensPreemptable, extra);
+    }
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
     sb.append(" NAME: " + queueName)
         .append(" CUR: ").append(current)
         .append(" PEN: ").append(pending)
-        .append(" GAR: ").append(guaranteed)
+        .append(" RESERVED: ").append(reserved)
+        .append(" GAR: ").append(getGuaranteed())
         .append(" NORM: ").append(normalizedGuarantee)
         .append(" IDEAL_ASSIGNED: ").append(idealAssigned)
         .append(" IDEAL_PREEMPT: ").append(toBePreempted)
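For the updatePreemptableExtras() logic added above, a standalone arithmetic sketch (illustrative ints, not part of the patch) of how a parent queue's extra splits between untouchableExtra and preemptableExtra:

public class ExtrasSketch {
  public static void main(String[] args) {
    int used = 70, guaranteed = 50;
    int extra = Math.max(used - guaranteed, 0);                       // 20
    // Two children: one preemption-disabled, one exposing 12 as preemptable.
    int childrensPreemptable = 12;
    // untouchableExtra = max(extra - childrenPreemptable, 0)
    int untouchableExtra = Math.max(extra - childrensPreemptable, 0); // 8
    // preemptableExtra = min(childrenPreemptable, extra)
    int preemptableExtra = Math.min(childrensPreemptable, extra);     // 12
    System.out.println(untouchableExtra + " / " + preemptableExtra);
  }
}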
@@ -130,24 +215,55 @@ public class TempQueuePerPartition {

   public void assignPreemption(float scalingFactor, ResourceCalculator rc,
       Resource clusterResource) {
-    if (Resources.greaterThan(rc, clusterResource,
-        Resources.subtract(current, killable), idealAssigned)) {
-      toBePreempted = Resources.multiply(Resources
-          .subtract(Resources.subtract(current, killable), idealAssigned),
-          scalingFactor);
+    Resource usedDeductKillable = Resources.subtract(
+        getUsed(), killable);
+    Resource totalResource = Resources.add(getUsed(), pending);
+
+    // The minimum resource that we need to keep for a queue is:
+    // max(idealAssigned, min(used + pending, guaranteed)).
+    //
+    // Doing this because the ideal-allocation calculation doesn't consider
+    // reserved resource, so the calculated ideal allocation could be less
+    // than guaranteed and total. We should avoid preempting from a queue
+    // that is already <= its guaranteed resource.
+    Resource minimumQueueResource = Resources.max(rc, clusterResource,
+        Resources.min(rc, clusterResource, totalResource, getGuaranteed()),
+        idealAssigned);
+
+    if (Resources.greaterThan(rc, clusterResource, usedDeductKillable,
+        minimumQueueResource)) {
+      toBePreempted = Resources.multiply(
+          Resources.subtract(usedDeductKillable, minimumQueueResource),
+          scalingFactor);
     } else {
-      toBePreempted = Resource.newInstance(0, 0);
+      toBePreempted = Resources.none();
     }
   }

+  public Resource getActuallyToBePreempted() {
+    return actuallyToBePreempted;
+  }
+
+  public void setActuallyToBePreempted(Resource res) {
+    this.actuallyToBePreempted = res;
+  }
+
+  public void deductActuallyToBePreempted(ResourceCalculator rc,
+      Resource cluster, Resource toBeDeduct) {
+    if (Resources.greaterThan(rc, cluster, actuallyToBePreempted, toBeDeduct)) {
+      Resources.subtractFrom(actuallyToBePreempted, toBeDeduct);
+    }
+    actuallyToBePreempted = Resources.max(rc, cluster, actuallyToBePreempted,
+        Resources.none());
+  }
+
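The minimum-to-keep formula in assignPreemption() above is easiest to check with numbers. A standalone sketch, not part of the patch, with plain ints standing in for Resource:

public class AssignPreemptionSketch {
  static int toBePreempted(int used, int killable, int pending, int guaranteed,
      int idealAssigned, float scalingFactor) {
    int usedDeductKillable = used - killable;
    // minimum = max(idealAssigned, min(used + pending, guaranteed))
    int minimumQueueResource = Math.max(
        Math.min(used + pending, guaranteed), idealAssigned);
    if (usedDeductKillable > minimumQueueResource) {
      return (int) ((usedDeductKillable - minimumQueueResource) * scalingFactor);
    }
    return 0;
  }

  public static void main(String[] args) {
    // Reservation-heavy queue: used 20, guaranteed 30, ideal came out at 10.
    // It keeps min(20 + 0, 30) = 20, so nothing is preempted from it.
    System.out.println(toBePreempted(20, 0, 0, 30, 10, 1.0f)); // 0, not 10
    // Over its guarantee with ideal 40: trimmed down toward 40.
    System.out.println(toBePreempted(60, 5, 0, 30, 40, 1.0f)); // 15
  }
}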
   void appendLogString(StringBuilder sb) {
     sb.append(queueName).append(", ")
         .append(current.getMemory()).append(", ")
         .append(current.getVirtualCores()).append(", ")
         .append(pending.getMemory()).append(", ")
         .append(pending.getVirtualCores()).append(", ")
-        .append(guaranteed.getMemory()).append(", ")
-        .append(guaranteed.getVirtualCores()).append(", ")
+        .append(getGuaranteed().getMemory()).append(", ")
+        .append(getGuaranteed().getVirtualCores()).append(", ")
         .append(idealAssigned.getMemory()).append(", ")
         .append(idealAssigned.getVirtualCores()).append(", ")
         .append(toBePreempted.getMemory()).append(", ")

@@ -89,4 +89,6 @@ public interface RMContainer extends EventHandler<RMContainerEvent> {
   boolean hasIncreaseReservation();

   void cancelIncreaseReservation();
+
+  String getQueueName();
 }

@@ -181,6 +181,7 @@ public class RMContainerImpl implements RMContainer, Comparable<RMContainer> {
   // Only used for container resource increase and decrease. This is the
   // resource to rollback to should container resource increase token expires.
   private Resource lastConfirmedResource;
+  private volatile String queueName;

   public RMContainerImpl(Container container,
       ApplicationAttemptId appAttemptId, NodeId nodeId, String user,
@@ -817,4 +818,13 @@ public class RMContainerImpl implements RMContainer, Comparable<RMContainer> {
   public void cancelIncreaseReservation() {
     hasIncreaseReservation = false;
   }
+
+  public void setQueueName(String queueName) {
+    this.queueName = queueName;
+  }
+
+  @Override
+  public String getQueueName() {
+    return queueName;
+  }
 }

@@ -659,7 +659,7 @@ public abstract class AbstractYarnScheduler
       nodeTracker.removeNode(nm.getNodeID());

       // update resource to node
-      node.setTotalResource(newResource);
+      node.updateTotalResource(newResource);

       nodeTracker.addNode((N) node);
     } else {

@@ -404,6 +404,7 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
         node.getNodeID(), appSchedulingInfo.getUser(), rmContext);
     attemptResourceUsage.incReserved(node.getPartition(),
         container.getResource());
+    ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());

     // Reset the re-reservation count
     resetReReservations(priority);
@@ -748,14 +749,17 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
   public synchronized void move(Queue newQueue) {
     QueueMetrics oldMetrics = queue.getMetrics();
     QueueMetrics newMetrics = newQueue.getMetrics();
+    String newQueueName = newQueue.getQueueName();
     String user = getUser();
     for (RMContainer liveContainer : liveContainers.values()) {
       Resource resource = liveContainer.getContainer().getResource();
+      ((RMContainerImpl)liveContainer).setQueueName(newQueueName);
       oldMetrics.releaseResources(user, 1, resource);
       newMetrics.allocateResources(user, 1, resource, false);
     }
     for (Map<NodeId, RMContainer> map : reservedContainers.values()) {
       for (RMContainer reservedContainer : map.values()) {
+        ((RMContainerImpl)reservedContainer).setQueueName(newQueueName);
         Resource resource = reservedContainer.getReservedResource();
         oldMetrics.unreserveResource(user, resource);
         newMetrics.reserveResource(user, resource);
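The setQueueName() calls above exist so that preemption code can read a container's queue straight off the RMContainer. A hedged sketch, not in this patch, of the kind of grouping this enables when scanning a node's containers:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;

class QueueBucketing {
  // Bucket running containers by queue without resolving each app attempt;
  // getQueueName() is the accessor added to RMContainer in this change.
  static Map<String, List<RMContainer>> byQueue(List<RMContainer> running) {
    Map<String, List<RMContainer>> out = new HashMap<String, List<RMContainer>>();
    for (RMContainer c : running) {
      List<RMContainer> list = out.get(c.getQueueName());
      if (list == null) {
        list = new ArrayList<RMContainer>();
        out.put(c.getQueueName(), list);
      }
      list.add(c);
    }
    return out;
  }
}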

@@ -98,12 +98,12 @@ public abstract class SchedulerNode {
    * Set total resources on the node.
    * @param resource Total resources on the node.
    */
-  public synchronized void setTotalResource(Resource resource){
+  public synchronized void updateTotalResource(Resource resource){
     this.totalResource = resource;
     this.unallocatedResource = Resources.subtract(totalResource,
         this.allocatedResource);
   }

   /**
    * Get the ID of the node which contains both its hostname and port.
    * @return The ID of the node.
@@ -185,7 +185,7 @@ public abstract class SchedulerNode {
           + getUnallocatedResource() + " available after allocation");
     }
   }

   /**
    * Increase the resources allocated to a container.
    * @param containerId Identifier of the container to change.
@@ -195,7 +195,7 @@ public abstract class SchedulerNode {
       Resource deltaResource) {
     changeContainerResource(containerId, deltaResource, true);
   }

   /**
    * Decrease the resources allocated to a container.
    * @param containerId Identifier of the container to change.
@@ -242,10 +242,11 @@ public abstract class SchedulerNode {
   }

   /**
-   * Update the resources of the node when allocating a new container.
-   * @param container Container to allocate.
+   * Update the resources of the node when releasing a container.
+   * @param container Container to release.
    */
-  protected synchronized void updateResource(Container container) {
+  protected synchronized void updateResourceForReleasedContainer(
+      Container container) {
     addUnallocatedResource(container.getResource());
     --numContainers;
   }
@@ -262,7 +263,7 @@ public abstract class SchedulerNode {

     // Remove the containers from the nodemanger
     if (null != launchedContainers.remove(container.getId())) {
-      updateResource(container);
+      updateResourceForReleasedContainer(container);
     }

     if (LOG.isDebugEnabled()) {
@@ -338,7 +339,7 @@ public abstract class SchedulerNode {
    * Get the running containers in the node.
    * @return List of running containers in the node.
    */
-  public synchronized List<RMContainer> getRunningContainers() {
+  public synchronized List<RMContainer> getCopiedListOfRunningContainers() {
     return new ArrayList<RMContainer>(launchedContainers.values());
   }

@@ -355,7 +356,7 @@ public abstract class SchedulerNode {
    * @param reservedContainer Reserved container in the node.
    */
   protected synchronized void
       setReservedContainer(RMContainer reservedContainer) {
     this.reservedContainer = reservedContainer;
   }

@@ -385,7 +386,7 @@ public abstract class SchedulerNode {
   public void updateLabels(Set<String> labels) {
     this.labels = labels;
   }

   /**
    * Get partition of which the node belongs to, if node-labels of this node is
    * empty or null, it belongs to NO_LABEL partition. And since we only support
@@ -394,7 +395,7 @@ public abstract class SchedulerNode {
    */
   public String getPartition() {
     if (this.labels == null || this.labels.isEmpty()) {
       return RMNodeLabelsManager.NO_LABEL;
     } else {
       return this.labels.iterator().next();
     }
@@ -432,4 +433,4 @@ public abstract class SchedulerNode {
   public ResourceUtilization getNodeUtilization() {
     return this.nodeUtilization;
   }
 }
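The getRunningContainers() to getCopiedListOfRunningContainers() rename above makes the copy semantics explicit: the scheduler call sites below iterate the list while completing containers, which mutates the node's internal map. A standalone sketch, not part of the patch, of why the snapshot matters:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class CopiedListSketch {
  public static void main(String[] args) {
    Map<Integer, String> launched = new HashMap<Integer, String>();
    launched.put(1, "container_1");
    launched.put(2, "container_2");

    // Iterating launched.values() directly while removing would throw a
    // ConcurrentModificationException; iterating a copied snapshot is safe.
    List<String> copy = new ArrayList<String>(launched.values());
    for (String c : copy) {
      launched.values().remove(c); // mutates the map, not the snapshot
    }
    System.out.println(launched.isEmpty()); // true
  }
}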

@@ -50,7 +50,6 @@ import org.apache.hadoop.util.Time;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
 import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
 import org.apache.hadoop.yarn.api.records.ContainerStatus;
@@ -1160,7 +1159,7 @@ public class CapacityScheduler extends
     String oldPartition = node.getPartition();

     // Update resources of these containers
-    for (RMContainer rmContainer : node.getRunningContainers()) {
+    for (RMContainer rmContainer : node.getCopiedListOfRunningContainers()) {
       FiCaSchedulerApp application =
           getApplicationAttempt(rmContainer.getApplicationAttemptId());
       if (null != application) {
@@ -1509,7 +1508,7 @@ public class CapacityScheduler extends
     }

     // Remove running containers
-    List<RMContainer> runningContainers = node.getRunningContainers();
+    List<RMContainer> runningContainers = node.getCopiedListOfRunningContainers();
     for (RMContainer container : runningContainers) {
       super.completedContainer(container,
           SchedulerUtils.createAbnormalContainerStatus(
@@ -1633,6 +1632,11 @@ public class CapacityScheduler extends
   public FiCaSchedulerNode getNode(NodeId nodeId) {
     return nodeTracker.getNode(nodeId);
   }

+  @Lock(Lock.NoLock.class)
+  public List<FiCaSchedulerNode> getAllNodes() {
+    return nodeTracker.getAllNodes();
+  }
+
   @Override
   @Lock(Lock.NoLock.class)

@@ -1021,21 +1021,24 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
     return getBoolean(LAZY_PREEMPTION_ENALBED, DEFAULT_LAZY_PREEMPTION_ENABLED);
   }

+  private static final String PREEMPTION_CONFIG_PREFIX =
+      "yarn.resourcemanager.monitor.capacity.preemption.";
+
   /** If true, run the policy but do not affect the cluster with preemption and
    * kill events. */
   public static final String PREEMPTION_OBSERVE_ONLY =
-      "yarn.resourcemanager.monitor.capacity.preemption.observe_only";
+      PREEMPTION_CONFIG_PREFIX + "observe_only";
   public static final boolean DEFAULT_PREEMPTION_OBSERVE_ONLY = false;

   /** Time in milliseconds between invocations of this policy */
   public static final String PREEMPTION_MONITORING_INTERVAL =
-      "yarn.resourcemanager.monitor.capacity.preemption.monitoring_interval";
+      PREEMPTION_CONFIG_PREFIX + "monitoring_interval";
   public static final long DEFAULT_PREEMPTION_MONITORING_INTERVAL = 3000L;

   /** Time in milliseconds between requesting a preemption from an application
    * and killing the container. */
   public static final String PREEMPTION_WAIT_TIME_BEFORE_KILL =
-      "yarn.resourcemanager.monitor.capacity.preemption.max_wait_before_kill";
+      PREEMPTION_CONFIG_PREFIX + "max_wait_before_kill";
   public static final long DEFAULT_PREEMPTION_WAIT_TIME_BEFORE_KILL = 15000L;

   /** Maximum percentage of resources preemptionCandidates in a single round. By
@@ -1043,7 +1046,7 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
    * reclaimed from the cluster. After computing the total desired preemption,
    * the policy scales it back within this limit. */
   public static final String TOTAL_PREEMPTION_PER_ROUND =
-      "yarn.resourcemanager.monitor.capacity.preemption.total_preemption_per_round";
+      PREEMPTION_CONFIG_PREFIX + "total_preemption_per_round";
   public static final float DEFAULT_TOTAL_PREEMPTION_PER_ROUND = 0.1f;

   /** Maximum amount of resources above the target capacity ignored for
@@ -1052,7 +1055,7 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
    * High values would slow the time to capacity and (absent natural
    * completions) it might prevent convergence to guaranteed capacity. */
   public static final String PREEMPTION_MAX_IGNORED_OVER_CAPACITY =
-      "yarn.resourcemanager.monitor.capacity.preemption.max_ignored_over_capacity";
+      PREEMPTION_CONFIG_PREFIX + "max_ignored_over_capacity";
   public static final float DEFAULT_PREEMPTION_MAX_IGNORED_OVER_CAPACITY = 0.1f;
   /**
    * Given a computed preemption target, account for containers naturally
@@ -1062,7 +1065,16 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur
    * will reclaim almost 95% of resources within 5 * {@link
    * #PREEMPTION_WAIT_TIME_BEFORE_KILL}, even absent natural termination. */
   public static final String PREEMPTION_NATURAL_TERMINATION_FACTOR =
-      "yarn.resourcemanager.monitor.capacity.preemption.natural_termination_factor";
+      PREEMPTION_CONFIG_PREFIX + "natural_termination_factor";
   public static final float DEFAULT_PREEMPTION_NATURAL_TERMINATION_FACTOR =
       0.2f;

+  /**
+   * When calculating which containers to preempt, try to select candidates
+   * for reserved containers first. Disabled (false) by default.
+   */
+  public static final String PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS =
+      PREEMPTION_CONFIG_PREFIX + "select_based_on_reserved_containers";
+  public static final boolean DEFAULT_PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS =
+      false;
 }
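With the PREEMPTION_CONFIG_PREFIX refactoring above, each constant resolves to the same key string as before. A minimal sketch, not part of the patch, of enabling the new reserved-container selector in code:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;

class EnableReservedSelector {
  static Configuration enable() {
    CapacitySchedulerConfiguration conf =
        new CapacitySchedulerConfiguration(new Configuration(false));
    // Resolves to "yarn.resourcemanager.monitor.capacity.preemption."
    // + "select_based_on_reserved_containers"
    conf.setBoolean(CapacitySchedulerConfiguration
        .PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS, true);
    return conf;
  }
}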

@@ -199,6 +199,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt {
         new RMContainerImpl(container, this.getApplicationAttemptId(),
             node.getNodeID(), appSchedulingInfo.getUser(), this.rmContext,
             request.getNodeLabelExpression());
+    ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());

     updateAMContainerDiagnostics(AMState.ASSIGNED, null);

@@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicat
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.apache.hadoop.yarn.util.resource.Resources;

+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
@@ -142,9 +143,9 @@ public class FiCaSchedulerNode extends SchedulerNode {
   }

   @Override
-  protected synchronized void updateResource(
+  protected synchronized void updateResourceForReleasedContainer(
       Container container) {
-    super.updateResource(container);
+    super.updateResourceForReleasedContainer(container);
     if (killableContainers.containsKey(container.getId())) {
       Resources.subtractFrom(totalKillableResources, container.getResource());
       killableContainers.remove(container.getId());
@@ -170,6 +171,6 @@ public class FiCaSchedulerNode extends SchedulerNode {
   }

   public synchronized Map<ContainerId, RMContainer> getKillableContainers() {
-    return killableContainers;
+    return Collections.unmodifiableMap(killableContainers);
   }
 }
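For the getKillableContainers() change above, a standalone sketch (not part of the patch) of the read-through, write-rejecting behavior that Collections.unmodifiableMap gives callers, which keeps the node's killable-container accounting from being corrupted externally:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

class UnmodifiableViewSketch {
  public static void main(String[] args) {
    Map<String, String> internal = new HashMap<String, String>();
    internal.put("container_1", "killable");
    Map<String, String> view = Collections.unmodifiableMap(internal);
    System.out.println(view.size());   // 1: reads pass through to the map
    try {
      view.remove("container_1");      // writes are rejected
    } catch (UnsupportedOperationException expected) {
      System.out.println("rejected");
    }
  }
}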

@@ -378,6 +378,7 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
     RMContainer rmContainer = new RMContainerImpl(container,
         getApplicationAttemptId(), node.getNodeID(),
         appSchedulingInfo.getUser(), rmContext);
+    ((RMContainerImpl)rmContainer).setQueueName(this.getQueueName());

     // Add it to allContainers list.
     newlyAllocatedContainers.add(rmContainer);

@@ -904,7 +904,8 @@ public class FairScheduler extends
     }

     // Remove running containers
-    List<RMContainer> runningContainers = node.getRunningContainers();
+    List<RMContainer> runningContainers =
+        node.getCopiedListOfRunningContainers();
     for (RMContainer container : runningContainers) {
       super.completedContainer(container,
           SchedulerUtils.createAbnormalContainerStatus(

@@ -944,7 +944,7 @@ public class FifoScheduler extends
       return;
     }
     // Kill running containers
-    for(RMContainer container : node.getRunningContainers()) {
+    for(RMContainer container : node.getCopiedListOfRunningContainers()) {
       super.completedContainer(container,
           SchedulerUtils.createAbnormalContainerStatus(
             container.getContainerId(),

@@ -0,0 +1,689 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerPreemptEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.Resources;
+import org.junit.Assert;
+import org.junit.Before;
+import org.mockito.ArgumentMatcher;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
+
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Matchers.isA;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+public class ProportionalCapacityPreemptionPolicyMockFramework {
+  static final Log LOG =
+      LogFactory.getLog(TestProportionalCapacityPreemptionPolicyForNodePartitions.class);
+  final String ROOT = CapacitySchedulerConfiguration.ROOT;
+
+  Map<String, CSQueue> nameToCSQueues = null;
+  Map<String, Resource> partitionToResource = null;
+  Map<NodeId, FiCaSchedulerNode> nodeIdToSchedulerNodes = null;
+  RMNodeLabelsManager nlm = null;
+  RMContext rmContext = null;
+
+  ResourceCalculator rc = new DefaultResourceCalculator();
+  Clock mClock = null;
+  CapacitySchedulerConfiguration conf = null;
+  CapacityScheduler cs = null;
+  EventHandler<SchedulerEvent> mDisp = null;
+  ProportionalCapacityPreemptionPolicy policy = null;
+  Resource clusterResource = null;
+
+  @SuppressWarnings("unchecked")
+  @Before
+  public void setup() {
+    org.apache.log4j.Logger.getRootLogger().setLevel(
+        org.apache.log4j.Level.DEBUG);
+
+    conf = new CapacitySchedulerConfiguration(new Configuration(false));
+    conf.setLong(
+        CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, 10000);
+    conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
+        3000);
+    // report "ideal" preempt
+    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
+        (float) 1.0);
+    conf.setFloat(
+        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
+        (float) 1.0);
+
+    mClock = mock(Clock.class);
+    cs = mock(CapacityScheduler.class);
+    when(cs.getResourceCalculator()).thenReturn(rc);
+    when(cs.getPreemptionManager()).thenReturn(new PreemptionManager());
+    when(cs.getConfiguration()).thenReturn(conf);
+
+    nlm = mock(RMNodeLabelsManager.class);
+    mDisp = mock(EventHandler.class);
+
+    rmContext = mock(RMContext.class);
+    when(rmContext.getNodeLabelManager()).thenReturn(nlm);
+    Dispatcher disp = mock(Dispatcher.class);
+    when(rmContext.getDispatcher()).thenReturn(disp);
+    when(disp.getEventHandler()).thenReturn(mDisp);
+    when(cs.getRMContext()).thenReturn(rmContext);
+
+    partitionToResource = new HashMap<>();
+    nodeIdToSchedulerNodes = new HashMap<>();
+    nameToCSQueues = new HashMap<>();
+  }
+
+  public void buildEnv(String labelsConfig, String nodesConfig,
+      String queuesConfig, String appsConfig) throws IOException {
+    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig, false);
+  }
+
+  public void buildEnv(String labelsConfig, String nodesConfig,
+      String queuesConfig, String appsConfig,
+      boolean useDominantResourceCalculator) throws IOException {
+    if (useDominantResourceCalculator) {
+      when(cs.getResourceCalculator()).thenReturn(
+          new DominantResourceCalculator());
+    }
+    mockNodeLabelsManager(labelsConfig);
+    mockSchedulerNodes(nodesConfig);
+    for (NodeId nodeId : nodeIdToSchedulerNodes.keySet()) {
+      when(cs.getSchedulerNode(nodeId)).thenReturn(
+          nodeIdToSchedulerNodes.get(nodeId));
+    }
+    List<FiCaSchedulerNode> allNodes = new ArrayList<>(
+        nodeIdToSchedulerNodes.values());
+    when(cs.getAllNodes()).thenReturn(allNodes);
+    ParentQueue root = mockQueueHierarchy(queuesConfig);
+    when(cs.getRootQueue()).thenReturn(root);
+    when(cs.getClusterResource()).thenReturn(clusterResource);
+    mockApplications(appsConfig);
+
+    policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs,
+        mClock);
+  }
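A hedged usage sketch for buildEnv(): the config strings below are invented but follow the formats documented on the mock* methods later in this file (labels, nodes, queue hierarchy, apps), and the call would run inside a test that extends this framework class.

// Illustrative only; values are made up.
String labelsConfig = "=100,true";     // default partition: 100 memory, exclusive
String nodesConfig = "n1=";            // one node in the default partition
String queuesConfig =
    // guaranteed max used pending
    "root(=[100 100 100 100]);" +
    "-a(=[50 100 20 100]);" +
    "-b(=[50 100 80 100])";
String appsConfig =
    // queueName\t(priority,resource,host,expression,#repeat,reserved)
    "a\t(1,1,n1,,20,false);" +
    "b\t(1,1,n1,,80,false)";
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);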
+
+  private void mockContainers(String containersConfig, ApplicationAttemptId attemptId,
+      String queueName, List<RMContainer> reservedContainers,
+      List<RMContainer> liveContainers) {
+    int containerId = 1;
+    int start = containersConfig.indexOf("=") + 1;
+    int end = -1;
+
+    while (start < containersConfig.length()) {
+      while (start < containersConfig.length()
+          && containersConfig.charAt(start) != '(') {
+        start++;
+      }
+      if (start >= containersConfig.length()) {
+        throw new IllegalArgumentException(
+            "Error containers specification, line=" + containersConfig);
+      }
+      end = start + 1;
+      while (end < containersConfig.length()
+          && containersConfig.charAt(end) != ')') {
+        end++;
+      }
+      if (end >= containersConfig.length()) {
+        throw new IllegalArgumentException(
+            "Error containers specification, line=" + containersConfig);
+      }
+
+      // now we found start/end, get container values
+      String[] values = containersConfig.substring(start + 1, end).split(",");
+      if (values.length != 6) {
+        throw new IllegalArgumentException("Format to define container is:"
+            + "(priority,resource,host,expression,repeat,reserved)");
+      }
+      Priority pri = Priority.newInstance(Integer.valueOf(values[0]));
+      Resource res = parseResourceFromString(values[1]);
+      NodeId host = NodeId.newInstance(values[2], 1);
+      String exp = values[3];
+      int repeat = Integer.valueOf(values[4]);
+      boolean reserved = Boolean.valueOf(values[5]);
+
+      for (int i = 0; i < repeat; i++) {
+        Container c = mock(Container.class);
+        when(c.getResource()).thenReturn(res);
+        when(c.getPriority()).thenReturn(pri);
+        RMContainerImpl rmc = mock(RMContainerImpl.class);
+        when(rmc.getAllocatedNode()).thenReturn(host);
+        when(rmc.getNodeLabelExpression()).thenReturn(exp);
+        when(rmc.getAllocatedResource()).thenReturn(res);
+        when(rmc.getContainer()).thenReturn(c);
+        when(rmc.getApplicationAttemptId()).thenReturn(attemptId);
+        when(rmc.getQueueName()).thenReturn(queueName);
+        final ContainerId cId = ContainerId.newContainerId(attemptId, containerId);
+        when(rmc.getContainerId()).thenReturn(
+            cId);
+        doAnswer(new Answer<Integer>() {
+          @Override
+          public Integer answer(InvocationOnMock invocation) throws Throwable {
+            return cId.compareTo(((RMContainer) invocation.getArguments()[0])
+                .getContainerId());
+          }
+        }).when(rmc).compareTo(any(RMContainer.class));
+
+        if (containerId == 1) {
+          when(rmc.isAMContainer()).thenReturn(true);
+        }
+
+        if (reserved) {
+          reservedContainers.add(rmc);
+          when(rmc.getReservedResource()).thenReturn(res);
+        } else {
+          liveContainers.add(rmc);
+        }
+
+        // Add container to scheduler-node
+        addContainerToSchedulerNode(host, rmc, reserved);
+
+        // If this is a non-exclusive allocation
+        String partition = null;
+        if (exp.isEmpty()
+            && !(partition = nodeIdToSchedulerNodes.get(host).getPartition())
+                .isEmpty()) {
+          LeafQueue queue = (LeafQueue) nameToCSQueues.get(queueName);
+          Map<String, TreeSet<RMContainer>> ignoreExclusivityContainers =
+              queue.getIgnoreExclusivityRMContainers();
+          if (!ignoreExclusivityContainers.containsKey(partition)) {
+            ignoreExclusivityContainers.put(partition,
+                new TreeSet<RMContainer>());
+          }
+          ignoreExclusivityContainers.get(partition).add(rmc);
+        }
+        LOG.debug("add container to app=" + attemptId + " res=" + res
+            + " node=" + host + " nodeLabelExpression=" + exp + " partition="
+            + partition);
+
+        containerId++;
+      }
+
+      start = end + 1;
+    }
+  }
+
+  /**
+   * Format is:
+   * <pre>
+   * queueName\t  // app1
+   * (priority,resource,host,expression,#repeat,reserved)
+   * (priority,resource,host,expression,#repeat,reserved);
+   * queueName\t  // app2
+   * </pre>
+   */
+  private void mockApplications(String appsConfig) {
+    int id = 1;
+    for (String a : appsConfig.split(";")) {
+      String[] strs = a.split("\t");
+      String queueName = strs[0];
+
+      // get containers
+      List<RMContainer> liveContainers = new ArrayList<RMContainer>();
+      List<RMContainer> reservedContainers = new ArrayList<RMContainer>();
+      ApplicationId appId = ApplicationId.newInstance(0L, id);
+      ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
+
+      mockContainers(strs[1], appAttemptId, queueName, reservedContainers,
+          liveContainers);
+
+      FiCaSchedulerApp app = mock(FiCaSchedulerApp.class);
+      when(app.getLiveContainers()).thenReturn(liveContainers);
+      when(app.getReservedContainers()).thenReturn(reservedContainers);
+      when(app.getApplicationAttemptId()).thenReturn(appAttemptId);
+      when(app.getApplicationId()).thenReturn(appId);
+      when(app.getPriority()).thenReturn(Priority.newInstance(0));
+
+      // add to LeafQueue
+      LeafQueue queue = (LeafQueue) nameToCSQueues.get(queueName);
+      queue.getApplications().add(app);
+
+      id++;
+    }
+  }
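One illustrative line for the format above (invented values): an app in queue a holding five live 1-unit containers and five reserved ones on host n1; the final boolean is the reserved flag parsed by mockContainers().

String oneApp = "a\t(1,1,n1,,5,false)(1,1,n1,,5,true)";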
+
+  private void addContainerToSchedulerNode(NodeId nodeId, RMContainer container,
+      boolean isReserved) {
+    SchedulerNode node = nodeIdToSchedulerNodes.get(nodeId);
+    assert node != null;
+
+    if (isReserved) {
+      when(node.getReservedContainer()).thenReturn(container);
+    } else {
+      node.getCopiedListOfRunningContainers().add(container);
+      Resources.subtractFrom(node.getUnallocatedResource(),
+          container.getAllocatedResource());
+    }
+  }
+
+  /**
+   * Format is:
+   * host1=partition[ res=resource];
+   * host2=partition[ res=resource];
+   */
+  private void mockSchedulerNodes(String schedulerNodesConfigStr)
+      throws IOException {
+    String[] nodesConfigStrArray = schedulerNodesConfigStr.split(";");
+    for (String p : nodesConfigStrArray) {
+      String[] arr = p.split(" ");
+
+      NodeId nodeId = NodeId.newInstance(arr[0].substring(0, arr[0].indexOf("=")), 1);
+      String partition = arr[0].substring(arr[0].indexOf("=") + 1, arr[0].length());
+
+      FiCaSchedulerNode sn = mock(FiCaSchedulerNode.class);
+      when(sn.getNodeID()).thenReturn(nodeId);
+      when(sn.getPartition()).thenReturn(partition);
+
+      Resource totalRes = Resources.createResource(0);
+      if (arr.length > 1) {
+        String res = arr[1];
+        if (res.contains("res=")) {
+          String resSring = res.substring(
+              res.indexOf("res=") + "res=".length());
+          totalRes = parseResourceFromString(resSring);
+        }
+      }
+      when(sn.getTotalResource()).thenReturn(totalRes);
+      when(sn.getUnallocatedResource()).thenReturn(Resources.clone(totalRes));
+
+      // TODO, add settings of killable resources when necessary
+      when(sn.getTotalKillableResources()).thenReturn(Resources.none());
+
+      List<RMContainer> liveContainers = new ArrayList<>();
+      when(sn.getCopiedListOfRunningContainers()).thenReturn(liveContainers);
+
+      nodeIdToSchedulerNodes.put(nodeId, sn);
+
+      LOG.debug("add scheduler node, id=" + nodeId + ", partition=" + partition);
+    }
+  }
+
+  /**
+   * Format is:
+   * <pre>
+   * partition0=total_resource,exclusivity;
+   * partition1=total_resource,exclusivity;
+   * ...
+   * </pre>
+   */
+  private void mockNodeLabelsManager(String nodeLabelsConfigStr) throws IOException {
+    String[] partitionConfigArr = nodeLabelsConfigStr.split(";");
+    clusterResource = Resources.createResource(0);
+    for (String p : partitionConfigArr) {
+      String partitionName = p.substring(0, p.indexOf("="));
+      Resource res = parseResourceFromString(p.substring(p.indexOf("=") + 1,
+          p.indexOf(",")));
+      boolean exclusivity =
+          Boolean.valueOf(p.substring(p.indexOf(",") + 1, p.length()));
+      when(nlm.getResourceByLabel(eq(partitionName), any(Resource.class)))
+          .thenReturn(res);
+      when(nlm.isExclusiveNodeLabel(eq(partitionName))).thenReturn(exclusivity);
+
+      // add to partition to resource
+      partitionToResource.put(partitionName, res);
+      LOG.debug("add partition=" + partitionName + " totalRes=" + res
+          + " exclusivity=" + exclusivity);
+      Resources.addTo(clusterResource, res);
+    }
+
+    when(nlm.getClusterNodeLabelNames()).thenReturn(
+        partitionToResource.keySet());
+  }
+
+  private Resource parseResourceFromString(String p) {
+    String[] resource = p.split(":");
+    Resource res;
+    if (resource.length == 1) {
+      res = Resources.createResource(Integer.valueOf(resource[0]));
+    } else {
+      res = Resources.createResource(Integer.valueOf(resource[0]),
+          Integer.valueOf(resource[1]));
+    }
+    return res;
+  }
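parseResourceFromString() above accepts either a bare memory value or memory:vcores. Illustrative calls, not in the patch; the single-argument case leaves vCores to the default chosen by Resources.createResource(int):

Resource memOnly = parseResourceFromString("20");   // memory 20, default vCores
Resource both = parseResourceFromString("20:3");    // memory 20, 3 vCores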
+
+  /**
+   * Format is:
+   * <pre>
+   * root (<partition-name-1>=[guaranteed max used pending (reserved)],<partition-name-2>=..);
+   * -A(...);
+   * --A1(...);
+   * --A2(...);
+   * -B...
+   * </pre>
+   * ";" splits queues; there should be no empty lines and no extra spaces.
+   */
+  @SuppressWarnings({ "unchecked", "rawtypes" })
+  private ParentQueue mockQueueHierarchy(String queueExprs) {
+    String[] queueExprArray = queueExprs.split(";");
+    ParentQueue rootQueue = null;
+    for (int idx = 0; idx < queueExprArray.length; idx++) {
+      String q = queueExprArray[idx];
+      CSQueue queue;
+
+      // Initialize queue
+      if (isParent(queueExprArray, idx)) {
+        ParentQueue parentQueue = mock(ParentQueue.class);
+        queue = parentQueue;
+        List<CSQueue> children = new ArrayList<CSQueue>();
+        when(parentQueue.getChildQueues()).thenReturn(children);
+      } else {
+        LeafQueue leafQueue = mock(LeafQueue.class);
+        final TreeSet<FiCaSchedulerApp> apps = new TreeSet<>(
+            new Comparator<FiCaSchedulerApp>() {
+              @Override
+              public int compare(FiCaSchedulerApp a1, FiCaSchedulerApp a2) {
+                return a1.getApplicationId().compareTo(a2.getApplicationId());
+              }
+            });
+        when(leafQueue.getApplications()).thenReturn(apps);
+        OrderingPolicy<FiCaSchedulerApp> so = mock(OrderingPolicy.class);
+        when(so.getPreemptionIterator()).thenAnswer(new Answer() {
+          public Object answer(InvocationOnMock invocation) {
+            return apps.descendingIterator();
+          }
+        });
+        when(leafQueue.getOrderingPolicy()).thenReturn(so);
+
+        Map<String, TreeSet<RMContainer>> ignorePartitionContainers =
+            new HashMap<>();
+        when(leafQueue.getIgnoreExclusivityRMContainers()).thenReturn(
+            ignorePartitionContainers);
+        queue = leafQueue;
+      }
+
+      setupQueue(queue, q, queueExprArray, idx);
+      if (queue.getQueueName().equals(ROOT)) {
+        rootQueue = (ParentQueue) queue;
+      }
+    }
+    return rootQueue;
+  }
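An illustrative hierarchy string for the format above (values invented): the number of leading '-' characters encodes the level read by getLevel() further down, so a1 and a2 nest under a.

String queuesConfig =
    "root(=[100 100 100 100]);" +
    "-a(=[50 100 50 100]);" +
    "--a1(=[25 100 30 50]);" +
    "--a2(=[25 100 20 50]);" +
    "-b(=[50 100 50 100])";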
+
+  private void setupQueue(CSQueue queue, String q, String[] queueExprArray,
+      int idx) {
+    LOG.debug("*** Setup queue, source=" + q);
+    String queuePath = null;
+
+    int myLevel = getLevel(q);
+    if (0 == myLevel) {
+      // It's root
+      when(queue.getQueueName()).thenReturn(ROOT);
+      queuePath = ROOT;
+    }
+
+    String queueName = getQueueName(q);
+    when(queue.getQueueName()).thenReturn(queueName);
+
+    // Setup parent queue, and add myself to parentQueue.children-list
+    ParentQueue parentQueue = getParentQueue(queueExprArray, idx, myLevel);
+    if (null != parentQueue) {
+      when(queue.getParent()).thenReturn(parentQueue);
+      parentQueue.getChildQueues().add(queue);
+
+      // Setup my path
+      queuePath = parentQueue.getQueuePath() + "." + queueName;
+    }
+    when(queue.getQueuePath()).thenReturn(queuePath);
+
+    QueueCapacities qc = new QueueCapacities(0 == myLevel);
+    ResourceUsage ru = new ResourceUsage();
+
+    when(queue.getQueueCapacities()).thenReturn(qc);
+    when(queue.getQueueResourceUsage()).thenReturn(ru);
+
+    LOG.debug("Setup queue, name=" + queue.getQueueName() + " path="
+        + queue.getQueuePath());
+    LOG.debug("Parent=" + (parentQueue == null ? "null" : parentQueue
+        .getQueueName()));
+
+    // Setup other fields like used resource, guaranteed resource, etc.
+    String capacitySettingStr = q.substring(q.indexOf("(") + 1, q.indexOf(")"));
+    for (String s : capacitySettingStr.split(",")) {
+      String partitionName = s.substring(0, s.indexOf("="));
+      String[] values = s.substring(s.indexOf("[") + 1, s.indexOf("]")).split(" ");
+      // Add a small epsilon to capacities to avoid truncate when doing
+      // Resources.multiply
+      float epsilon = 1e-6f;
+      Resource totResoucePerPartition = partitionToResource.get(partitionName);
+      float absGuaranteed = Resources.divide(rc, totResoucePerPartition,
+          parseResourceFromString(values[0].trim()), totResoucePerPartition)
+          + epsilon;
+      float absMax = Resources.divide(rc, totResoucePerPartition,
+          parseResourceFromString(values[1].trim()), totResoucePerPartition)
+          + epsilon;
+      float absUsed = Resources.divide(rc, totResoucePerPartition,
+          parseResourceFromString(values[2].trim()), totResoucePerPartition)
+          + epsilon;
+      Resource pending = parseResourceFromString(values[3].trim());
+      qc.setAbsoluteCapacity(partitionName, absGuaranteed);
+      qc.setAbsoluteMaximumCapacity(partitionName, absMax);
+      qc.setAbsoluteUsedCapacity(partitionName, absUsed);
+      ru.setPending(partitionName, pending);
+      if (!isParent(queueExprArray, idx)) {
+        LeafQueue lq = (LeafQueue) queue;
+        when(lq.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class),
+            isA(String.class))).thenReturn(pending);
+      }
+      ru.setUsed(partitionName, parseResourceFromString(values[2].trim()));
+
+      // Setup reserved resource if it is contained in the input config
+      Resource reserved = Resources.none();
+      if(values.length == 5) {
+        reserved = parseResourceFromString(values[4].trim());
+        ru.setReserved(partitionName, reserved);
+      }
+      LOG.debug("Setup queue=" + queueName + " partition=" + partitionName
+          + " [abs_guaranteed=" + absGuaranteed + ",abs_max=" + absMax
+          + ",abs_used" + absUsed + ",pending_resource=" + pending
+          + ", reserved_resource=" + reserved + "]");
+    }
+
+    // Setup preemption disabled
+    when(queue.getPreemptionDisabled()).thenReturn(
+        conf.getPreemptionDisabled(queuePath, false));
+
+    nameToCSQueues.put(queueName, queue);
+    when(cs.getQueue(eq(queueName))).thenReturn(queue);
+  }
+
+  /**
+   * Level of a queue is how many "-" at beginning, root's level is 0
+   */
+  private int getLevel(String q) {
+    int level = 0; // level = how many "-" at beginning
+    while (level < q.length() && q.charAt(level) == '-') {
+      level++;
+    }
+    return level;
+  }
+
+  private String getQueueName(String q) {
+    int idx = 0;
+    // find first != '-' char
+    while (idx < q.length() && q.charAt(idx) == '-') {
+      idx++;
+    }
+    if (idx == q.length()) {
+      throw new IllegalArgumentException("illegal input:" + q);
+    }
+    // name = after '-' and before '('
+    String name = q.substring(idx, q.indexOf('('));
+    if (name.isEmpty()) {
+      throw new IllegalArgumentException("queue name shouldn't be empty:" + q);
+    }
+    if (name.contains(".")) {
+      throw new IllegalArgumentException("queue name shouldn't contain '.':"
+          + name);
+    }
+    return name;
+  }
+
+  private ParentQueue getParentQueue(String[] queueExprArray, int idx, int myLevel) {
+    idx--;
+    while (idx >= 0) {
+      int level = getLevel(queueExprArray[idx]);
+      if (level < myLevel) {
+        String parentQueuName = getQueueName(queueExprArray[idx]);
+        return (ParentQueue) nameToCSQueues.get(parentQueuName);
+      }
+      idx--;
+    }
+
+    return null;
+  }
+
+  /**
+   * Get if a queue is ParentQueue
+   */
+  private boolean isParent(String[] queues, int idx) {
+    int myLevel = getLevel(queues[idx]);
+    idx++;
+    while (idx < queues.length && getLevel(queues[idx]) == myLevel) {
+      idx++;
+    }
+    if (idx >= queues.length || getLevel(queues[idx]) < myLevel) {
+      // It's a LeafQueue
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  public ApplicationAttemptId getAppAttemptId(int id) {
+    ApplicationId appId = ApplicationId.newInstance(0L, id);
+    ApplicationAttemptId appAttemptId =
+        ApplicationAttemptId.newInstance(appId, 1);
+    return appAttemptId;
+  }
+
+  public void checkContainerNodesInApp(FiCaSchedulerApp app,
+      int expectedContainersNumber, String host) {
+    NodeId nodeId = NodeId.newInstance(host, 1);
+    int num = 0;
+    for (RMContainer c : app.getLiveContainers()) {
+      if (c.getAllocatedNode().equals(nodeId)) {
+        num++;
+      }
+    }
+    for (RMContainer c : app.getReservedContainers()) {
+      if (c.getAllocatedNode().equals(nodeId)) {
+        num++;
+      }
+    }
+    Assert.assertEquals(expectedContainersNumber, num);
+  }
+
+  public FiCaSchedulerApp getApp(String queueName, int appId) {
+    for (FiCaSchedulerApp app : ((LeafQueue) cs.getQueue(queueName))
+        .getApplications()) {
+      if (app.getApplicationId().getId() == appId) {
+        return app;
+      }
+    }
+    return null;
+  }
+
+  public void checkAbsCapacities(CSQueue queue, String partition,
+      float guaranteed, float max, float used) {
+    QueueCapacities qc = queue.getQueueCapacities();
+    Assert.assertEquals(guaranteed, qc.getAbsoluteCapacity(partition), 1e-3);
+    Assert.assertEquals(max, qc.getAbsoluteMaximumCapacity(partition), 1e-3);
+    Assert.assertEquals(used, qc.getAbsoluteUsedCapacity(partition), 1e-3);
+  }
+
+  public void checkPendingResource(CSQueue queue, String partition, int pending) {
+    ResourceUsage ru = queue.getQueueResourceUsage();
+    Assert.assertEquals(pending, ru.getPending(partition).getMemory());
+  }
+
+  public void checkReservedResource(CSQueue queue, String partition, int reserved) {
+    ResourceUsage ru = queue.getQueueResourceUsage();
+    Assert.assertEquals(reserved, ru.getReserved(partition).getMemory());
+  }
+
+  static class IsPreemptionRequestForQueueAndNode
+      extends ArgumentMatcher<ContainerPreemptEvent> {
+    private final ApplicationAttemptId appAttId;
+    private final String queueName;
+    private final NodeId nodeId;
+
+    IsPreemptionRequestForQueueAndNode(ApplicationAttemptId appAttId,
+        String queueName, NodeId nodeId) {
+      this.appAttId = appAttId;
+      this.queueName = queueName;
+      this.nodeId = nodeId;
+    }
+    @Override
+    public boolean matches(Object o) {
+      ContainerPreemptEvent cpe = (ContainerPreemptEvent)o;
+
+      return appAttId.equals(cpe.getAppId())
+          && queueName.equals(cpe.getContainer().getQueueName())
+          && nodeId.equals(cpe.getContainer().getAllocatedNode());
+    }
+    @Override
+    public String toString() {
+      return appAttId.toString();
+    }
+  }
+}
|
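For orientation, a sketch of how the queue-expression DSL consumed by setupQueue()/getLevel()/getQueueName()/isParent() above is meant to be read; the three-queue string below is an invented illustration, not a case from the patch:

    // Hypothetical queuesConfig entry -- invented for illustration.
    // ';' separates queues, leading '-' gives depth, and per partition the
    // bracketed values are: guaranteed max used pending [reserved].
    String queuesConfig =
        "root(=[100 100 80 20]);" +  // level 0 -> isParent == true
        "-a(=[50 100 60 10 5]);" +   // level 1 leaf, 5 reserved (5th value)
        "-b(=[50 100 20 10])";       // level 1 leaf, no reserved value
    // getLevel("-a(...)") == 1, getQueueName("-a(...)") == "a"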
@@ -62,6 +62,7 @@ import org.mockito.ArgumentMatcher;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Deque;
@@ -161,6 +162,11 @@ public class TestProportionalCapacityPreemptionPolicy {
     mCS = mock(CapacityScheduler.class);
     when(mCS.getResourceCalculator()).thenReturn(rc);
     lm = mock(RMNodeLabelsManager.class);
+    try {
+      when(lm.isExclusiveNodeLabel(anyString())).thenReturn(true);
+    } catch (IOException e) {
+      // do nothing
+    }
     when(mCS.getConfiguration()).thenReturn(conf);
     rmContext = mock(RMContext.class);
     when(mCS.getRMContext()).thenReturn(rmContext);
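One note on the stub just added (my reading of the hunk, not text from the patch): isExclusiveNodeLabel declares IOException, so even stubbing it on a mock must handle the checked exception; answering true for every label keeps the existing tests on the exclusive-partition path they exercised before this change. The pattern in isolation:

    // Stubbing a method that declares a checked exception: the when(...)
    // call is a plain Java invocation, so the compiler demands the
    // try/catch even though a Mockito mock never actually throws here.
    RMNodeLabelsManager lm = mock(RMNodeLabelsManager.class);
    try {
      when(lm.isExclusiveNodeLabel(anyString())).thenReturn(true);
    } catch (IOException e) {
      // unreachable for a mock; present only to satisfy the compiler
    }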
@@ -649,6 +655,26 @@ public class TestProportionalCapacityPreemptionPolicy {
     verify(mDisp, times(10)).handle(argThat(new IsPreemptionRequestFor(appA)));
   }
 
+  @Test
+  public void testHierarchicalWithReserved() {
+    int[][] qData = new int[][] {
+      //  /    A    B    C    D    E    F
+      { 200, 100,  50,  50, 100,  10,  90 },  // abs
+      { 200, 200, 200, 200, 200, 200, 200 },  // maxCap
+      { 200, 110,  60,  50,  90,  90,   0 },  // used
+      {  10,   0,   0,   0,  10,   0,  10 },  // pending
+      {  40,  25,  15,  10,  15,  15,   0 },  // reserved
+      {   4,   2,   1,   1,   2,   1,   1 },  // apps
+      {  -1,  -1,   1,   1,  -1,   1,   1 },  // req granularity
+      {   2,   2,   0,   0,   2,   0,   0 },  // subqueues
+    };
+    ProportionalCapacityPreemptionPolicy policy = buildPolicy(qData);
+    policy.editSchedule();
+    // verify capacity taken from A1, not B1 despite B1 being far over
+    // its absolute guaranteed capacity
+    verify(mDisp, times(10)).handle(argThat(new IsPreemptionRequestFor(appA)));
+  }
+
   @Test
   public void testZeroGuar() {
     int[][] qData = new int[][] {
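Why times(10) is the expected count in the new test (my arithmetic from the qData rows above, not stated in the patch): queue A is guaranteed 100 but uses 110, so its 10-unit surplus is what the policy reclaims, and FIFO ordering inside the queue points the kills at appA.

    // Restating the expectation with the qData columns for queue A (index 1):
    int guaranteedA = 100;              // "abs" row
    int usedA = 110;                    // "used" row
    int surplus = usedA - guaranteedA;  // 10 -> matches times(10) for appA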
@@ -934,7 +960,38 @@ public class TestProportionalCapacityPreemptionPolicy {
     //check the parent can preempt only the extra from > 2 level child
     TempQueuePerPartition tempQueueAPartition = policy.getQueuePartitions().get("queueA").get("");
     assertEquals(0, tempQueueAPartition.untouchableExtra.getMemory());
-    int extraForQueueA = tempQueueAPartition.current.getMemory()- tempQueueAPartition.guaranteed.getMemory();
+    int extraForQueueA = tempQueueAPartition.getUsed().getMemory()
+        - tempQueueAPartition.getGuaranteed().getMemory();
+    assertEquals(extraForQueueA,tempQueueAPartition.preemptableExtra.getMemory());
+  }
+
+  @Test
+  public void testHierarchicalLarge3LevelsWithReserved() {
+    int[][] qData = new int[][] {
+      //  /    A                          F                   I
+      //            B    C                     G    H              J    K
+      //                      D    E
+      { 400, 200,  60, 140, 100,  40, 100,  70,  30, 100,  10,  90 },  // abs
+      { 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400 },  // maxCap
+      { 400, 210,  60, 150, 100,  50, 100,  50,  50,  90,  10,  80 },  // used
+      {  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  10 },  // pending
+      {  50,  30,  20,  10,   5,   5,   0,   0,   0,  10,  10,   0 },  // reserved
+      //           appA      appB appC       appD appE       appF appG
+      {   7,   3,   1,   2,   1,   1,   2,   1,   1,   2,   1,   1 },  // apps
+      {  -1,  -1,   1,  -1,   1,   1,  -1,   1,   1,  -1,   1,   1 },  // req granularity
+      {   3,   2,   0,   2,   0,   0,   2,   0,   0,   2,   0,   0 },  // subqueues
+    };
+    ProportionalCapacityPreemptionPolicy policy = buildPolicy(qData);
+    policy.editSchedule();
+
+    verify(mDisp, times(10)).handle(argThat(new IsPreemptionRequestFor(appC)));
+    assertEquals(10, policy.getQueuePartitions().get("queueE").get("").preemptableExtra.getMemory());
+    //2nd level child(E) preempts 10, but parent A has only 9 extra
+    //check the parent can preempt only the extra from > 2 level child
+    TempQueuePerPartition tempQueueAPartition = policy.getQueuePartitions().get("queueA").get("");
+    assertEquals(0, tempQueueAPartition.untouchableExtra.getMemory());
+    int extraForQueueA = tempQueueAPartition.getUsed().getMemory()
+        - tempQueueAPartition.getGuaranteed().getMemory();
     assertEquals(extraForQueueA,tempQueueAPartition.preemptableExtra.getMemory());
   }
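The switch from the public fields current/guaranteed to getUsed()/getGuaranteed() follows the patch's encapsulation of TempQueuePerPartition; the asserted quantity itself is unchanged. Spelled out in isolation (names as in the tests above):

    // A queue's preemptable surplus is whatever it uses beyond its
    // guarantee, once its untouchable share is zero:
    int used = tempQueueAPartition.getUsed().getMemory();
    int guaranteed = tempQueueAPartition.getGuaranteed().getMemory();
    assertEquals(used - guaranteed,
        tempQueueAPartition.preemptableExtra.getMemory());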
@@ -1058,6 +1115,7 @@ public class TestProportionalCapacityPreemptionPolicy {
     ParentQueue root = mockParentQueue(null, queues[0], pqs);
     ResourceUsage resUsage = new ResourceUsage();
     resUsage.setUsed(used[0]);
+    resUsage.setReserved(reserved[0]);
     when(root.getQueueName()).thenReturn(CapacitySchedulerConfiguration.ROOT);
     when(root.getAbsoluteUsedCapacity()).thenReturn(
         Resources.divide(rc, tot, used[0], tot));
@@ -1083,6 +1141,7 @@ public class TestProportionalCapacityPreemptionPolicy {
         q = mockParentQueue(p, queues[i], pqs);
         ResourceUsage resUsagePerQueue = new ResourceUsage();
         resUsagePerQueue.setUsed(used[i]);
+        resUsagePerQueue.setReserved(reserved[i]);
         when(q.getQueueResourceUsage()).thenReturn(resUsagePerQueue);
       } else {
         q = mockLeafQueue(p, tot, i, abs, used, pending, reserved, apps, gran);
@@ -1159,6 +1218,7 @@ public class TestProportionalCapacityPreemptionPolicy {
     ResourceUsage ru = new ResourceUsage();
     ru.setPending(pending[i]);
     ru.setUsed(used[i]);
+    ru.setReserved(reserved[i]);
     when(lq.getQueueResourceUsage()).thenReturn(ru);
     // consider moving where CapacityScheduler::comparator accessible
     final NavigableSet<FiCaSchedulerApp> qApps = new TreeSet<FiCaSchedulerApp>(
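These three one-line hunks do the same thing at each level of the mocked hierarchy (root, parent, leaf): they feed the qData "reserved" row into the queue's ResourceUsage, so the policy can tell reserved resources apart from live usage. A hedged sketch of what the mocks now report (ResourceUsage's no-arg getters read the default partition):

    // After these hunks a mocked leaf queue exposes all three quantities:
    ResourceUsage ru = lq.getQueueResourceUsage();
    Resource used = ru.getUsed();         // live allocations
    Resource reserved = ru.getReserved(); // reservations (new input here)
    Resource pending = ru.getPending();   // outstanding asks
    // The reserved-container selector added by YARN-4390 keys off the
    // reserved figure when deciding where to preempt.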
@@ -18,231 +18,23 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.Container;
-import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.NodeId;
-import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.hadoop.yarn.event.Dispatcher;
-import org.apache.hadoop.yarn.event.EventHandler;
-import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor;
-import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
-import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
-import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.preemption.PreemptionManager;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy;
-import org.apache.hadoop.yarn.util.Clock;
-import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
-import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
-import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
-import org.apache.hadoop.yarn.util.resource.Resources;
-import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
-import org.mockito.invocation.InvocationOnMock;
-import org.mockito.stubbing.Answer;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeSet;
 
-import static org.mockito.Matchers.any;
 import static org.mockito.Matchers.argThat;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Matchers.isA;
-import static org.mockito.Mockito.doAnswer;
-import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.when;
 
-public class TestProportionalCapacityPreemptionPolicyForNodePartitions {
-  private static final Log LOG =
-      LogFactory.getLog(TestProportionalCapacityPreemptionPolicyForNodePartitions.class);
-  static final String ROOT = CapacitySchedulerConfiguration.ROOT;
-
-  private Map<String, CSQueue> nameToCSQueues = null;
-  private Map<String, Resource> partitionToResource = null;
-  private Map<NodeId, SchedulerNode> nodeIdToSchedulerNodes = null;
-  private RMNodeLabelsManager nlm = null;
-  private RMContext rmContext = null;
-
-  private ResourceCalculator rc = new DefaultResourceCalculator();
-  private Clock mClock = null;
-  private CapacitySchedulerConfiguration conf = null;
-  private CapacityScheduler cs = null;
-  private EventHandler<SchedulerEvent> mDisp = null;
-  private ProportionalCapacityPreemptionPolicy policy = null;
-  private Resource clusterResource = null;
-
-  @SuppressWarnings("unchecked")
+public class TestProportionalCapacityPreemptionPolicyForNodePartitions
+    extends ProportionalCapacityPreemptionPolicyMockFramework {
   @Before
   public void setup() {
-    org.apache.log4j.Logger.getRootLogger().setLevel(
-        org.apache.log4j.Level.DEBUG);
-
-    conf = new CapacitySchedulerConfiguration(new Configuration(false));
-    conf.setLong(
-        CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL, 10000);
-    conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
-        3000);
-    // report "ideal" preempt
-    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
-        (float) 1.0);
-    conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
-        (float) 1.0);
-
-    mClock = mock(Clock.class);
-    cs = mock(CapacityScheduler.class);
-    when(cs.getResourceCalculator()).thenReturn(rc);
-    when(cs.getPreemptionManager()).thenReturn(new PreemptionManager());
-    when(cs.getConfiguration()).thenReturn(conf);
-
-    nlm = mock(RMNodeLabelsManager.class);
-    mDisp = mock(EventHandler.class);
-
-    rmContext = mock(RMContext.class);
-    when(rmContext.getNodeLabelManager()).thenReturn(nlm);
-    Dispatcher disp = mock(Dispatcher.class);
-    when(rmContext.getDispatcher()).thenReturn(disp);
-    when(disp.getEventHandler()).thenReturn(mDisp);
-    when(cs.getRMContext()).thenReturn(rmContext);
-
+    super.setup();
     policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock);
-    partitionToResource = new HashMap<>();
-    nodeIdToSchedulerNodes = new HashMap<>();
-    nameToCSQueues = new HashMap<>();
-  }
-
-  @Test
-  public void testBuilder() throws Exception {
-    /**
-     * Test of test, make sure we build expected mock schedulable objects
-     */
-    String labelsConfig =
-        "=200,true;" + // default partition
-        "red=100,false;" + // partition=red
-        "blue=200,true"; // partition=blue
-    String nodesConfig =
-        "n1=red;" + // n1 has partition=red
-        "n2=blue;" + // n2 has partition=blue
-        "n3="; // n3 doesn't have partition
-    String queuesConfig =
-        // guaranteed,max,used,pending
-        "root(=[200 200 100 100],red=[100 100 100 100],blue=[200 200 200 200]);" + //root
-        "-a(=[100 200 100 100],red=[0 0 0 0],blue=[200 200 200 200]);" + // a
-        "--a1(=[50 100 50 100],red=[0 0 0 0],blue=[100 200 200 0]);" + // a1
-        "--a2(=[50 200 50 0],red=[0 0 0 0],blue=[100 200 0 200]);" + // a2
-        "-b(=[100 200 0 0],red=[100 100 100 100],blue=[0 0 0 0])";
-    String appsConfig=
-        //queueName\t(priority,resource,host,expression,#repeat,reserved)
-        // app1 in a1, , 50 in n2 (reserved), 50 in n2 (allocated)
-        "a1\t" // app1 in a1
-        + "(1,1,n3,red,50,false);" + // 50 * default in n3
-
-        "a1\t" // app2 in a1
-        + "(2,1,n2,,50,true)(2,1,n2,,50,false)" // 50 * ignore-exclusivity (reserved),
-                                                // 50 * ignore-exclusivity (allocated)
-        + "(2,1,n2,blue,50,true)(2,1,n2,blue,50,true);" + // 50 in n2 (reserved),
-                                                          // 50 in n2 (allocated)
-        "a2\t" // app3 in a2
-        + "(1,1,n3,red,50,false);" + // 50 * default in n3
-
-        "b\t" // app4 in b
-        + "(1,1,n1,red,100,false);";
-
-    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
-
-    // Check queues:
-    // root
-    checkAbsCapacities(cs.getQueue("root"), "", 1f, 1f, 0.5f);
-    checkPendingResource(cs.getQueue("root"), "", 100);
-    checkAbsCapacities(cs.getQueue("root"), "red", 1f, 1f, 1f);
-    checkPendingResource(cs.getQueue("root"), "red", 100);
-    checkAbsCapacities(cs.getQueue("root"), "blue", 1f, 1f, 1f);
-    checkPendingResource(cs.getQueue("root"), "blue", 200);
-
-    // a
-    checkAbsCapacities(cs.getQueue("a"), "", 0.5f, 1f, 0.5f);
-    checkPendingResource(cs.getQueue("a"), "", 100);
-    checkAbsCapacities(cs.getQueue("a"), "red", 0f, 0f, 0f);
-    checkPendingResource(cs.getQueue("a"), "red", 0);
-    checkAbsCapacities(cs.getQueue("a"), "blue", 1f, 1f, 1f);
-    checkPendingResource(cs.getQueue("a"), "blue", 200);
-
-    // a1
-    checkAbsCapacities(cs.getQueue("a1"), "", 0.25f, 0.5f, 0.25f);
-    checkPendingResource(cs.getQueue("a1"), "", 100);
-    checkAbsCapacities(cs.getQueue("a1"), "red", 0f, 0f, 0f);
-    checkPendingResource(cs.getQueue("a1"), "red", 0);
-    checkAbsCapacities(cs.getQueue("a1"), "blue", 0.5f, 1f, 1f);
-    checkPendingResource(cs.getQueue("a1"), "blue", 0);
-
-    // a2
-    checkAbsCapacities(cs.getQueue("a2"), "", 0.25f, 1f, 0.25f);
-    checkPendingResource(cs.getQueue("a2"), "", 0);
-    checkAbsCapacities(cs.getQueue("a2"), "red", 0f, 0f, 0f);
-    checkPendingResource(cs.getQueue("a2"), "red", 0);
-    checkAbsCapacities(cs.getQueue("a2"), "blue", 0.5f, 1f, 0f);
-    checkPendingResource(cs.getQueue("a2"), "blue", 200);
-
-    // b1
-    checkAbsCapacities(cs.getQueue("b"), "", 0.5f, 1f, 0f);
-    checkPendingResource(cs.getQueue("b"), "", 0);
-    checkAbsCapacities(cs.getQueue("b"), "red", 1f, 1f, 1f);
-    checkPendingResource(cs.getQueue("b"), "red", 100);
-    checkAbsCapacities(cs.getQueue("b"), "blue", 0f, 0f, 0f);
-    checkPendingResource(cs.getQueue("b"), "blue", 0);
-
-    // Check ignored partitioned containers in queue
-    Assert.assertEquals(100, ((LeafQueue) cs.getQueue("a1"))
-        .getIgnoreExclusivityRMContainers().get("blue").size());
-
-    // Check applications
-    Assert.assertEquals(2, ((LeafQueue)cs.getQueue("a1")).getApplications().size());
-    Assert.assertEquals(1, ((LeafQueue)cs.getQueue("a2")).getApplications().size());
-    Assert.assertEquals(1, ((LeafQueue)cs.getQueue("b")).getApplications().size());
-
-    // Check #containers
-    FiCaSchedulerApp app1 = getApp("a1", 1);
-    FiCaSchedulerApp app2 = getApp("a1", 2);
-    FiCaSchedulerApp app3 = getApp("a2", 3);
-    FiCaSchedulerApp app4 = getApp("b", 4);
-
-    Assert.assertEquals(50, app1.getLiveContainers().size());
-    checkContainerNodesInApp(app1, 50, "n3");
-
-    Assert.assertEquals(50, app2.getLiveContainers().size());
-    Assert.assertEquals(150, app2.getReservedContainers().size());
-    checkContainerNodesInApp(app2, 200, "n2");
-
-    Assert.assertEquals(50, app3.getLiveContainers().size());
-    checkContainerNodesInApp(app3, 50, "n3");
-
-    Assert.assertEquals(100, app4.getLiveContainers().size());
-    checkContainerNodesInApp(app4, 100, "n1");
   }
 
   @Test
@@ -822,477 +614,4 @@ public class TestProportionalCapacityPreemptionPolicyForNodePartitions {
     verify(mDisp, never()).handle(
         argThat(new IsPreemptionRequestFor(getAppAttemptId(3))));
   }
-
-  private ApplicationAttemptId getAppAttemptId(int id) {
-    ApplicationId appId = ApplicationId.newInstance(0L, id);
-    ApplicationAttemptId appAttemptId =
-        ApplicationAttemptId.newInstance(appId, 1);
-    return appAttemptId;
-  }
-
-  private void checkContainerNodesInApp(FiCaSchedulerApp app,
-      int expectedContainersNumber, String host) {
-    NodeId nodeId = NodeId.newInstance(host, 1);
-    int num = 0;
-    for (RMContainer c : app.getLiveContainers()) {
-      if (c.getAllocatedNode().equals(nodeId)) {
-        num++;
-      }
-    }
-    for (RMContainer c : app.getReservedContainers()) {
-      if (c.getAllocatedNode().equals(nodeId)) {
-        num++;
-      }
-    }
-    Assert.assertEquals(expectedContainersNumber, num);
-  }
-
-  private FiCaSchedulerApp getApp(String queueName, int appId) {
-    for (FiCaSchedulerApp app : ((LeafQueue) cs.getQueue(queueName))
-        .getApplications()) {
-      if (app.getApplicationId().getId() == appId) {
-        return app;
-      }
-    }
-    return null;
-  }
-
-  private void checkAbsCapacities(CSQueue queue, String partition,
-      float guaranteed, float max, float used) {
-    QueueCapacities qc = queue.getQueueCapacities();
-    Assert.assertEquals(guaranteed, qc.getAbsoluteCapacity(partition), 1e-3);
-    Assert.assertEquals(max, qc.getAbsoluteMaximumCapacity(partition), 1e-3);
-    Assert.assertEquals(used, qc.getAbsoluteUsedCapacity(partition), 1e-3);
-  }
-
-  private void checkPendingResource(CSQueue queue, String partition, int pending) {
-    ResourceUsage ru = queue.getQueueResourceUsage();
-    Assert.assertEquals(pending, ru.getPending(partition).getMemory());
-  }
-
-  private void buildEnv(String labelsConfig, String nodesConfig,
-      String queuesConfig, String appsConfig) throws IOException {
-    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig, false);
-  }
-
-  private void buildEnv(String labelsConfig, String nodesConfig,
-      String queuesConfig, String appsConfig,
-      boolean useDominantResourceCalculator) throws IOException {
-    if (useDominantResourceCalculator) {
-      when(cs.getResourceCalculator()).thenReturn(
-          new DominantResourceCalculator());
-    }
-    mockNodeLabelsManager(labelsConfig);
-    mockSchedulerNodes(nodesConfig);
-    for (NodeId nodeId : nodeIdToSchedulerNodes.keySet()) {
-      when(cs.getSchedulerNode(nodeId)).thenReturn(
-          nodeIdToSchedulerNodes.get(nodeId));
-    }
-    ParentQueue root = mockQueueHierarchy(queuesConfig);
-    when(cs.getRootQueue()).thenReturn(root);
-    when(cs.getClusterResource()).thenReturn(clusterResource);
-    mockApplications(appsConfig);
-
-    policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs,
-        mClock);
-  }
-
-  private void mockContainers(String containersConfig, ApplicationAttemptId attemptId,
-      String queueName, List<RMContainer> reservedContainers,
-      List<RMContainer> liveContainers) {
-    int containerId = 1;
-    int start = containersConfig.indexOf("=") + 1;
-    int end = -1;
-
-    while (start < containersConfig.length()) {
-      while (start < containersConfig.length()
-          && containersConfig.charAt(start) != '(') {
-        start++;
-      }
-      if (start >= containersConfig.length()) {
-        throw new IllegalArgumentException(
-            "Error containers specification, line=" + containersConfig);
-      }
-      end = start + 1;
-      while (end < containersConfig.length()
-          && containersConfig.charAt(end) != ')') {
-        end++;
-      }
-      if (end >= containersConfig.length()) {
-        throw new IllegalArgumentException(
-            "Error containers specification, line=" + containersConfig);
-      }
-
-      // now we found start/end, get container values
-      String[] values = containersConfig.substring(start + 1, end).split(",");
-      if (values.length != 6) {
-        throw new IllegalArgumentException("Format to define container is:"
-            + "(priority,resource,host,expression,repeat,reserved)");
-      }
-      Priority pri = Priority.newInstance(Integer.parseInt(values[0]));
-      Resource res = parseResourceFromString(values[1]);
-      NodeId host = NodeId.newInstance(values[2], 1);
-      String exp = values[3];
-      int repeat = Integer.parseInt(values[4]);
-      boolean reserved = Boolean.parseBoolean(values[5]);
-
-      for (int i = 0; i < repeat; i++) {
-        Container c = mock(Container.class);
-        when(c.getResource()).thenReturn(res);
-        when(c.getPriority()).thenReturn(pri);
-        RMContainerImpl rmc = mock(RMContainerImpl.class);
-        when(rmc.getAllocatedNode()).thenReturn(host);
-        when(rmc.getNodeLabelExpression()).thenReturn(exp);
-        when(rmc.getAllocatedResource()).thenReturn(res);
-        when(rmc.getContainer()).thenReturn(c);
-        when(rmc.getApplicationAttemptId()).thenReturn(attemptId);
-        final ContainerId cId = ContainerId.newContainerId(attemptId, containerId);
-        when(rmc.getContainerId()).thenReturn(
-            cId);
-        doAnswer(new Answer<Integer>() {
-          @Override
-          public Integer answer(InvocationOnMock invocation) throws Throwable {
-            return cId.compareTo(((RMContainer) invocation.getArguments()[0])
-                .getContainerId());
-          }
-        }).when(rmc).compareTo(any(RMContainer.class));
-
-        if (containerId == 1) {
-          when(rmc.isAMContainer()).thenReturn(true);
-        }
-
-        if (reserved) {
-          reservedContainers.add(rmc);
-        } else {
-          liveContainers.add(rmc);
-        }
-
-        // If this is a non-exclusive allocation
-        String partition = null;
-        if (exp.isEmpty()
-            && !(partition = nodeIdToSchedulerNodes.get(host).getPartition())
-                .isEmpty()) {
-          LeafQueue queue = (LeafQueue) nameToCSQueues.get(queueName);
-          Map<String, TreeSet<RMContainer>> ignoreExclusivityContainers =
-              queue.getIgnoreExclusivityRMContainers();
-          if (!ignoreExclusivityContainers.containsKey(partition)) {
-            ignoreExclusivityContainers.put(partition,
-                new TreeSet<RMContainer>());
-          }
-          ignoreExclusivityContainers.get(partition).add(rmc);
-        }
-        LOG.debug("add container to app=" + attemptId + " res=" + res
-            + " node=" + host + " nodeLabelExpression=" + exp + " partition="
-            + partition);
-
-        containerId++;
-      }
-
-      start = end + 1;
-    }
-  }
-
-  /**
-   * Format is:
-   * <pre>
-   * queueName\t  // app1
-   * (priority,resource,host,expression,#repeat,reserved)
-   * (priority,resource,host,expression,#repeat,reserved);
-   * queueName\t  // app2
-   * </pre>
-   */
-  private void mockApplications(String appsConfig) {
-    int id = 1;
-    for (String a : appsConfig.split(";")) {
-      String[] strs = a.split("\t");
-      String queueName = strs[0];
-
-      // get containers
-      List<RMContainer> liveContainers = new ArrayList<RMContainer>();
-      List<RMContainer> reservedContainers = new ArrayList<RMContainer>();
-      ApplicationId appId = ApplicationId.newInstance(0L, id);
-      ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1);
-
-      mockContainers(strs[1], appAttemptId, queueName, reservedContainers,
-          liveContainers);
-
-      FiCaSchedulerApp app = mock(FiCaSchedulerApp.class);
-      when(app.getLiveContainers()).thenReturn(liveContainers);
-      when(app.getReservedContainers()).thenReturn(reservedContainers);
-      when(app.getApplicationAttemptId()).thenReturn(appAttemptId);
-      when(app.getApplicationId()).thenReturn(appId);
-      when(app.getPriority()).thenReturn(Priority.newInstance(0));
-
-      // add to LeafQueue
-      LeafQueue queue = (LeafQueue) nameToCSQueues.get(queueName);
-      queue.getApplications().add(app);
-
-      id++;
-    }
-  }
-
-  /**
-   * Format is:
-   * host1=partition;
-   * host2=partition;
-   */
-  private void mockSchedulerNodes(String schedulerNodesConfigStr)
-      throws IOException {
-    String[] nodesConfigStrArray = schedulerNodesConfigStr.split(";");
-    for (String p : nodesConfigStrArray) {
-      NodeId nodeId = NodeId.newInstance(p.substring(0, p.indexOf("=")), 1);
-      String partition = p.substring(p.indexOf("=") + 1, p.length());
-
-      SchedulerNode sn = mock(SchedulerNode.class);
-      when(sn.getNodeID()).thenReturn(nodeId);
-      when(sn.getPartition()).thenReturn(partition);
-      nodeIdToSchedulerNodes.put(nodeId, sn);
-
-      LOG.debug("add scheduler node, id=" + nodeId + ", partition=" + partition);
-    }
-  }
-
-  /**
-   * Format is:
-   * <pre>
-   * partition0=total_resource,exclusivity;
-   * partition1=total_resource,exclusivity;
-   * ...
-   * </pre>
-   */
-  private void mockNodeLabelsManager(String nodeLabelsConfigStr) throws IOException {
-    String[] partitionConfigArr = nodeLabelsConfigStr.split(";");
-    clusterResource = Resources.createResource(0);
-    for (String p : partitionConfigArr) {
-      String partitionName = p.substring(0, p.indexOf("="));
-      Resource res = parseResourceFromString(p.substring(p.indexOf("=") + 1,
-          p.indexOf(",")));
-      boolean exclusivity =
-          Boolean.parseBoolean(p.substring(p.indexOf(",") + 1, p.length()));
-      when(nlm.getResourceByLabel(eq(partitionName), any(Resource.class)))
-          .thenReturn(res);
-      when(nlm.isExclusiveNodeLabel(eq(partitionName))).thenReturn(exclusivity);
-
-      // add to partition to resource
-      partitionToResource.put(partitionName, res);
-      LOG.debug("add partition=" + partitionName + " totalRes=" + res
-          + " exclusivity=" + exclusivity);
-      Resources.addTo(clusterResource, res);
-    }
-
-    when(nlm.getClusterNodeLabelNames()).thenReturn(
-        partitionToResource.keySet());
-  }
-
-  private Resource parseResourceFromString(String p) {
-    String[] resource = p.split(":");
-    Resource res = Resources.createResource(0);
-    if (resource.length == 1) {
-      res = Resources.createResource(Integer.parseInt(resource[0]));
-    } else {
-      res = Resources.createResource(Integer.parseInt(resource[0]),
-          Integer.parseInt(resource[1]));
-    }
-    return res;
-  }
-
-  /**
-   * Format is:
-   * <pre>
-   * root (<partition-name-1>=[guaranteed max used pending],<partition-name-2>=..);
-   * -A(...);
-   * --A1(...);
-   * --A2(...);
-   * -B...
-   * </pre>
-   * ";" splits queues, and there should no empty lines, no extra spaces
-   */
-  @SuppressWarnings({ "unchecked", "rawtypes" })
-  private ParentQueue mockQueueHierarchy(String queueExprs) {
-    String[] queueExprArray = queueExprs.split(";");
-    ParentQueue rootQueue = null;
-    for (int idx = 0; idx < queueExprArray.length; idx++) {
-      String q = queueExprArray[idx];
-      CSQueue queue;
-
-      // Initialize queue
-      if (isParent(queueExprArray, idx)) {
-        ParentQueue parentQueue = mock(ParentQueue.class);
-        queue = parentQueue;
-        List<CSQueue> children = new ArrayList<CSQueue>();
-        when(parentQueue.getChildQueues()).thenReturn(children);
-      } else {
-        LeafQueue leafQueue = mock(LeafQueue.class);
-        final TreeSet<FiCaSchedulerApp> apps = new TreeSet<>(
-            new Comparator<FiCaSchedulerApp>() {
-              @Override
-              public int compare(FiCaSchedulerApp a1, FiCaSchedulerApp a2) {
-                return a1.getApplicationId().compareTo(a2.getApplicationId());
-              }
-            });
-        when(leafQueue.getApplications()).thenReturn(apps);
-        OrderingPolicy<FiCaSchedulerApp> so = mock(OrderingPolicy.class);
-        when(so.getPreemptionIterator()).thenAnswer(new Answer() {
-          public Object answer(InvocationOnMock invocation) {
-            return apps.descendingIterator();
-          }
-        });
-        when(leafQueue.getOrderingPolicy()).thenReturn(so);
-
-        Map<String, TreeSet<RMContainer>> ignorePartitionContainers =
-            new HashMap<>();
-        when(leafQueue.getIgnoreExclusivityRMContainers()).thenReturn(
-            ignorePartitionContainers);
-        queue = leafQueue;
-      }
-
-      setupQueue(queue, q, queueExprArray, idx);
-      if (queue.getQueueName().equals(ROOT)) {
-        rootQueue = (ParentQueue) queue;
-      }
-    }
-    return rootQueue;
-  }
-
-  private void setupQueue(CSQueue queue, String q, String[] queueExprArray,
-      int idx) {
-    LOG.debug("*** Setup queue, source=" + q);
-    String queuePath = null;
-
-    int myLevel = getLevel(q);
-    if (0 == myLevel) {
-      // It's root
-      when(queue.getQueueName()).thenReturn(ROOT);
-      queuePath = ROOT;
-    }
-
-    String queueName = getQueueName(q);
-    when(queue.getQueueName()).thenReturn(queueName);
-
-    // Setup parent queue, and add myself to parentQueue.children-list
-    ParentQueue parentQueue = getParentQueue(queueExprArray, idx, myLevel);
-    if (null != parentQueue) {
-      when(queue.getParent()).thenReturn(parentQueue);
-      parentQueue.getChildQueues().add(queue);
-
-      // Setup my path
-      queuePath = parentQueue.getQueuePath() + "." + queueName;
-    }
-    when(queue.getQueuePath()).thenReturn(queuePath);
-
-    QueueCapacities qc = new QueueCapacities(0 == myLevel);
-    ResourceUsage ru = new ResourceUsage();
-
-    when(queue.getQueueCapacities()).thenReturn(qc);
-    when(queue.getQueueResourceUsage()).thenReturn(ru);
-
-    LOG.debug("Setup queue, name=" + queue.getQueueName() + " path="
-        + queue.getQueuePath());
-    LOG.debug("Parent=" + (parentQueue == null ? "null" : parentQueue
-        .getQueueName()));
-
-    // Setup other fields like used resource, guaranteed resource, etc.
-    String capacitySettingStr = q.substring(q.indexOf("(") + 1, q.indexOf(")"));
-    for (String s : capacitySettingStr.split(",")) {
-      String partitionName = s.substring(0, s.indexOf("="));
-      String[] values = s.substring(s.indexOf("[") + 1, s.indexOf("]")).split(" ");
-      // Add a small epsilon to capacities to avoid truncate when doing
-      // Resources.multiply
-      float epsilon = 1e-6f;
-      Resource totResoucePerPartition = partitionToResource.get(partitionName);
-      float absGuaranteed = Resources.divide(rc, totResoucePerPartition,
-          parseResourceFromString(values[0].trim()), totResoucePerPartition)
-          + epsilon;
-      float absMax = Resources.divide(rc, totResoucePerPartition,
-          parseResourceFromString(values[1].trim()), totResoucePerPartition)
-          + epsilon;
-      float absUsed = Resources.divide(rc, totResoucePerPartition,
-          parseResourceFromString(values[2].trim()), totResoucePerPartition)
-          + epsilon;
-      Resource pending = parseResourceFromString(values[3].trim());
-      qc.setAbsoluteCapacity(partitionName, absGuaranteed);
-      qc.setAbsoluteMaximumCapacity(partitionName, absMax);
-      qc.setAbsoluteUsedCapacity(partitionName, absUsed);
-      ru.setPending(partitionName, pending);
-      if (!isParent(queueExprArray, idx)) {
-        LeafQueue lq = (LeafQueue) queue;
-        when(lq.getTotalPendingResourcesConsideringUserLimit(isA(Resource.class),
-            isA(String.class))).thenReturn(pending);
-      }
-      ru.setUsed(partitionName, parseResourceFromString(values[2].trim()));
-      LOG.debug("Setup queue=" + queueName + " partition=" + partitionName
-          + " [abs_guaranteed=" + absGuaranteed + ",abs_max=" + absMax
-          + ",abs_used" + absUsed + ",pending_resource=" + pending + "]");
-    }
-
-    // Setup preemption disabled
-    when(queue.getPreemptionDisabled()).thenReturn(
-        conf.getPreemptionDisabled(queuePath, false));
-
-    nameToCSQueues.put(queueName, queue);
-    when(cs.getQueue(eq(queueName))).thenReturn(queue);
-  }
-
-  /**
-   * Level of a queue is how many "-" at beginning, root's level is 0
-   */
-  private int getLevel(String q) {
-    int level = 0; // level = how many "-" at beginning
-    while (level < q.length() && q.charAt(level) == '-') {
-      level++;
-    }
-    return level;
-  }
-
-  private String getQueueName(String q) {
-    int idx = 0;
-    // find first != '-' char
-    while (idx < q.length() && q.charAt(idx) == '-') {
-      idx++;
-    }
-    if (idx == q.length()) {
-      throw new IllegalArgumentException("illegal input:" + q);
-    }
-    // name = after '-' and before '('
-    String name = q.substring(idx, q.indexOf('('));
-    if (name.isEmpty()) {
-      throw new IllegalArgumentException("queue name shouldn't be empty:" + q);
-    }
-    if (name.contains(".")) {
-      throw new IllegalArgumentException("queue name shouldn't contain '.':"
-          + name);
-    }
-    return name;
-  }
-
-  private ParentQueue getParentQueue(String[] queueExprArray, int idx, int myLevel) {
-    idx--;
-    while (idx >= 0) {
-      int level = getLevel(queueExprArray[idx]);
-      if (level < myLevel) {
-        String parentQueuName = getQueueName(queueExprArray[idx]);
-        return (ParentQueue) nameToCSQueues.get(parentQueuName);
-      }
-      idx--;
-    }
-
-    return null;
-  }
-
-  /**
-   * Get if a queue is ParentQueue
-   */
-  private boolean isParent(String[] queues, int idx) {
-    int myLevel = getLevel(queues[idx]);
-    idx++;
-    while (idx < queues.length && getLevel(queues[idx]) == myLevel) {
-      idx++;
-    }
-    if (idx >= queues.length || getLevel(queues[idx]) < myLevel) {
-      // It's a LeafQueue
-      return false;
-    } else {
-      return true;
-    }
-  }
 }
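Taken together, the two hunks above gut this test class down to its @Test methods: the imports, fields, mock wiring, and every private helper move into the shared ProportionalCapacityPreemptionPolicyMockFramework shown earlier, and setup() collapses to a super call. A condensed restatement of the resulting shape (not new code, just the after-state implied by the diff):

    public class TestProportionalCapacityPreemptionPolicyForNodePartitions
        extends ProportionalCapacityPreemptionPolicyMockFramework {
      @Before
      public void setup() {
        super.setup(); // conf, cs, rmContext, nlm, mDisp, mClock mocks
        policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock);
      }
      // ...only @Test methods remain; buildEnv() and the other helpers now
      // live in the framework, so the new reserved-container suite below
      // can reuse them.
    }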
@@ -0,0 +1,430 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;
+
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+
+import static org.mockito.Matchers.argThat;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+public class TestProportionalCapacityPreemptionPolicyForReservedContainers
+    extends ProportionalCapacityPreemptionPolicyMockFramework {
+  @Before
+  public void setup() {
+    super.setup();
+    conf.setBoolean(
+        CapacitySchedulerConfiguration.PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS,
+        true);
+    policy = new ProportionalCapacityPreemptionPolicy(rmContext, cs, mClock);
+  }
+
+  @Test
+  public void testPreemptionForSimpleReservedContainer() throws IOException {
+    /**
+     * The simplest test of reserved container, Queue structure is:
+     *
+     * <pre>
+     *       root
+     *      /    \
+     *     a      b
+     * </pre>
+     * Guaranteed resource of a/b are 50:50
+     * Total cluster resource = 100
+     * - A has 90 containers on two node, n1 has 45, n2 has 45, size of each
+     * container is 1.
+     * - B has am container at n1, and reserves 1 container with size = 9 at n1,
+     * so B needs to preempt 9 containers from A at n1 instead of randomly
+     * preempt from n1 and n2.
+     */
+    String labelsConfig =
+        "=100,true;";
+    String nodesConfig = // n1 / n2 has no label
+        "n1= res=50;" +
+        "n2= res=50";
+    String queuesConfig =
+        // guaranteed,max,used,pending,reserved
+        "root(=[100 100 100 9 9]);" + //root
+        "-a(=[50 100 90 0]);" + // a
+        "-b(=[50 100 10 9 9])"; // b
+    String appsConfig=
+        //queueName\t(priority,resource,host,expression,#repeat,reserved)
+        "a\t" // app1 in a
+        + "(1,1,n1,,45,false)" // 45 in n1
+        + "(1,1,n2,,45,false);" + // 45 in n2
+        "b\t" // app2 in b
+        + "(1,1,n1,,1,false)" // AM container in n1
+        + "(1,9,n1,,1,true)"; // 1 container with size=9 reserved at n1
+
+    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
+    policy.editSchedule();
+
+    // Total 5 preempted from app1 at n1, don't preempt container from other
+    // app/node
+    verify(mDisp, times(5)).handle(argThat(
+        new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
+            getAppAttemptId(1))));
+    verify(mDisp, times(5)).handle(
+        argThat(new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
+            NodeId.newInstance("n1", 1))));
+    verify(mDisp, times(0)).handle(argThat(
+        new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
+            getAppAttemptId(2))));
+  }
+
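The expected count of 5 rather than the full 9 is worth unpacking (my arithmetic from the config above, not stated in the patch): b's reservation needs 9 units on n1, and n1 already has 4 units free once app1's 45 containers and app2's AM are counted, so only the shortfall is preempted, all of it on n1.

    // Surgical-preemption arithmetic for this test (derived, not quoted):
    int nodeCapacity = 50;                  // n1
    int usedOnN1 = 45 /* app1 */ + 1 /* app2's AM */;
    int free = nodeCapacity - usedOnN1;     // 4
    int reservationSize = 9;
    int toPreempt = reservationSize - free; // 5 -> times(5), all on n1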
+  @Test
+  public void testUseReservedAndFifoSelectorTogether() throws IOException {
+    /**
+     * Queue structure is:
+     *
+     * <pre>
+     *       root
+     *      /    \
+     *     a      b
+     * </pre>
+     * Guaranteed resource of a/b are 30:70
+     * Total cluster resource = 100
+     * - A has 45 containers on two node, n1 has 10, n2 has 35, size of each
+     * container is 1.
+     * - B has 5 containers at n2, and reserves 1 container with size = 50 at n1,
+     * B also has 20 pending resources.
+     * so B needs to preempt:
+     * - 10 containers from n1 (for reserved)
+     * - 5 containers from n2 for pending resources
+     */
+    String labelsConfig =
+        "=100,true;";
+    String nodesConfig = // n1 / n2 has no label
+        "n1= res=50;" +
+        "n2= res=50";
+    String queuesConfig =
+        // guaranteed,max,used,pending,reserved
+        "root(=[100 100 100 70 10]);" + //root
+        "-a(=[30 100 45 0]);" + // a
+        "-b(=[70 100 55 70 50])"; // b
+    String appsConfig=
+        //queueName\t(priority,resource,host,expression,#repeat,reserved)
+        "a\t" // app1 in a
+        + "(1,1,n2,,35,false)" // 35 in n2
+        + "(1,1,n1,,10,false);" + // 10 in n1
+        "b\t" // app2 in b
+        + "(1,1,n2,,5,false)" // 5 in n2
+        + "(1,50,n1,,1,true)"; // 1 container with size=50 reserved at n1
+
+    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
+    policy.editSchedule();
+
+    verify(mDisp, times(15)).handle(argThat(
+        new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
+            getAppAttemptId(1))));
+    verify(mDisp, times(10)).handle(
+        argThat(new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
+            NodeId.newInstance("n1", 1))));
+    verify(mDisp, times(5)).handle(
+        argThat(new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
+            NodeId.newInstance("n2", 1))));
+    verify(mDisp, times(0)).handle(argThat(
+        new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
+            getAppAttemptId(2))));
+  }
+
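The three verify counts decompose neatly (my reading of the numbers, not text from the patch): queue a is 15 over its guarantee (45 used vs 30), which caps total preemption at 15; the reserved-container selector takes all 10 of app1's n1 containers first, and the regular FIFO selector reclaims the remaining 5 from n2 toward b's pending ask.

    // Derived decomposition of the verify(...) counts above:
    int surplusOfA = 45 - 30;         // used minus guaranteed -> cap of 15
    int fromN1 = 10;                  // reserved selector: all of app1's
                                      // containers on n1, where b reserved 50
    int fromN2 = surplusOfA - fromN1; // 5, via the regular FIFO selector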
+  @Test
+  public void testReservedSelectorSkipsAMContainer() throws IOException {
+    /**
+     * Queue structure is:
+     *
+     * <pre>
+     *       root
+     *      /    \
+     *     a      b
+     * </pre>
+     * Guaranteed resource of a/b are 30:70
+     * Total cluster resource = 100
+     * - A has 45 containers on two node, n1 has 10, n2 has 35, size of each
+     * container is 1.
+     * - B has 5 containers at n2, and reserves 1 container with size = 50 at n1,
+     * B also has 20 pending resources.
+     *
+     * Ideally B needs to preempt:
+     * - 10 containers from n1 (for reserved)
+     * - 5 containers from n2 for pending resources
+     *
+     * However, since one AM container is located at n1 (from queueA), we cannot
+     * preempt 10 containers from n1 for reserved container. Instead, we will
+     * preempt 15 containers from n2, since containers from queueA launched in n2
+     * are later than containers from queueA launched in n1 (FIFO order of containers)
+     */
+    String labelsConfig =
+        "=100,true;";
+    String nodesConfig = // n1 / n2 has no label
+        "n1= res=50;" +
+        "n2= res=50";
+    String queuesConfig =
+        // guaranteed,max,used,pending,reserved
+        "root(=[100 100 100 70 10]);" + //root
+        "-a(=[30 100 45 0]);" + // a
+        "-b(=[70 100 55 70 50])"; // b
+    String appsConfig=
+        //queueName\t(priority,resource,host,expression,#repeat,reserved)
+        "a\t" // app1 in a
+        + "(1,1,n1,,10,false)" // 10 in n1
+        + "(1,1,n2,,35,false);" + // 35 in n2
+        "b\t" // app2 in b
+        + "(1,1,n2,,5,false)" // 5 in n2
+        + "(1,50,n1,,1,true)"; // 1 container with size=50 reserved at n1
+
+    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
+    policy.editSchedule();
+
+    verify(mDisp, times(15)).handle(argThat(
+        new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
+            getAppAttemptId(1))));
+    verify(mDisp, times(0)).handle(
+        argThat(new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
+            NodeId.newInstance("n1", 1))));
+    verify(mDisp, times(15)).handle(
+        argThat(new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
+            NodeId.newInstance("n2", 1))));
+    verify(mDisp, times(0)).handle(argThat(
+        new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
+            getAppAttemptId(2))));
+  }
+
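The AM-skip behavior flips the node compared with the previous test: identical totals, but app1's containers on n1 are now mocked first, and the framework marks the first container of each app as its AM. With the AM in the way, the reserved selector leaves n1 alone and all 15 preemptions land on n2's younger containers. The relevant framework snippet, quoted from mockContainers in the shared framework:

    // From the mock framework: the first container mocked per app is its AM.
    if (containerId == 1) {
      when(rmc.isAMContainer()).thenReturn(true);
    }
    // In this test app1's n1 containers come first in appsConfig, so the AM
    // sits on n1; the selector declines to preempt toward the reservation
    // there, and the FIFO selector drains n2 instead.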
@Test
|
||||||
|
public void testPreemptionForReservedContainerRespectGuaranteedResource()
|
||||||
|
throws IOException {
|
||||||
|
/**
|
||||||
|
* Queue structure is:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* root
|
||||||
|
* / \
|
||||||
|
* a b
|
||||||
|
* </pre>
|
||||||
|
* Guaranteed resource of a/b are 85:15
|
||||||
|
* Total cluster resource = 100
|
||||||
|
* - A has 90 containers on two node, n1 has 45, n2 has 45, size of each
|
||||||
|
* container is 1.
|
||||||
|
* - B has am container at n1, and reserves 1 container with size = 9 at n1,
|
||||||
|
*
|
||||||
|
* If we preempt 9 containers from queue-A, queue-A will be below its
|
||||||
|
* guaranteed resource = 90 - 9 = 81 < 85.
|
||||||
|
*
|
||||||
|
* So no preemption will take place
|
||||||
|
*/
|
||||||
|
String labelsConfig =
|
||||||
|
"=100,true;";
|
||||||
|
String nodesConfig = // n1 / n2 has no label
|
||||||
|
"n1= res=50;" +
|
||||||
|
"n2= res=50";
|
||||||
|
String queuesConfig =
|
||||||
|
// guaranteed,max,used,pending,reserved
|
||||||
|
"root(=[100 100 100 9 9]);" + //root
|
||||||
|
"-a(=[85 100 90 0]);" + // a
|
||||||
|
"-b(=[15 100 10 9 9])"; // b
|
||||||
|
String appsConfig=
|
||||||
|
//queueName\t(priority,resource,host,expression,#repeat,reserved)
|
||||||
|
"a\t" // app1 in a
|
||||||
|
+ "(1,1,n1,,45,false)" // 45 in n1
|
||||||
|
+ "(1,1,n2,,45,false);" + // 45 in n2
|
||||||
|
"b\t" // app2 in b
|
||||||
|
+ "(1,1,n1,,1,false)" // AM container in n1
|
||||||
|
+ "(1,9,n1,,1,true)"; // 1 container with size=9 reserved at n1
|
||||||
|
|
||||||
|
buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
|
||||||
|
policy.editSchedule();
|
||||||
|
|
||||||
|
verify(mDisp, times(0)).handle(argThat(
|
||||||
|
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
|
||||||
|
getAppAttemptId(1))));
|
||||||
|
verify(mDisp, times(0)).handle(argThat(
|
||||||
|
new TestProportionalCapacityPreemptionPolicy.IsPreemptionRequestFor(
|
||||||
|
getAppAttemptId(2))));
|
||||||
|
}

  @Test
  public void testPreemptionForReservedContainerWhichHasAvailableResource()
      throws IOException {
    /**
     * Queue structure is:
     *
     * <pre>
     *       root
     *       /  \
     *      a    b
     * </pre>
     *
     * Guaranteed resources of a/b are 50:50
     * Total cluster resource = 100
     * - A has 90 containers on two nodes, n1 has 45, n2 has 45, size of each
     *   container is 1.
     * - B reserves 1 container with size = 9 at n1.
     *
     * So we can get 4 containers preempted after preemption:
     * (available 5 + preempted 4) = 9
     */
    String labelsConfig =
        "=100,true;";
    String nodesConfig = // n1 / n2 has no label
        "n1= res=50;" +
        "n2= res=50";
    String queuesConfig =
        // guaranteed,max,used,pending,reserved
        "root(=[100 100 99 9 9]);" + //root
        "-a(=[50 100 90 0]);" + // a
        "-b(=[50 100 9 9 9])"; // b
    String appsConfig=
        //queueName\t(priority,resource,host,expression,#repeat,reserved)
        "a\t" // app1 in a
        + "(1,1,n1,,45,false)" // 45 in n1
        + "(1,1,n2,,45,false);" + // 45 in n2
        "b\t" // app2 in b
        + "(1,9,n1,,1,true)"; // 1 container with size=9 reserved at n1

    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
    policy.editSchedule();

    // Total 4 preempted from app1 at n1, don't preempt container from other
    // app/node
    verify(mDisp, times(4)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n1", 1))));
    verify(mDisp, times(0)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n2", 1))));
  }
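
  // Editorial note (not part of the original patch): n1 is 50 total with 45
  // used by app1, leaving 5 idle; the reservation needs 9, so only the
  // shortfall is preempted: 9 - 5 = 4 containers of size 1, all on n1.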

  @Test
  public void testPreemptionForReservedContainerWhichHasNondivisibleAvailableResource()
      throws IOException {
    /**
     * Queue structure is:
     *
     * <pre>
     *       root
     *       /  \
     *      a    b
     * </pre>
     *
     * Guaranteed resources of a/b are 50:50
     * Total cluster resource = 100
     * - A has 47 containers on two nodes, size of each container is 2,
     *   n1 has 24, n2 has 23
     * - B reserves 1 container with size = 9 at n1.
     *
     * So we can get 4 containers (total-resource = 8) preempted after
     * preemption. The actual requirement is 3.5 containers, but we must
     * preempt an integer number of containers.
     */
    String labelsConfig =
        "=100,true;";
    String nodesConfig = // n1 / n2 has no label
        "n1= res=50;" +
        "n2= res=50";
    String queuesConfig =
        // guaranteed,max,used,pending,reserved
        "root(=[100 100 99 9 9]);" + //root
        "-a(=[50 100 90 0]);" + // a
        "-b(=[50 100 9 9 9])"; // b
    String appsConfig=
        //queueName\t(priority,resource,host,expression,#repeat,reserved)
        "a\t" // app1 in a
        + "(1,2,n1,,24,false)" // 48 in n1
        + "(1,2,n2,,23,false);" + // 46 in n2
        "b\t" // app2 in b
        + "(1,9,n1,,1,true)"; // 1 container with size=9 reserved at n1

    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
    policy.editSchedule();

    // Total 4 preempted from app1 at n1, don't preempt container from other
    // app/node
    verify(mDisp, times(4)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n1", 1))));
    verify(mDisp, times(0)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n2", 1))));
  }
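
  // Editorial sketch (not part of the original patch), spelling out the
  // integer rounding above in plain Java, assuming 2 idle on n1 and
  // containers of size 2:
  //   int toObtain = 9 - 2;                    // reserved minus idle = 7
  //   int containers = (toObtain + 2 - 1) / 2; // ceil(7 / 2) = 4
  //   int preemptedResource = containers * 2;  // 8 >= 7, whole containers only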

  @Test
  public void testPreemptionForReservedContainerRespectAvailableResources()
      throws IOException {
    /**
     * Queue structure is:
     *
     * <pre>
     *       root
     *       /  \
     *      a    b
     * </pre>
     *
     * Guaranteed resources of a/b are 50:50
     * Total cluster resource = 100, 4 nodes, 25 on each node
     * - A has 10 containers on every node, size of each container is 2
     * - B reserves 1 container with size = 9 at n1.
     *
     * So even if we cannot allocate the container for B now, no preemption
     * should happen since there are plenty of available resources.
     */
    String labelsConfig =
        "=100,true;";
    String nodesConfig =
        "n1= res=25;" +
        "n2= res=25;" +
        "n3= res=25;" +
        "n4= res=25;";
    String queuesConfig =
        // guaranteed,max,used,pending,reserved
        "root(=[100 100 89 9 9]);" + //root
        "-a(=[50 100 80 0]);" + // a
        "-b(=[50 100 9 9 9])"; // b
    String appsConfig=
        //queueName\t(priority,resource,host,expression,#repeat,reserved)
        "a\t" // app1 in a
        + "(1,2,n1,,10,false)" // 10 in n1
        + "(1,2,n2,,10,false)" // 10 in n2
        + "(1,2,n3,,10,false)" // 10 in n3
        + "(1,2,n4,,10,false);" + // 10 in n4
        "b\t" // app2 in b
        + "(1,9,n1,,1,true)"; // 1 container with size=9 reserved at n1

    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);
    policy.editSchedule();

    // No preemption should happen
    verify(mDisp, times(0)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n1", 1))));
    verify(mDisp, times(0)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n2", 1))));
    verify(mDisp, times(0)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n3", 1))));
    verify(mDisp, times(0)).handle(argThat(
        new IsPreemptionRequestForQueueAndNode(getAppAttemptId(1), "a",
            NodeId.newInstance("n4", 1))));
  }
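
  // Editorial note (not part of the original patch): cluster-wide there is
  // 100 - 89 = 11 unallocated, which already covers the 9 the reservation
  // needs, so the policy selects no candidates even though n1 itself cannot
  // fit the reserved container right now.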
}

@@ -0,0 +1,247 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity;

import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.junit.Assert;
import org.junit.Test;

public class TestProportionalCapacityPreemptionPolicyMockFramework
    extends ProportionalCapacityPreemptionPolicyMockFramework {

  @Test
  public void testBuilder() throws Exception {
    /**
     * Test of the test itself: make sure we build the expected mock
     * schedulable objects.
     */
    String labelsConfig =
        "=200,true;" + // default partition
        "red=100,false;" + // partition=red
        "blue=200,true"; // partition=blue
    String nodesConfig =
        "n1=red;" + // n1 has partition=red
        "n2=blue;" + // n2 has partition=blue
        "n3="; // n3 doesn't have partition
    String queuesConfig =
        // guaranteed,max,used,pending
        "root(=[200 200 100 100],red=[100 100 100 100],blue=[200 200 200 200]);" + //root
        "-a(=[100 200 100 100],red=[0 0 0 0],blue=[200 200 200 200]);" + // a
        "--a1(=[50 100 50 100],red=[0 0 0 0],blue=[100 200 200 0]);" + // a1
        "--a2(=[50 200 50 0],red=[0 0 0 0],blue=[100 200 0 200]);" + // a2
        "-b(=[100 200 0 0],red=[100 100 100 100],blue=[0 0 0 0])";
    String appsConfig=
        //queueName\t(priority,resource,host,expression,#repeat,reserved)
        // app2 in a1: 50 in n2 (reserved), 50 in n2 (allocated)
        "a1\t" // app1 in a1
        + "(1,1,n3,red,50,false);" + // 50 * default in n3

        "a1\t" // app2 in a1
        + "(2,1,n2,,50,true)(2,1,n2,,50,false)" // 50 * ignore-exclusivity (reserved),
                                                // 50 * ignore-exclusivity (allocated)
        + "(2,1,n2,blue,50,true)(2,1,n2,blue,50,true);" + // 50 in n2 (reserved),
                                                          // 50 in n2 (allocated)
        "a2\t" // app3 in a2
        + "(1,1,n3,red,50,false);" + // 50 * default in n3

        "b\t" // app4 in b
        + "(1,1,n1,red,100,false);";

    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);

    // Check queues:
    // root
    checkAbsCapacities(cs.getQueue("root"), "", 1f, 1f, 0.5f);
    checkPendingResource(cs.getQueue("root"), "", 100);
    checkAbsCapacities(cs.getQueue("root"), "red", 1f, 1f, 1f);
    checkPendingResource(cs.getQueue("root"), "red", 100);
    checkAbsCapacities(cs.getQueue("root"), "blue", 1f, 1f, 1f);
    checkPendingResource(cs.getQueue("root"), "blue", 200);

    // a
    checkAbsCapacities(cs.getQueue("a"), "", 0.5f, 1f, 0.5f);
    checkPendingResource(cs.getQueue("a"), "", 100);
    checkAbsCapacities(cs.getQueue("a"), "red", 0f, 0f, 0f);
    checkPendingResource(cs.getQueue("a"), "red", 0);
    checkAbsCapacities(cs.getQueue("a"), "blue", 1f, 1f, 1f);
    checkPendingResource(cs.getQueue("a"), "blue", 200);

    // a1
    checkAbsCapacities(cs.getQueue("a1"), "", 0.25f, 0.5f, 0.25f);
    checkPendingResource(cs.getQueue("a1"), "", 100);
    checkAbsCapacities(cs.getQueue("a1"), "red", 0f, 0f, 0f);
    checkPendingResource(cs.getQueue("a1"), "red", 0);
    checkAbsCapacities(cs.getQueue("a1"), "blue", 0.5f, 1f, 1f);
    checkPendingResource(cs.getQueue("a1"), "blue", 0);

    // a2
    checkAbsCapacities(cs.getQueue("a2"), "", 0.25f, 1f, 0.25f);
    checkPendingResource(cs.getQueue("a2"), "", 0);
    checkAbsCapacities(cs.getQueue("a2"), "red", 0f, 0f, 0f);
    checkPendingResource(cs.getQueue("a2"), "red", 0);
    checkAbsCapacities(cs.getQueue("a2"), "blue", 0.5f, 1f, 0f);
    checkPendingResource(cs.getQueue("a2"), "blue", 200);

    // b
    checkAbsCapacities(cs.getQueue("b"), "", 0.5f, 1f, 0f);
    checkPendingResource(cs.getQueue("b"), "", 0);
    checkAbsCapacities(cs.getQueue("b"), "red", 1f, 1f, 1f);
    checkPendingResource(cs.getQueue("b"), "red", 100);
    checkAbsCapacities(cs.getQueue("b"), "blue", 0f, 0f, 0f);
    checkPendingResource(cs.getQueue("b"), "blue", 0);

    // Check ignored partitioned containers in queue
    Assert.assertEquals(100, ((LeafQueue) cs.getQueue("a1"))
        .getIgnoreExclusivityRMContainers().get("blue").size());

    // Check applications
    Assert.assertEquals(2, ((LeafQueue)cs.getQueue("a1")).getApplications().size());
    Assert.assertEquals(1, ((LeafQueue)cs.getQueue("a2")).getApplications().size());
    Assert.assertEquals(1, ((LeafQueue)cs.getQueue("b")).getApplications().size());

    // Check #containers
    FiCaSchedulerApp app1 = getApp("a1", 1);
    FiCaSchedulerApp app2 = getApp("a1", 2);
    FiCaSchedulerApp app3 = getApp("a2", 3);
    FiCaSchedulerApp app4 = getApp("b", 4);

    Assert.assertEquals(50, app1.getLiveContainers().size());
    checkContainerNodesInApp(app1, 50, "n3");

    Assert.assertEquals(50, app2.getLiveContainers().size());
    Assert.assertEquals(150, app2.getReservedContainers().size());
    checkContainerNodesInApp(app2, 200, "n2");

    Assert.assertEquals(50, app3.getLiveContainers().size());
    checkContainerNodesInApp(app3, 50, "n3");

    Assert.assertEquals(100, app4.getLiveContainers().size());
    checkContainerNodesInApp(app4, 100, "n1");
  }
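
  // Editorial note (not part of the original patch): the appsConfig grammar
  // exercised above is queueName\t(priority,resource,host,expression,#repeat,reserved),
  // so, for example, "(2,1,n2,blue,50,true)" builds 50 reserved containers of
  // size 1 on n2 in partition blue for the app listed under that queue.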

  @Test
  public void testBuilderWithReservedResource() throws Exception {
    String labelsConfig =
        "=200,true;" + // default partition
        "red=100,false;" + // partition=red
        "blue=200,true"; // partition=blue
    String nodesConfig =
        "n1=red;" + // n1 has partition=red
        "n2=blue;" + // n2 has partition=blue
        "n3="; // n3 doesn't have partition
    String queuesConfig =
        // guaranteed,max,used,pending,reserved
        "root(=[200 200 100 100 100],red=[100 100 100 100 90],blue=[200 200 200 200 80]);" + //root
        "-a(=[100 200 100 100 50],red=[0 0 0 0 40],blue=[200 200 200 200 30]);" + // a
        "--a1(=[50 100 50 100 40],red=[0 0 0 0 20],blue=[100 200 200 0]);" + // a1
        "--a2(=[50 200 50 0 10],red=[0 0 0 0 20],blue=[100 200 0 200]);" + // a2
        "-b(=[100 200 0 0],red=[100 100 100 100],blue=[0 0 0 0])";
    String appsConfig=
        //queueName\t(priority,resource,host,expression,#repeat,reserved)
        // app2 in a1: 50 in n2 (reserved), 50 in n2 (allocated)
        "a1\t" // app1 in a1
        + "(1,1,n3,red,50,false);" + // 50 * default in n3

        "a1\t" // app2 in a1
        + "(2,1,n2,,50,true)(2,1,n2,,50,false)" // 50 * ignore-exclusivity (reserved),
                                                // 50 * ignore-exclusivity (allocated)
        + "(2,1,n2,blue,50,true)(2,1,n2,blue,50,true);" + // 50 in n2 (reserved),
                                                          // 50 in n2 (allocated)
        "a2\t" // app3 in a2
        + "(1,1,n3,red,50,false);" + // 50 * default in n3

        "b\t" // app4 in b
        + "(1,1,n1,red,100,false);";

    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);

    // Check queues:
    // root
    checkReservedResource(cs.getQueue("root"), "", 100);
    checkReservedResource(cs.getQueue("root"), "red", 90);

    // a
    checkReservedResource(cs.getQueue("a"), "", 50);
    checkReservedResource(cs.getQueue("a"), "red", 40);

    // a1
    checkReservedResource(cs.getQueue("a1"), "", 40);
    checkReservedResource(cs.getQueue("a1"), "red", 20);

    // b
    checkReservedResource(cs.getQueue("b"), "", 0);
    checkReservedResource(cs.getQueue("b"), "red", 0);
  }
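
  // Editorial note (not part of the original patch): relative to testBuilder,
  // each queue tuple above carries a fifth field, e.g. "=[200 200 100 100 100]"
  // reads guaranteed,max,used,pending,reserved; checkReservedResource()
  // verifies that last field per partition.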

  @Test
  public void testBuilderWithSpecifiedNodeResources() throws Exception {
    String labelsConfig =
        "=200,true;" + // default partition
        "red=100,false;" + // partition=red
        "blue=200,true"; // partition=blue
    String nodesConfig =
        "n1=red res=100;" + // n1 has partition=red
        "n2=blue;" + // n2 has partition=blue
        "n3= res=30"; // n3 doesn't have partition
    String queuesConfig =
        // guaranteed,max,used,pending,reserved
        "root(=[200 200 100 100 100],red=[100 100 100 100 90],blue=[200 200 200 200 80]);" + //root
        "-a(=[100 200 100 100 50],red=[0 0 0 0 40],blue=[200 200 200 200 30]);" + // a
        "--a1(=[50 100 50 100 40],red=[0 0 0 0 20],blue=[100 200 200 0]);" + // a1
        "--a2(=[50 200 50 0 10],red=[0 0 0 0 20],blue=[100 200 0 200]);" + // a2
        "-b(=[100 200 0 0],red=[100 100 100 100],blue=[0 0 0 0])";
    String appsConfig=
        //queueName\t(priority,resource,host,expression,#repeat,reserved)
        // app2 in a1: 50 in n2 (reserved), 50 in n2 (allocated)
        "a1\t" // app1 in a1
        + "(1,1,n3,red,50,false);" + // 50 * default in n3

        "a1\t" // app2 in a1
        + "(2,1,n2,,50,true)(2,1,n2,,50,false)" // 50 * ignore-exclusivity (reserved),
                                                // 50 * ignore-exclusivity (allocated)
        + "(2,1,n2,blue,50,true)(2,1,n2,blue,50,true);" + // 50 in n2 (reserved),
                                                          // 50 in n2 (allocated)
        "a2\t" // app3 in a2
        + "(1,1,n3,red,50,false);" + // 50 * default in n3

        "b\t" // app4 in b
        + "(1,1,n1,red,100,false);";

    buildEnv(labelsConfig, nodesConfig, queuesConfig, appsConfig);

    // Check host resources
    Assert.assertEquals(3, this.cs.getAllNodes().size());
    SchedulerNode node1 = cs.getSchedulerNode(NodeId.newInstance("n1", 1));
    Assert.assertEquals(100, node1.getTotalResource().getMemory());
    Assert.assertEquals(100, node1.getCopiedListOfRunningContainers().size());
    Assert.assertNull(node1.getReservedContainer());

    SchedulerNode node2 = cs.getSchedulerNode(NodeId.newInstance("n2", 1));
    Assert.assertEquals(0, node2.getTotalResource().getMemory());
    Assert.assertEquals(50, node2.getCopiedListOfRunningContainers().size());
    Assert.assertNotNull(node2.getReservedContainer());

    SchedulerNode node3 = cs.getSchedulerNode(NodeId.newInstance("n3", 1));
    Assert.assertEquals(30, node3.getTotalResource().getMemory());
    Assert.assertEquals(100, node3.getCopiedListOfRunningContainers().size());
    Assert.assertNull(node3.getReservedContainer());
  }
}

@@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.api.records.ResourceRequest;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
 import org.junit.After;

@@ -144,7 +145,7 @@ public class TestSchedulerApplicationAttempt {
   private RMContainer createRMContainer(ApplicationAttemptId appAttId, int id,
       Resource resource) {
     ContainerId containerId = ContainerId.newContainerId(appAttId, id);
-    RMContainer rmContainer = mock(RMContainer.class);
+    RMContainer rmContainer = mock(RMContainerImpl.class);
     Container container = mock(Container.class);
     when(container.getResource()).thenReturn(resource);
     when(container.getNodeId()).thenReturn(nodeId);
@@ -0,0 +1,149 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.Application;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.util.Clock;
import org.junit.Assert;
import org.junit.Before;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class CapacitySchedulerPreemptionTestBase {

  final int GB = 1024;

  Configuration conf;

  RMNodeLabelsManager mgr;

  Clock clock;

  @Before
  void setUp() throws Exception {
    conf = new YarnConfiguration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
        ResourceScheduler.class);
    conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true);
    conf.setClass(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
        ProportionalCapacityPreemptionPolicy.class, SchedulingEditPolicy.class);
    conf = TestUtils.getConfigurationWithMultipleQueues(this.conf);

    // Set preemption related configurations
    conf.setInt(CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
        0);
    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
        1.0f);
    conf.setFloat(
        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
        1.0f);
    conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
        60000L);
    mgr = new NullRMNodeLabelsManager();
    mgr.init(this.conf);
    clock = mock(Clock.class);
    when(clock.getTime()).thenReturn(0L);
  }

  SchedulingEditPolicy getSchedulingEditPolicy(MockRM rm) {
    ResourceManager.RMActiveServices activeServices = rm.getRMActiveService();
    SchedulingMonitor mon = null;
    for (Service service : activeServices.getServices()) {
      if (service instanceof SchedulingMonitor) {
        mon = (SchedulingMonitor) service;
        break;
      }
    }

    if (mon != null) {
      return mon.getSchedulingEditPolicy();
    }
    return null;
  }
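
  // Editorial note (not part of the original patch): a typical use from a
  // subclass, assuming the monitor was enabled in setUp() as above:
  //   ProportionalCapacityPreemptionPolicy policy =
  //       (ProportionalCapacityPreemptionPolicy) getSchedulingEditPolicy(rm);
  //   policy.editSchedule(); // first pass selects candidates
  //   policy.editSchedule(); // second pass kills them (wait-before-kill = 0)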

  public void waitNumberOfLiveContainersFromApp(FiCaSchedulerApp app,
      int expected) throws InterruptedException {
    int waitNum = 0;

    while (waitNum < 10) {
      System.out.println(app.getLiveContainers().size());
      if (app.getLiveContainers().size() == expected) {
        return;
      }
      Thread.sleep(100);
      waitNum++;
    }

    Assert.fail();
  }

  public void waitNumberOfReservedContainersFromApp(FiCaSchedulerApp app,
      int expected) throws InterruptedException {
    int waitNum = 0;

    while (waitNum < 10) {
      System.out.println(app.getReservedContainers().size());
      if (app.getReservedContainers().size() == expected) {
        return;
      }
      Thread.sleep(100);
      waitNum++;
    }

    Assert.fail();
  }

  public void waitNumberOfLiveContainersOnNodeFromApp(FiCaSchedulerNode node,
      ApplicationAttemptId appId, int expected) throws InterruptedException {
    int waitNum = 0;

    while (waitNum < 500) {
      int total = 0;
      for (RMContainer c : node.getCopiedListOfRunningContainers()) {
        if (c.getApplicationAttemptId().equals(appId)) {
          total++;
        }
      }
      if (total == expected) {
        return;
      }
      Thread.sleep(10);
      waitNum++;
    }

    Assert.fail();
  }
}

@@ -3302,7 +3302,7 @@ public class TestCapacityScheduler {
           resourceManager
               .getResourceScheduler()
               .getSchedulerNode(resourceEvent.getNodeId())
-              .setTotalResource(resourceEvent.getResourceOption().getResource());
+              .updateTotalResource(resourceEvent.getResourceOption().getResource());
         }
       }
     });
@@ -55,56 +55,17 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;

-public class TestCapacitySchedulerPreemption {
-  private static final Log LOG = LogFactory.getLog(
-      TestCapacitySchedulerPreemption.class);
-
-  private final int GB = 1024;
-
-  private Configuration conf;
-
-  RMNodeLabelsManager mgr;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;

+public class TestCapacitySchedulerLazyPreemption
+    extends CapacitySchedulerPreemptionTestBase {
+  @Override
   @Before
   public void setUp() throws Exception {
-    conf = new YarnConfiguration();
-    conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class,
-        ResourceScheduler.class);
-    conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true);
-    conf.setClass(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
-        ProportionalCapacityPreemptionPolicy.class, SchedulingEditPolicy.class);
-    conf = TestUtils.getConfigurationWithMultipleQueues(this.conf);
-
-    // Set preemption related configurations
-    conf.setInt(CapacitySchedulerConfiguration.PREEMPTION_WAIT_TIME_BEFORE_KILL,
-        0);
-    conf.setLong(CapacitySchedulerConfiguration.PREEMPTION_MONITORING_INTERVAL,
-        60000L);
+    super.setUp();
     conf.setBoolean(CapacitySchedulerConfiguration.LAZY_PREEMPTION_ENALBED,
         true);
-    conf.setFloat(CapacitySchedulerConfiguration.TOTAL_PREEMPTION_PER_ROUND,
-        1.0f);
-    conf.setFloat(
-        CapacitySchedulerConfiguration.PREEMPTION_NATURAL_TERMINATION_FACTOR,
-        1.0f);
-    mgr = new NullRMNodeLabelsManager();
-    mgr.init(this.conf);
   }
-
-  private SchedulingEditPolicy getSchedulingEditPolicy(MockRM rm) {
-    RMActiveServices activeServices = rm.getRMActiveService();
-    SchedulingMonitor mon = null;
-    for (Service service : activeServices.getServices()) {
-      if (service instanceof SchedulingMonitor) {
-        mon = (SchedulingMonitor) service;
-        break;
-      }
-    }
-
-    if (mon != null) {
-      return mon.getSchedulingEditPolicy();
-    }
-    return null;
-  }

 @Test (timeout = 60000)
@@ -0,0 +1,246 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.SchedulingEditPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Arrays;

public class TestCapacitySchedulerSurgicalPreemption
    extends CapacitySchedulerPreemptionTestBase {

  @Override
  @Before
  public void setUp() throws Exception {
    super.setUp();
    conf.setBoolean(
        CapacitySchedulerConfiguration.PREEMPTION_SELECT_CANDIDATES_FOR_RESERVED_CONTAINERS,
        true);
  }

  @Test(timeout = 60000)
  public void testSimpleSurgicalPreemption()
      throws Exception {
    /**
     * Test case: submit two applications (app1/app2) to different queues;
     * queue structure:
     *
     * <pre>
     *             Root
     *            /  |  \
     *           a   b   c
     *          10  20  70
     * </pre>
     *
     * 1) Two nodes (n1/n2) in the cluster, each of them has 20G.
     *
     * 2) app1 is submitted to queue-a first; it asks for 32 * 1G containers.
     *    We will allocate 16 on n1 and 16 on n2.
     *
     * 3) app2 is submitted to queue-c and asks for one 1G container (for AM).
     *
     * 4) app2 asks for another 6G container; it will be reserved on n1.
     *
     * Now we have:
     * n1: 17 from app1, 1 from app2, and 1 reserved from app2
     * n2: 16 from app1.
     *
     * After preemption, we should expect:
     * Preempt 4 containers from app1 on n1.
     */
    MockRM rm1 = new MockRM(conf);
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();

    MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB);
    MockNM nm2 = rm1.registerNode("h2:1234", 20 * GB);
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());

    // launch an app to queue, AM container should be launched in nm1
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);

    am1.allocate("*", 1 * GB, 32, new ArrayList<ContainerId>());

    // Do allocation for node1/node2
    for (int i = 0; i < 32; i++) {
      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
      cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    }

    // App1 should have 33 containers now
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(
        am1.getApplicationAttemptId());
    Assert.assertEquals(33, schedulerApp1.getLiveContainers().size());
    // 17 from n1 and 16 from n2
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()),
        am1.getApplicationAttemptId(), 17);
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()),
        am1.getApplicationAttemptId(), 16);

    // Submit app2 to queue-c and ask for a 1G container for the AM
    RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "c");
    MockAM am2 = MockRM.launchAndRegisterAM(app2, rm1, nm1);

    // NM1/NM2 has available resource = 2G/4G
    Assert.assertEquals(2 * GB, cs.getNode(nm1.getNodeId())
        .getUnallocatedResource().getMemory());
    Assert.assertEquals(4 * GB, cs.getNode(nm2.getNodeId())
        .getUnallocatedResource().getMemory());

    // am2 asks for a 6 * GB container
    am2.allocate(Arrays.asList(ResourceRequest
        .newInstance(Priority.newInstance(1), ResourceRequest.ANY,
            Resources.createResource(6 * GB), 1)), null);

    // Call allocation once on n1, we should expect the container reserved on n1
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer());

    // Get edit policy and do one update
    SchedulingEditPolicy editPolicy = getSchedulingEditPolicy(rm1);

    // Call edit schedule twice, and check if 4 containers from app1 at n1 are
    // killed
    editPolicy.editSchedule();
    editPolicy.editSchedule();

    waitNumberOfLiveContainersFromApp(schedulerApp1, 29);

    // 13 from n1 (4 preempted) and 16 from n2
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()),
        am1.getApplicationAttemptId(), 13);
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()),
        am1.getApplicationAttemptId(), 16);

    rm1.close();
  }
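
  // Editorial note (not part of the original patch): the arithmetic behind
  // the 4 kills above: n1 is 20G with 17G (app1) + 1G (app2's AM) used, so
  // 2G is idle; the 6G reservation is short by 4G, i.e. 4 of app1's 1G
  // containers on n1, leaving 13 there and n2 untouched.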

  @Test(timeout = 60000)
  public void testSurgicalPreemptionWithAvailableResource()
      throws Exception {
    /**
     * Test case: submit two applications (app1/app2) to different queues;
     * queue structure:
     *
     * <pre>
     *             Root
     *            /  |  \
     *           a   b   c
     *          10  20  70
     * </pre>
     *
     * 1) Two nodes (n1/n2) in the cluster, each of them has 20G.
     *
     * 2) app1 is submitted to queue-a first; it asks for 38 * 1G containers.
     *    We will allocate 20 on n1 and 19 on n2.
     *
     * 3) app2 is submitted to queue-c and asks for one 4G container (for AM).
     *
     * After preemption, we should expect:
     * Preempt 3 containers from app1, and the AM of app2 gets successfully
     * allocated.
     */
    MockRM rm1 = new MockRM(conf);
    rm1.getRMContext().setNodeLabelManager(mgr);
    rm1.start();

    MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB);
    MockNM nm2 = rm1.registerNode("h2:1234", 20 * GB);
    CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
    RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
    RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId());

    // launch an app to queue, AM container should be launched in nm1
    RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a");
    MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1);

    am1.allocate("*", 1 * GB, 38, new ArrayList<ContainerId>());

    // Do allocation for node1/node2
    for (int i = 0; i < 38; i++) {
      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
      cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    }

    // App1 should have 39 containers now
    FiCaSchedulerApp schedulerApp1 = cs.getApplicationAttempt(
        am1.getApplicationAttemptId());
    Assert.assertEquals(39, schedulerApp1.getLiveContainers().size());
    // 20 from n1 and 19 from n2
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode1.getNodeID()),
        am1.getApplicationAttemptId(), 20);
    waitNumberOfLiveContainersOnNodeFromApp(cs.getNode(rmNode2.getNodeID()),
        am1.getApplicationAttemptId(), 19);

    // Submit app2 to queue-c and ask for a 4G container for the AM
    RMApp app2 = rm1.submitApp(4 * GB, "app", "user", null, "c");
    FiCaSchedulerApp schedulerApp2 = cs.getApplicationAttempt(
        ApplicationAttemptId.newInstance(app2.getApplicationId(), 1));

    // Call editSchedule: containers are selected as preemption candidates
    ProportionalCapacityPreemptionPolicy editPolicy =
        (ProportionalCapacityPreemptionPolicy) getSchedulingEditPolicy(rm1);
    editPolicy.editSchedule();
    Assert.assertEquals(3, editPolicy.getToPreemptContainers().size());

    // Call editSchedule again: selected containers are killed
    editPolicy.editSchedule();
    waitNumberOfLiveContainersFromApp(schedulerApp1, 36);

    // Call allocation, containers are reserved
    cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
    cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
    waitNumberOfReservedContainersFromApp(schedulerApp2, 1);

    // Call editSchedule twice and allocation once, container should get
    // allocated
    editPolicy.editSchedule();
    editPolicy.editSchedule();

    int tick = 0;
    while (schedulerApp2.getLiveContainers().size() != 1 && tick < 10) {
      cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
      cs.handle(new NodeUpdateSchedulerEvent(rmNode2));
      tick++;
      Thread.sleep(100);
    }
    waitNumberOfReservedContainersFromApp(schedulerApp2, 0);

    rm1.close();
  }
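
  // Editorial note (not part of the original patch): here nothing is reserved
  // when the policy first runs; 39 of 40G are used and the 4G AM needs room
  // on a single node, so 3 x 1G are selected from app1, after which the AM
  // container is first reserved and then allocated.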
}

@@ -41,7 +41,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

@@ -61,7 +60,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
 import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
-import org.apache.hadoop.yarn.api.records.NodeId;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Priority;
 import org.apache.hadoop.yarn.api.records.QueueInfo;

@@ -4322,7 +4320,7 @@ public class TestFairScheduler extends FairSchedulerTestBase {
           resourceManager
               .getResourceScheduler()
               .getSchedulerNode(resourceEvent.getNodeId())
-              .setTotalResource(resourceEvent.getResourceOption().getResource());
+              .updateTotalResource(resourceEvent.getResourceOption().getResource());
         }
       }
     });

@@ -25,7 +25,6 @@ import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;

 import java.io.IOException;
-import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;

@@ -92,7 +91,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;

@@ -1191,7 +1189,7 @@ public class TestFifoScheduler {
           resourceManager
               .getResourceScheduler()
               .getSchedulerNode(resourceEvent.getNodeId())
-              .setTotalResource(resourceEvent.getResourceOption().getResource());
+              .updateTotalResource(resourceEvent.getResourceOption().getResource());
         }
       }
     });