YARN-6895. [FairScheduler] Preemption reservation may cause regular reservation leaks. (Miklos Szegedi via Yufei Gu)
This commit is contained in:
parent
48899134d2
commit
45535f8afa
|
@ -554,6 +554,15 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
this.minshareStarvation = Resources.none();
|
this.minshareStarvation = Resources.none();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get last computed minshare starvation.
|
||||||
|
*
|
||||||
|
* @return last computed minshare starvation
|
||||||
|
*/
|
||||||
|
Resource getMinshareStarvation() {
|
||||||
|
return minshareStarvation;
|
||||||
|
}
|
||||||
|
|
||||||
void trackContainerForPreemption(RMContainer container) {
|
void trackContainerForPreemption(RMContainer container) {
|
||||||
synchronized (preemptionVariablesLock) {
|
synchronized (preemptionVariablesLock) {
|
||||||
if (containersToPreempt.add(container)) {
|
if (containersToPreempt.add(container)) {
|
||||||
|
@ -842,7 +851,10 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
}
|
}
|
||||||
|
|
||||||
// The desired container won't fit here, so reserve
|
// The desired container won't fit here, so reserve
|
||||||
|
// Reserve only, if app does not wait for preempted resources on the node,
|
||||||
|
// otherwise we may end up with duplicate reservations
|
||||||
if (isReservable(capability) &&
|
if (isReservable(capability) &&
|
||||||
|
!node.isPreemptedForApp(this) &&
|
||||||
reserve(pendingAsk.getPerAllocationResource(), node, reservedContainer,
|
reserve(pendingAsk.getPerAllocationResource(), node, reservedContainer,
|
||||||
type, schedulerKey)) {
|
type, schedulerKey)) {
|
||||||
updateAMDiagnosticMsg(capability, " exceeds the available resources of "
|
updateAMDiagnosticMsg(capability, " exceeds the available resources of "
|
||||||
|
@ -1110,7 +1122,8 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!starved ||
|
if (!starved ||
|
||||||
now - lastTimeAtFairShare < getQueue().getFairSharePreemptionTimeout()) {
|
now - lastTimeAtFairShare <
|
||||||
|
getQueue().getFairSharePreemptionTimeout()) {
|
||||||
fairshareStarvation = Resources.none();
|
fairshareStarvation = Resources.none();
|
||||||
} else {
|
} else {
|
||||||
// The app has been starved for longer than preemption-timeout.
|
// The app has been starved for longer than preemption-timeout.
|
||||||
|
@ -1138,7 +1151,7 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is application starved for fairshare or minshare
|
* Is application starved for fairshare or minshare.
|
||||||
*/
|
*/
|
||||||
boolean isStarved() {
|
boolean isStarved() {
|
||||||
return isStarvedForFairShare() || !Resources.isNone(minshareStarvation);
|
return isStarvedForFairShare() || !Resources.isNone(minshareStarvation);
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
|
@ -35,12 +36,15 @@ import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.ConcurrentSkipListSet;
|
import java.util.concurrent.ConcurrentSkipListSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fair Scheduler specific node features.
|
||||||
|
*/
|
||||||
@Private
|
@Private
|
||||||
@Unstable
|
@Unstable
|
||||||
public class FSSchedulerNode extends SchedulerNode {
|
public class FSSchedulerNode extends SchedulerNode {
|
||||||
|
@ -122,7 +126,8 @@ public class FSSchedulerNode extends SchedulerNode {
|
||||||
SchedulerApplicationAttempt application) {
|
SchedulerApplicationAttempt application) {
|
||||||
// Cannot unreserve for wrong application...
|
// Cannot unreserve for wrong application...
|
||||||
ApplicationAttemptId reservedApplication =
|
ApplicationAttemptId reservedApplication =
|
||||||
getReservedContainer().getContainer().getId().getApplicationAttemptId();
|
getReservedContainer().getContainer().getId()
|
||||||
|
.getApplicationAttemptId();
|
||||||
if (!reservedApplication.equals(
|
if (!reservedApplication.equals(
|
||||||
application.getApplicationAttemptId())) {
|
application.getApplicationAttemptId())) {
|
||||||
throw new IllegalStateException("Trying to unreserve " +
|
throw new IllegalStateException("Trying to unreserve " +
|
||||||
|
@ -151,20 +156,37 @@ public class FSSchedulerNode extends SchedulerNode {
|
||||||
return new LinkedHashMap<>(resourcesPreemptedForApp);
|
return new LinkedHashMap<>(resourcesPreemptedForApp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether a preemption is tracked on the node for the specified app.
|
||||||
|
* @return if preempted containers are reserved for the app
|
||||||
|
*/
|
||||||
|
synchronized boolean isPreemptedForApp(FSAppAttempt app){
|
||||||
|
return resourcesPreemptedForApp.containsKey(app);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove apps that have their preemption requests fulfilled.
|
* Remove apps that have their preemption requests fulfilled.
|
||||||
*/
|
*/
|
||||||
private synchronized void cleanupPreemptionList() {
|
private void cleanupPreemptionList() {
|
||||||
Iterator<Map.Entry<FSAppAttempt, Resource>> iterator =
|
// Synchronize separately to avoid potential deadlocks
|
||||||
resourcesPreemptedForApp.entrySet().iterator();
|
// This may cause delayed deletion of reservations
|
||||||
while(iterator.hasNext()) {
|
LinkedList<FSAppAttempt> candidates;
|
||||||
FSAppAttempt app = iterator.next().getKey();
|
synchronized (this) {
|
||||||
if (app.isStopped() || !app.isStarved()) {
|
candidates = Lists.newLinkedList(resourcesPreemptedForApp.keySet());
|
||||||
|
}
|
||||||
|
for (FSAppAttempt app : candidates) {
|
||||||
|
if (app.isStopped() || !app.isStarved() ||
|
||||||
|
(Resources.isNone(app.getFairshareStarvation()) &&
|
||||||
|
Resources.isNone(app.getMinshareStarvation()))) {
|
||||||
// App does not need more resources
|
// App does not need more resources
|
||||||
Resources.subtractFrom(totalResourcesPreempted,
|
synchronized (this) {
|
||||||
resourcesPreemptedForApp.get(app));
|
Resource removed = resourcesPreemptedForApp.remove(app);
|
||||||
appIdToAppMap.remove(app.getApplicationAttemptId());
|
if (removed != null) {
|
||||||
iterator.remove();
|
Resources.subtractFrom(totalResourcesPreempted,
|
||||||
|
removed);
|
||||||
|
appIdToAppMap.remove(app.getApplicationAttemptId());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -180,15 +202,23 @@ public class FSSchedulerNode extends SchedulerNode {
|
||||||
void addContainersForPreemption(Collection<RMContainer> containers,
|
void addContainersForPreemption(Collection<RMContainer> containers,
|
||||||
FSAppAttempt app) {
|
FSAppAttempt app) {
|
||||||
|
|
||||||
appIdToAppMap.putIfAbsent(app.getApplicationAttemptId(), app);
|
Resource appReserved = Resources.createResource(0);
|
||||||
resourcesPreemptedForApp.putIfAbsent(app, Resource.newInstance(0, 0));
|
|
||||||
Resource appReserved = resourcesPreemptedForApp.get(app);
|
|
||||||
|
|
||||||
for(RMContainer container : containers) {
|
for(RMContainer container : containers) {
|
||||||
containersForPreemption.add(container);
|
if(containersForPreemption.add(container)) {
|
||||||
Resources.addTo(appReserved, container.getAllocatedResource());
|
Resources.addTo(appReserved, container.getAllocatedResource());
|
||||||
Resources.addTo(totalResourcesPreempted,
|
}
|
||||||
container.getAllocatedResource());
|
}
|
||||||
|
|
||||||
|
synchronized (this) {
|
||||||
|
if (!Resources.isNone(appReserved)) {
|
||||||
|
Resources.addTo(totalResourcesPreempted,
|
||||||
|
appReserved);
|
||||||
|
appIdToAppMap.putIfAbsent(app.getApplicationAttemptId(), app);
|
||||||
|
resourcesPreemptedForApp.
|
||||||
|
putIfAbsent(app, Resource.newInstance(0, 0));
|
||||||
|
Resources.addTo(resourcesPreemptedForApp.get(app), appReserved);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -985,25 +985,22 @@ public class FairScheduler extends
|
||||||
* Assign preempted containers to the applications that have reserved
|
* Assign preempted containers to the applications that have reserved
|
||||||
* resources for preempted containers.
|
* resources for preempted containers.
|
||||||
* @param node Node to check
|
* @param node Node to check
|
||||||
* @return assignment has occurred
|
|
||||||
*/
|
*/
|
||||||
static boolean assignPreemptedContainers(FSSchedulerNode node) {
|
static void assignPreemptedContainers(FSSchedulerNode node) {
|
||||||
boolean assignedAny = false;
|
|
||||||
for (Entry<FSAppAttempt, Resource> entry :
|
for (Entry<FSAppAttempt, Resource> entry :
|
||||||
node.getPreemptionList().entrySet()) {
|
node.getPreemptionList().entrySet()) {
|
||||||
FSAppAttempt app = entry.getKey();
|
FSAppAttempt app = entry.getKey();
|
||||||
Resource preemptionPending = Resources.clone(entry.getValue());
|
Resource preemptionPending = Resources.clone(entry.getValue());
|
||||||
while (!app.isStopped() && !Resources.isNone(preemptionPending)) {
|
while (!app.isStopped() && !Resources.isNone(preemptionPending)) {
|
||||||
Resource assigned = app.assignContainer(node);
|
Resource assigned = app.assignContainer(node);
|
||||||
if (Resources.isNone(assigned)) {
|
if (Resources.isNone(assigned) ||
|
||||||
|
assigned.equals(FairScheduler.CONTAINER_RESERVED)) {
|
||||||
// Fail to assign, let's not try further
|
// Fail to assign, let's not try further
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
assignedAny = true;
|
|
||||||
Resources.subtractFromNonNegative(preemptionPending, assigned);
|
Resources.subtractFromNonNegative(preemptionPending, assigned);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return assignedAny;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Collections;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotEquals;
|
import static org.junit.Assert.assertNotEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
@ -68,6 +69,16 @@ public class TestFSSchedulerNode {
|
||||||
when(container.getExecutionType()).thenReturn(ExecutionType.GUARANTEED);
|
when(container.getExecutionType()).thenReturn(ExecutionType.GUARANTEED);
|
||||||
when(container.getAllocatedResource()).
|
when(container.getAllocatedResource()).
|
||||||
thenReturn(Resources.clone(request));
|
thenReturn(Resources.clone(request));
|
||||||
|
when(container.compareTo(any())).thenAnswer(new Answer<Integer>() {
|
||||||
|
public Integer answer(InvocationOnMock invocation) {
|
||||||
|
return
|
||||||
|
Long.compare(
|
||||||
|
((RMContainer)invocation.getMock()).getContainerId()
|
||||||
|
.getContainerId(),
|
||||||
|
((RMContainer)invocation.getArguments()[0]).getContainerId()
|
||||||
|
.getContainerId());
|
||||||
|
}
|
||||||
|
});
|
||||||
containers.add(container);
|
containers.add(container);
|
||||||
return container;
|
return container;
|
||||||
}
|
}
|
||||||
|
@ -224,6 +235,47 @@ public class TestFSSchedulerNode {
|
||||||
finalValidation(schedulerNode);
|
finalValidation(schedulerNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allocate a single container twice and release.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDuplicatePreemption() {
|
||||||
|
RMNode node = createNode();
|
||||||
|
FSSchedulerNode schedulerNode = new FSSchedulerNode(node, false);
|
||||||
|
|
||||||
|
// Launch containers and saturate the cluster
|
||||||
|
saturateCluster(schedulerNode);
|
||||||
|
assertEquals("Container should be allocated",
|
||||||
|
Resources.multiply(containers.get(0).getContainer().getResource(),
|
||||||
|
containers.size()),
|
||||||
|
schedulerNode.getAllocatedResource());
|
||||||
|
|
||||||
|
// Request preemption twice
|
||||||
|
FSAppAttempt starvingApp = createStarvingApp(schedulerNode,
|
||||||
|
Resource.newInstance(1024, 1));
|
||||||
|
schedulerNode.addContainersForPreemption(
|
||||||
|
Collections.singletonList(containers.get(0)), starvingApp);
|
||||||
|
schedulerNode.addContainersForPreemption(
|
||||||
|
Collections.singletonList(containers.get(0)), starvingApp);
|
||||||
|
assertEquals(
|
||||||
|
"No resource amount should be reserved for preemptees",
|
||||||
|
containers.get(0).getAllocatedResource(),
|
||||||
|
schedulerNode.getTotalReserved());
|
||||||
|
|
||||||
|
// Preemption occurs release one container
|
||||||
|
schedulerNode.releaseContainer(containers.get(0).getContainerId(), true);
|
||||||
|
allocateContainers(schedulerNode);
|
||||||
|
assertEquals("Container should be allocated",
|
||||||
|
schedulerNode.getTotalResource(),
|
||||||
|
schedulerNode.getAllocatedResource());
|
||||||
|
|
||||||
|
// Release all remaining containers
|
||||||
|
for (int i = 1; i < containers.size(); ++i) {
|
||||||
|
schedulerNode.releaseContainer(containers.get(i).getContainerId(), true);
|
||||||
|
}
|
||||||
|
finalValidation(schedulerNode);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allocate and release three containers requested by two apps.
|
* Allocate and release three containers requested by two apps.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue