YARN-4519. Potential deadlock of CapacityScheduler between decrease container and assign containers. Contributed Meng Ding
This commit is contained in:
parent
47746f65b0
commit
145add1aec
|
@ -53,9 +53,10 @@ import org.apache.hadoop.yarn.security.YarnAuthorizationProvider;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
|
||||||
|
.SchedContainerChangeRequest;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
|
@ -114,43 +115,25 @@ public class RMServerUtils {
|
||||||
queueName, scheduler, rmContext, queueInfo);
|
queueName, scheduler, rmContext, queueInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize container increase/decrease request, it will normalize and update
|
* Validate increase/decrease request. This function must be called under
|
||||||
* ContainerResourceChangeRequest.targetResource
|
* the queue lock to make sure that the access to container resource is
|
||||||
|
* atomic. Refer to LeafQueue.decreaseContainer() and
|
||||||
|
* CapacityScheduelr.updateIncreaseRequests()
|
||||||
|
*
|
||||||
*
|
*
|
||||||
* <pre>
|
* <pre>
|
||||||
* - Throw exception when any other error happens
|
* - Throw exception when any other error happens
|
||||||
* </pre>
|
* </pre>
|
||||||
*/
|
*/
|
||||||
public static void checkAndNormalizeContainerChangeRequest(
|
public static void checkSchedContainerChangeRequest(
|
||||||
RMContext rmContext, ContainerResourceChangeRequest request,
|
SchedContainerChangeRequest request, boolean increase)
|
||||||
boolean increase) throws InvalidResourceRequestException {
|
throws InvalidResourceRequestException {
|
||||||
|
RMContext rmContext = request.getRmContext();
|
||||||
ContainerId containerId = request.getContainerId();
|
ContainerId containerId = request.getContainerId();
|
||||||
ResourceScheduler scheduler = rmContext.getScheduler();
|
RMContainer rmContainer = request.getRMContainer();
|
||||||
RMContainer rmContainer = scheduler.getRMContainer(containerId);
|
Resource targetResource = request.getTargetCapacity();
|
||||||
ResourceCalculator rc = scheduler.getResourceCalculator();
|
|
||||||
|
|
||||||
if (null == rmContainer) {
|
|
||||||
String msg =
|
|
||||||
"Failed to get rmContainer for "
|
|
||||||
+ (increase ? "increase" : "decrease")
|
|
||||||
+ " request, with container-id=" + containerId;
|
|
||||||
throw new InvalidResourceRequestException(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rmContainer.getState() != RMContainerState.RUNNING) {
|
|
||||||
String msg =
|
|
||||||
"rmContainer's state is not RUNNING, for "
|
|
||||||
+ (increase ? "increase" : "decrease")
|
|
||||||
+ " request, with container-id=" + containerId;
|
|
||||||
throw new InvalidResourceRequestException(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
Resource targetResource = Resources.normalize(rc, request.getCapability(),
|
|
||||||
scheduler.getMinimumResourceCapability(),
|
|
||||||
scheduler.getMaximumResourceCapability(),
|
|
||||||
scheduler.getMinimumResourceCapability());
|
|
||||||
|
|
||||||
// Compare targetResource and original resource
|
// Compare targetResource and original resource
|
||||||
Resource originalResource = rmContainer.getAllocatedResource();
|
Resource originalResource = rmContainer.getAllocatedResource();
|
||||||
|
@ -181,10 +164,10 @@ public class RMServerUtils {
|
||||||
throw new InvalidResourceRequestException(msg);
|
throw new InvalidResourceRequestException(msg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RMNode rmNode = rmContext.getRMNodes().get(rmContainer.getAllocatedNode());
|
|
||||||
|
|
||||||
// Target resource of the increase request is more than NM can offer
|
// Target resource of the increase request is more than NM can offer
|
||||||
|
ResourceScheduler scheduler = rmContext.getScheduler();
|
||||||
|
RMNode rmNode = request.getSchedulerNode().getRMNode();
|
||||||
if (!Resources.fitsIn(scheduler.getResourceCalculator(),
|
if (!Resources.fitsIn(scheduler.getResourceCalculator(),
|
||||||
scheduler.getClusterResource(), targetResource,
|
scheduler.getClusterResource(), targetResource,
|
||||||
rmNode.getTotalCapability())) {
|
rmNode.getTotalCapability())) {
|
||||||
|
@ -193,9 +176,6 @@ public class RMServerUtils {
|
||||||
+ rmNode.getTotalCapability();
|
+ rmNode.getTotalCapability();
|
||||||
throw new InvalidResourceRequestException(msg);
|
throw new InvalidResourceRequestException(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update normalized target resource
|
|
||||||
request.setCapability(targetResource);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -253,7 +233,8 @@ public class RMServerUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sanity check and normalize target resource
|
||||||
private static void validateIncreaseDecreaseRequest(RMContext rmContext,
|
private static void validateIncreaseDecreaseRequest(RMContext rmContext,
|
||||||
List<ContainerResourceChangeRequest> requests, Resource maximumAllocation,
|
List<ContainerResourceChangeRequest> requests, Resource maximumAllocation,
|
||||||
boolean increase)
|
boolean increase)
|
||||||
|
@ -283,8 +264,23 @@ public class RMServerUtils {
|
||||||
+ request.getCapability().getVirtualCores() + ", maxVirtualCores="
|
+ request.getCapability().getVirtualCores() + ", maxVirtualCores="
|
||||||
+ maximumAllocation.getVirtualCores());
|
+ maximumAllocation.getVirtualCores());
|
||||||
}
|
}
|
||||||
|
ContainerId containerId = request.getContainerId();
|
||||||
checkAndNormalizeContainerChangeRequest(rmContext, request, increase);
|
ResourceScheduler scheduler = rmContext.getScheduler();
|
||||||
|
RMContainer rmContainer = scheduler.getRMContainer(containerId);
|
||||||
|
if (null == rmContainer) {
|
||||||
|
String msg =
|
||||||
|
"Failed to get rmContainer for "
|
||||||
|
+ (increase ? "increase" : "decrease")
|
||||||
|
+ " request, with container-id=" + containerId;
|
||||||
|
throw new InvalidResourceRequestException(msg);
|
||||||
|
}
|
||||||
|
ResourceCalculator rc = scheduler.getResourceCalculator();
|
||||||
|
Resource targetResource = Resources.normalize(rc, request.getCapability(),
|
||||||
|
scheduler.getMinimumResourceCapability(),
|
||||||
|
scheduler.getMaximumResourceCapability(),
|
||||||
|
scheduler.getMinimumResourceCapability());
|
||||||
|
// Update normalized target resource
|
||||||
|
request.setCapability(targetResource);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,6 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -55,13 +54,13 @@ import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||||
|
@ -74,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEven
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent;
|
||||||
|
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeDecreaseContainerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeDecreaseContainerEvent;
|
||||||
|
@ -603,28 +603,20 @@ public abstract class AbstractYarnScheduler
|
||||||
SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED);
|
SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void decreaseContainers(
|
protected void decreaseContainers(
|
||||||
List<SchedContainerChangeRequest> decreaseRequests,
|
List<ContainerResourceChangeRequest> decreaseRequests,
|
||||||
SchedulerApplicationAttempt attempt) {
|
SchedulerApplicationAttempt attempt) {
|
||||||
for (SchedContainerChangeRequest request : decreaseRequests) {
|
if (null == decreaseRequests || decreaseRequests.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Pre-process decrease requests
|
||||||
|
List<SchedContainerChangeRequest> schedDecreaseRequests =
|
||||||
|
createSchedContainerChangeRequests(decreaseRequests, false);
|
||||||
|
for (SchedContainerChangeRequest request : schedDecreaseRequests) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Processing decrease request:" + request);
|
LOG.debug("Processing decrease request:" + request);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean hasIncreaseRequest =
|
|
||||||
attempt.removeIncreaseRequest(request.getNodeId(),
|
|
||||||
request.getPriority(), request.getContainerId());
|
|
||||||
|
|
||||||
if (hasIncreaseRequest) {
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("While processing decrease request, found a increase request "
|
|
||||||
+ "for the same container "
|
|
||||||
+ request.getContainerId()
|
|
||||||
+ ", removed the increase request");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// handle decrease request
|
// handle decrease request
|
||||||
decreaseContainer(request, attempt);
|
decreaseContainer(request, attempt);
|
||||||
}
|
}
|
||||||
|
@ -862,7 +854,7 @@ public abstract class AbstractYarnScheduler
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Normalize container increase/decrease request, and return
|
* Sanity check increase/decrease request, and return
|
||||||
* SchedulerContainerResourceChangeRequest according to given
|
* SchedulerContainerResourceChangeRequest according to given
|
||||||
* ContainerResourceChangeRequest.
|
* ContainerResourceChangeRequest.
|
||||||
*
|
*
|
||||||
|
@ -871,37 +863,34 @@ public abstract class AbstractYarnScheduler
|
||||||
* - Throw exception when any other error happens
|
* - Throw exception when any other error happens
|
||||||
* </pre>
|
* </pre>
|
||||||
*/
|
*/
|
||||||
private SchedContainerChangeRequest
|
private SchedContainerChangeRequest createSchedContainerChangeRequest(
|
||||||
checkAndNormalizeContainerChangeRequest(
|
ContainerResourceChangeRequest request, boolean increase)
|
||||||
ContainerResourceChangeRequest request, boolean increase)
|
throws YarnException {
|
||||||
throws YarnException {
|
|
||||||
// We have done a check in ApplicationMasterService, but RMContainer status
|
|
||||||
// / Node resource could change since AMS won't acquire lock of scheduler.
|
|
||||||
RMServerUtils.checkAndNormalizeContainerChangeRequest(rmContext, request,
|
|
||||||
increase);
|
|
||||||
ContainerId containerId = request.getContainerId();
|
ContainerId containerId = request.getContainerId();
|
||||||
RMContainer rmContainer = getRMContainer(containerId);
|
RMContainer rmContainer = getRMContainer(containerId);
|
||||||
|
if (null == rmContainer) {
|
||||||
|
String msg =
|
||||||
|
"Failed to get rmContainer for "
|
||||||
|
+ (increase ? "increase" : "decrease")
|
||||||
|
+ " request, with container-id=" + containerId;
|
||||||
|
throw new InvalidResourceRequestException(msg);
|
||||||
|
}
|
||||||
SchedulerNode schedulerNode =
|
SchedulerNode schedulerNode =
|
||||||
getSchedulerNode(rmContainer.getAllocatedNode());
|
getSchedulerNode(rmContainer.getAllocatedNode());
|
||||||
|
return new SchedContainerChangeRequest(
|
||||||
return new SchedContainerChangeRequest(schedulerNode, rmContainer,
|
this.rmContext, schedulerNode, rmContainer, request.getCapability());
|
||||||
request.getCapability());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<SchedContainerChangeRequest>
|
protected List<SchedContainerChangeRequest>
|
||||||
checkAndNormalizeContainerChangeRequests(
|
createSchedContainerChangeRequests(
|
||||||
List<ContainerResourceChangeRequest> changeRequests,
|
List<ContainerResourceChangeRequest> changeRequests,
|
||||||
boolean increase) {
|
boolean increase) {
|
||||||
if (null == changeRequests || changeRequests.isEmpty()) {
|
|
||||||
return Collections.EMPTY_LIST;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<SchedContainerChangeRequest> schedulerChangeRequests =
|
List<SchedContainerChangeRequest> schedulerChangeRequests =
|
||||||
new ArrayList<SchedContainerChangeRequest>();
|
new ArrayList<SchedContainerChangeRequest>();
|
||||||
for (ContainerResourceChangeRequest r : changeRequests) {
|
for (ContainerResourceChangeRequest r : changeRequests) {
|
||||||
SchedContainerChangeRequest sr = null;
|
SchedContainerChangeRequest sr = null;
|
||||||
try {
|
try {
|
||||||
sr = checkAndNormalizeContainerChangeRequest(r, increase);
|
sr = createSchedContainerChangeRequest(r, increase);
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
LOG.warn("Error happens when checking increase request, Ignoring.."
|
LOG.warn("Error happens when checking increase request, Ignoring.."
|
||||||
+ " exception=", e);
|
+ " exception=", e);
|
||||||
|
@ -909,7 +898,6 @@ public abstract class AbstractYarnScheduler
|
||||||
}
|
}
|
||||||
schedulerChangeRequests.add(sr);
|
schedulerChangeRequests.add(sr);
|
||||||
}
|
}
|
||||||
|
|
||||||
return schedulerChangeRequests;
|
return schedulerChangeRequests;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,6 +43,8 @@ import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
@ -147,6 +149,18 @@ public class AppSchedulingInfo {
|
||||||
boolean resourceUpdated = false;
|
boolean resourceUpdated = false;
|
||||||
|
|
||||||
for (SchedContainerChangeRequest r : increaseRequests) {
|
for (SchedContainerChangeRequest r : increaseRequests) {
|
||||||
|
if (r.getRMContainer().getState() != RMContainerState.RUNNING) {
|
||||||
|
LOG.warn("rmContainer's state is not RUNNING, for increase request with"
|
||||||
|
+ " container-id=" + r.getContainerId());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
RMServerUtils.checkSchedContainerChangeRequest(r, true);
|
||||||
|
} catch (YarnException e) {
|
||||||
|
LOG.warn("Error happens when checking increase request, Ignoring.."
|
||||||
|
+ " exception=", e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
NodeId nodeId = r.getRMContainer().getAllocatedNode();
|
NodeId nodeId = r.getRMContainer().getAllocatedNode();
|
||||||
|
|
||||||
Map<Priority, Map<ContainerId, SchedContainerChangeRequest>> requestsOnNode =
|
Map<Priority, Map<ContainerId, SchedContainerChangeRequest>> requestsOnNode =
|
||||||
|
@ -220,7 +234,7 @@ public class AppSchedulingInfo {
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Added increase request:" + request.getContainerId()
|
LOG.debug("Added increase request:" + request.getContainerId()
|
||||||
+ " delta=" + request.getDeltaCapacity());
|
+ " delta=" + delta);
|
||||||
}
|
}
|
||||||
|
|
||||||
// update priorities
|
// update priorities
|
||||||
|
@ -509,24 +523,20 @@ public class AppSchedulingInfo {
|
||||||
NodeId nodeId = increaseRequest.getNodeId();
|
NodeId nodeId = increaseRequest.getNodeId();
|
||||||
Priority priority = increaseRequest.getPriority();
|
Priority priority = increaseRequest.getPriority();
|
||||||
ContainerId containerId = increaseRequest.getContainerId();
|
ContainerId containerId = increaseRequest.getContainerId();
|
||||||
|
Resource deltaCapacity = increaseRequest.getDeltaCapacity();
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("allocated increase request : applicationId=" + applicationId
|
LOG.debug("allocated increase request : applicationId=" + applicationId
|
||||||
+ " container=" + containerId + " host="
|
+ " container=" + containerId + " host="
|
||||||
+ increaseRequest.getNodeId() + " user=" + user + " resource="
|
+ increaseRequest.getNodeId() + " user=" + user + " resource="
|
||||||
+ increaseRequest.getDeltaCapacity());
|
+ deltaCapacity);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set queue metrics
|
// Set queue metrics
|
||||||
queue.getMetrics().allocateResources(user,
|
queue.getMetrics().allocateResources(user, deltaCapacity);
|
||||||
increaseRequest.getDeltaCapacity());
|
|
||||||
|
|
||||||
// remove the increase request from pending increase request map
|
// remove the increase request from pending increase request map
|
||||||
removeIncreaseRequest(nodeId, priority, containerId);
|
removeIncreaseRequest(nodeId, priority, containerId);
|
||||||
|
|
||||||
// update usage
|
// update usage
|
||||||
appResourceUsage.incUsed(increaseRequest.getNodePartition(),
|
appResourceUsage.incUsed(increaseRequest.getNodePartition(), deltaCapacity);
|
||||||
increaseRequest.getDeltaCapacity());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized void decreaseContainer(
|
public synchronized void decreaseContainer(
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
|
||||||
|
@ -32,18 +33,19 @@ import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
*/
|
*/
|
||||||
public class SchedContainerChangeRequest implements
|
public class SchedContainerChangeRequest implements
|
||||||
Comparable<SchedContainerChangeRequest> {
|
Comparable<SchedContainerChangeRequest> {
|
||||||
RMContainer rmContainer;
|
private RMContext rmContext;
|
||||||
Resource targetCapacity;
|
private RMContainer rmContainer;
|
||||||
SchedulerNode schedulerNode;
|
private Resource targetCapacity;
|
||||||
Resource deltaCapacity;
|
private SchedulerNode schedulerNode;
|
||||||
|
private Resource deltaCapacity;
|
||||||
|
|
||||||
public SchedContainerChangeRequest(SchedulerNode schedulerNode,
|
public SchedContainerChangeRequest(
|
||||||
|
RMContext rmContext, SchedulerNode schedulerNode,
|
||||||
RMContainer rmContainer, Resource targetCapacity) {
|
RMContainer rmContainer, Resource targetCapacity) {
|
||||||
|
this.rmContext = rmContext;
|
||||||
this.rmContainer = rmContainer;
|
this.rmContainer = rmContainer;
|
||||||
this.targetCapacity = targetCapacity;
|
this.targetCapacity = targetCapacity;
|
||||||
this.schedulerNode = schedulerNode;
|
this.schedulerNode = schedulerNode;
|
||||||
deltaCapacity = Resources.subtract(targetCapacity,
|
|
||||||
rmContainer.getAllocatedResource());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public NodeId getNodeId() {
|
public NodeId getNodeId() {
|
||||||
|
@ -58,11 +60,19 @@ public class SchedContainerChangeRequest implements
|
||||||
return this.targetCapacity;
|
return this.targetCapacity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public RMContext getRmContext() {
|
||||||
|
return this.rmContext;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Delta capacity = before - target, so if it is a decrease request, delta
|
* Delta capacity = target - before, so if it is a decrease request, delta
|
||||||
* capacity will be negative
|
* capacity will be negative
|
||||||
*/
|
*/
|
||||||
public Resource getDeltaCapacity() {
|
public synchronized Resource getDeltaCapacity() {
|
||||||
|
// Only calculate deltaCapacity once
|
||||||
|
if (deltaCapacity == null) {
|
||||||
|
deltaCapacity = Resources.subtract(
|
||||||
|
targetCapacity, rmContainer.getAllocatedResource());
|
||||||
|
}
|
||||||
return deltaCapacity;
|
return deltaCapacity;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,7 +91,7 @@ public class SchedContainerChangeRequest implements
|
||||||
public SchedulerNode getSchedulerNode() {
|
public SchedulerNode getSchedulerNode() {
|
||||||
return schedulerNode;
|
return schedulerNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return (getContainerId().hashCode() << 16) + targetCapacity.hashCode();
|
return (getContainerId().hashCode() << 16) + targetCapacity.hashCode();
|
||||||
|
@ -112,7 +122,6 @@ public class SchedContainerChangeRequest implements
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "<container=" + getContainerId() + ", targetCapacity="
|
return "<container=" + getContainerId() + ", targetCapacity="
|
||||||
+ targetCapacity + ", delta=" + deltaCapacity + ", node="
|
+ targetCapacity + ", node=" + getNodeId().toString() + ">";
|
||||||
+ getNodeId().toString() + ">";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueACL;
|
import org.apache.hadoop.yarn.api.records.QueueACL;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueState;
|
import org.apache.hadoop.yarn.api.records.QueueState;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
|
import org.apache.hadoop.yarn.security.PrivilegedEntity;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
|
||||||
|
@ -329,7 +331,7 @@ extends org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue {
|
||||||
*/
|
*/
|
||||||
public void decreaseContainer(Resource clusterResource,
|
public void decreaseContainer(Resource clusterResource,
|
||||||
SchedContainerChangeRequest decreaseRequest,
|
SchedContainerChangeRequest decreaseRequest,
|
||||||
FiCaSchedulerApp app);
|
FiCaSchedulerApp app) throws InvalidResourceRequestException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get valid Node Labels for this queue
|
* Get valid Node Labels for this queue
|
||||||
|
|
|
@ -21,8 +21,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -65,6 +65,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
||||||
|
@ -892,9 +893,36 @@ public class CapacityScheduler extends
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// It is crucial to acquire leaf queue lock first to prevent:
|
||||||
|
// 1. Race condition when calculating the delta resource in
|
||||||
|
// SchedContainerChangeRequest
|
||||||
|
// 2. Deadlock with the scheduling thread.
|
||||||
|
private LeafQueue updateIncreaseRequests(
|
||||||
|
List<ContainerResourceChangeRequest> increaseRequests,
|
||||||
|
FiCaSchedulerApp app) {
|
||||||
|
if (null == increaseRequests || increaseRequests.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// Pre-process increase requests
|
||||||
|
List<SchedContainerChangeRequest> schedIncreaseRequests =
|
||||||
|
createSchedContainerChangeRequests(increaseRequests, true);
|
||||||
|
LeafQueue leafQueue = (LeafQueue) app.getQueue();
|
||||||
|
synchronized(leafQueue) {
|
||||||
|
// make sure we aren't stopping/removing the application
|
||||||
|
// when the allocate comes in
|
||||||
|
if (app.isStopped()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// Process increase resource requests
|
||||||
|
if (app.updateIncreaseRequests(schedIncreaseRequests)) {
|
||||||
|
return leafQueue;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
// Note: when AM asks to decrease container or release container, we will
|
// Note: when AM asks to release container, we will acquire scheduler lock
|
||||||
// acquire scheduler lock
|
|
||||||
@Lock(Lock.NoLock.class)
|
@Lock(Lock.NoLock.class)
|
||||||
public Allocation allocate(ApplicationAttemptId applicationAttemptId,
|
public Allocation allocate(ApplicationAttemptId applicationAttemptId,
|
||||||
List<ResourceRequest> ask, List<ContainerId> release,
|
List<ResourceRequest> ask, List<ContainerId> release,
|
||||||
|
@ -906,26 +934,23 @@ public class CapacityScheduler extends
|
||||||
if (application == null) {
|
if (application == null) {
|
||||||
return EMPTY_ALLOCATION;
|
return EMPTY_ALLOCATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sanity check
|
|
||||||
SchedulerUtils.normalizeRequests(
|
|
||||||
ask, getResourceCalculator(), getClusterResource(),
|
|
||||||
getMinimumResourceCapability(), getMaximumResourceCapability());
|
|
||||||
|
|
||||||
// Pre-process increase requests
|
|
||||||
List<SchedContainerChangeRequest> normalizedIncreaseRequests =
|
|
||||||
checkAndNormalizeContainerChangeRequests(increaseRequests, true);
|
|
||||||
|
|
||||||
// Pre-process decrease requests
|
|
||||||
List<SchedContainerChangeRequest> normalizedDecreaseRequests =
|
|
||||||
checkAndNormalizeContainerChangeRequests(decreaseRequests, false);
|
|
||||||
|
|
||||||
// Release containers
|
// Release containers
|
||||||
releaseContainers(release, application);
|
releaseContainers(release, application);
|
||||||
|
|
||||||
Allocation allocation;
|
// update increase requests
|
||||||
|
LeafQueue updateDemandForQueue =
|
||||||
|
updateIncreaseRequests(increaseRequests, application);
|
||||||
|
|
||||||
LeafQueue updateDemandForQueue = null;
|
// Decrease containers
|
||||||
|
decreaseContainers(decreaseRequests, application);
|
||||||
|
|
||||||
|
// Sanity check for new allocation requests
|
||||||
|
SchedulerUtils.normalizeRequests(
|
||||||
|
ask, getResourceCalculator(), getClusterResource(),
|
||||||
|
getMinimumResourceCapability(), getMaximumResourceCapability());
|
||||||
|
|
||||||
|
Allocation allocation;
|
||||||
|
|
||||||
synchronized (application) {
|
synchronized (application) {
|
||||||
|
|
||||||
|
@ -944,7 +969,8 @@ public class CapacityScheduler extends
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update application requests
|
// Update application requests
|
||||||
if (application.updateResourceRequests(ask)) {
|
if (application.updateResourceRequests(ask)
|
||||||
|
&& (updateDemandForQueue == null)) {
|
||||||
updateDemandForQueue = (LeafQueue) application.getQueue();
|
updateDemandForQueue = (LeafQueue) application.getQueue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -954,12 +980,6 @@ public class CapacityScheduler extends
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process increase resource requests
|
|
||||||
if (application.updateIncreaseRequests(normalizedIncreaseRequests)
|
|
||||||
&& (updateDemandForQueue == null)) {
|
|
||||||
updateDemandForQueue = (LeafQueue) application.getQueue();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (application.isWaitingForAMContainer()) {
|
if (application.isWaitingForAMContainer()) {
|
||||||
// Allocate is for AM and update AM blacklist for this
|
// Allocate is for AM and update AM blacklist for this
|
||||||
application.updateAMBlacklist(
|
application.updateAMBlacklist(
|
||||||
|
@ -968,8 +988,6 @@ public class CapacityScheduler extends
|
||||||
application.updateBlacklist(blacklistAdditions, blacklistRemovals);
|
application.updateBlacklist(blacklistAdditions, blacklistRemovals);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decrease containers
|
|
||||||
decreaseContainers(normalizedDecreaseRequests, application);
|
|
||||||
|
|
||||||
allocation = application.getAllocation(getResourceCalculator(),
|
allocation = application.getAllocation(getResourceCalculator(),
|
||||||
clusterResource, getMinimumResourceCapability());
|
clusterResource, getMinimumResourceCapability());
|
||||||
|
@ -1164,7 +1182,8 @@ public class CapacityScheduler extends
|
||||||
.getAssignmentInformation().getReserved());
|
.getAssignmentInformation().getReserved());
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
|
@VisibleForTesting
|
||||||
|
protected synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
|
||||||
if (rmContext.isWorkPreservingRecoveryEnabled()
|
if (rmContext.isWorkPreservingRecoveryEnabled()
|
||||||
&& !rmContext.isSchedulerReadyForAllocatingContainers()) {
|
&& !rmContext.isSchedulerReadyForAllocatingContainers()) {
|
||||||
return;
|
return;
|
||||||
|
@ -1514,48 +1533,30 @@ public class CapacityScheduler extends
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Lock(CapacityScheduler.class)
|
|
||||||
@Override
|
@Override
|
||||||
protected synchronized void decreaseContainer(
|
protected void decreaseContainer(SchedContainerChangeRequest decreaseRequest,
|
||||||
SchedContainerChangeRequest decreaseRequest,
|
|
||||||
SchedulerApplicationAttempt attempt) {
|
SchedulerApplicationAttempt attempt) {
|
||||||
RMContainer rmContainer = decreaseRequest.getRMContainer();
|
RMContainer rmContainer = decreaseRequest.getRMContainer();
|
||||||
|
|
||||||
// Check container status before doing decrease
|
// Check container status before doing decrease
|
||||||
if (rmContainer.getState() != RMContainerState.RUNNING) {
|
if (rmContainer.getState() != RMContainerState.RUNNING) {
|
||||||
LOG.info("Trying to decrease a container not in RUNNING state, container="
|
LOG.info("Trying to decrease a container not in RUNNING state, container="
|
||||||
+ rmContainer + " state=" + rmContainer.getState().name());
|
+ rmContainer + " state=" + rmContainer.getState().name());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delta capacity of this decrease request is 0, this decrease request may
|
|
||||||
// just to cancel increase request
|
|
||||||
if (Resources.equals(decreaseRequest.getDeltaCapacity(), Resources.none())) {
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Decrease target resource equals to existing resource for container:"
|
|
||||||
+ decreaseRequest.getContainerId()
|
|
||||||
+ " ignore this decrease request.");
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save resource before decrease
|
|
||||||
Resource resourceBeforeDecrease =
|
|
||||||
Resources.clone(rmContainer.getContainer().getResource());
|
|
||||||
|
|
||||||
FiCaSchedulerApp app = (FiCaSchedulerApp)attempt;
|
FiCaSchedulerApp app = (FiCaSchedulerApp)attempt;
|
||||||
LeafQueue queue = (LeafQueue) attempt.getQueue();
|
LeafQueue queue = (LeafQueue) attempt.getQueue();
|
||||||
queue.decreaseContainer(clusterResource, decreaseRequest, app);
|
try {
|
||||||
|
queue.decreaseContainer(clusterResource, decreaseRequest, app);
|
||||||
// Notify RMNode the container will be decreased
|
// Notify RMNode that the container can be pulled by NodeManager in the
|
||||||
this.rmContext.getDispatcher().getEventHandler()
|
// next heartbeat
|
||||||
.handle(new RMNodeDecreaseContainerEvent(decreaseRequest.getNodeId(),
|
this.rmContext.getDispatcher().getEventHandler()
|
||||||
Arrays.asList(rmContainer.getContainer())));
|
.handle(new RMNodeDecreaseContainerEvent(
|
||||||
|
decreaseRequest.getNodeId(),
|
||||||
LOG.info("Application attempt " + app.getApplicationAttemptId()
|
Collections.singletonList(rmContainer.getContainer())));
|
||||||
+ " decreased container:" + decreaseRequest.getContainerId() + " from "
|
} catch (InvalidResourceRequestException e) {
|
||||||
+ resourceBeforeDecrease + " to "
|
LOG.warn("Error happens when checking decrease request, Ignoring.."
|
||||||
+ decreaseRequest.getTargetCapacity());
|
+ " exception=", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Lock(Lock.NoLock.class)
|
@Lock(Lock.NoLock.class)
|
||||||
|
|
|
@ -47,10 +47,12 @@ import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueState;
|
import org.apache.hadoop.yarn.api.records.QueueState;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
|
||||||
import org.apache.hadoop.yarn.security.AccessType;
|
import org.apache.hadoop.yarn.security.AccessType;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
||||||
|
@ -1658,11 +1660,17 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
public Priority getDefaultApplicationPriority() {
|
public Priority getDefaultApplicationPriority() {
|
||||||
return defaultAppPriorityPerQueue;
|
return defaultAppPriorityPerQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param clusterResource Total cluster resource
|
||||||
|
* @param decreaseRequest The decrease request
|
||||||
|
* @param app The application of interest
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void decreaseContainer(Resource clusterResource,
|
public void decreaseContainer(Resource clusterResource,
|
||||||
SchedContainerChangeRequest decreaseRequest,
|
SchedContainerChangeRequest decreaseRequest,
|
||||||
FiCaSchedulerApp app) {
|
FiCaSchedulerApp app) throws InvalidResourceRequestException {
|
||||||
// If the container being decreased is reserved, we need to unreserve it
|
// If the container being decreased is reserved, we need to unreserve it
|
||||||
// first.
|
// first.
|
||||||
RMContainer rmContainer = decreaseRequest.getRMContainer();
|
RMContainer rmContainer = decreaseRequest.getRMContainer();
|
||||||
|
@ -1670,25 +1678,62 @@ public class LeafQueue extends AbstractCSQueue {
|
||||||
unreserveIncreasedContainer(clusterResource, app,
|
unreserveIncreasedContainer(clusterResource, app,
|
||||||
(FiCaSchedulerNode)decreaseRequest.getSchedulerNode(), rmContainer);
|
(FiCaSchedulerNode)decreaseRequest.getSchedulerNode(), rmContainer);
|
||||||
}
|
}
|
||||||
|
boolean resourceDecreased = false;
|
||||||
// Delta capacity is negative when it's a decrease request
|
Resource resourceBeforeDecrease;
|
||||||
Resource absDelta = Resources.negate(decreaseRequest.getDeltaCapacity());
|
// Grab queue lock to avoid race condition when getting container resource
|
||||||
|
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
// Delta is negative when it's a decrease request
|
// Make sure the decrease request is valid in terms of current resource
|
||||||
releaseResource(clusterResource, app, absDelta,
|
// and target resource. This must be done under the leaf queue lock.
|
||||||
decreaseRequest.getNodePartition(), decreaseRequest.getRMContainer(),
|
// Throws exception if the check fails.
|
||||||
true);
|
RMServerUtils.checkSchedContainerChangeRequest(decreaseRequest, false);
|
||||||
// Notify application
|
// Save resource before decrease for debug log
|
||||||
app.decreaseContainer(decreaseRequest);
|
resourceBeforeDecrease =
|
||||||
// Notify node
|
Resources.clone(rmContainer.getAllocatedResource());
|
||||||
decreaseRequest.getSchedulerNode()
|
// Do we have increase request for the same container? If so, remove it
|
||||||
.decreaseContainer(decreaseRequest.getContainerId(), absDelta);
|
boolean hasIncreaseRequest =
|
||||||
|
app.removeIncreaseRequest(decreaseRequest.getNodeId(),
|
||||||
|
decreaseRequest.getPriority(), decreaseRequest.getContainerId());
|
||||||
|
if (hasIncreaseRequest) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("While processing decrease requests, found an increase"
|
||||||
|
+ " request for the same container "
|
||||||
|
+ decreaseRequest.getContainerId()
|
||||||
|
+ ", removed the increase request");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Delta capacity is negative when it's a decrease request
|
||||||
|
Resource absDelta = Resources.negate(decreaseRequest.getDeltaCapacity());
|
||||||
|
if (Resources.equals(absDelta, Resources.none())) {
|
||||||
|
// If delta capacity of this decrease request is 0, this decrease
|
||||||
|
// request serves the purpose of cancelling an existing increase request
|
||||||
|
// if any
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Decrease target resource equals to existing resource for"
|
||||||
|
+ " container:" + decreaseRequest.getContainerId()
|
||||||
|
+ " ignore this decrease request.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Release the delta resource
|
||||||
|
releaseResource(clusterResource, app, absDelta,
|
||||||
|
decreaseRequest.getNodePartition(),
|
||||||
|
decreaseRequest.getRMContainer(),
|
||||||
|
true);
|
||||||
|
// Notify application
|
||||||
|
app.decreaseContainer(decreaseRequest);
|
||||||
|
// Notify node
|
||||||
|
decreaseRequest.getSchedulerNode()
|
||||||
|
.decreaseContainer(decreaseRequest.getContainerId(), absDelta);
|
||||||
|
resourceDecreased = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Notify parent
|
if (resourceDecreased) {
|
||||||
if (getParent() != null) {
|
// Notify parent queue outside of leaf queue lock
|
||||||
getParent().decreaseContainer(clusterResource, decreaseRequest, app);
|
getParent().decreaseContainer(clusterResource, decreaseRequest, app);
|
||||||
|
LOG.info("Application attempt " + app.getApplicationAttemptId()
|
||||||
|
+ " decreased container:" + decreaseRequest.getContainerId()
|
||||||
|
+ " from " + resourceBeforeDecrease + " to "
|
||||||
|
+ decreaseRequest.getTargetCapacity());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueState;
|
import org.apache.hadoop.yarn.api.records.QueueState;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.security.AccessType;
|
import org.apache.hadoop.yarn.security.AccessType;
|
||||||
|
@ -656,7 +657,8 @@ public class ParentQueue extends AbstractCSQueue {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void decreaseContainer(Resource clusterResource,
|
public void decreaseContainer(Resource clusterResource,
|
||||||
SchedContainerChangeRequest decreaseRequest, FiCaSchedulerApp app) {
|
SchedContainerChangeRequest decreaseRequest, FiCaSchedulerApp app)
|
||||||
|
throws InvalidResourceRequestException {
|
||||||
// delta capacity is negative when it's a decrease request
|
// delta capacity is negative when it's a decrease request
|
||||||
Resource absDeltaCapacity =
|
Resource absDeltaCapacity =
|
||||||
Resources.negate(decreaseRequest.getDeltaCapacity());
|
Resources.negate(decreaseRequest.getDeltaCapacity());
|
||||||
|
|
|
@ -21,8 +21,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -47,8 +50,12 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
|
||||||
|
.SchedulerApplicationAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica
|
||||||
|
.FiCaSchedulerNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
@ -57,12 +64,48 @@ import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestContainerResizing {
|
public class TestContainerResizing {
|
||||||
|
private static final Log LOG = LogFactory.getLog(TestContainerResizing.class);
|
||||||
private final int GB = 1024;
|
private final int GB = 1024;
|
||||||
|
|
||||||
private YarnConfiguration conf;
|
private YarnConfiguration conf;
|
||||||
|
|
||||||
RMNodeLabelsManager mgr;
|
RMNodeLabelsManager mgr;
|
||||||
|
|
||||||
|
class MyScheduler extends CapacityScheduler {
|
||||||
|
/*
|
||||||
|
* A Mock Scheduler to simulate the potential effect of deadlock between:
|
||||||
|
* 1. The AbstractYarnScheduler.decreaseContainers() call (from
|
||||||
|
* ApplicationMasterService thread)
|
||||||
|
* 2. The CapacityScheduler.allocateContainersToNode() call (from the
|
||||||
|
* scheduler thread)
|
||||||
|
*/
|
||||||
|
MyScheduler() {
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void decreaseContainers(
|
||||||
|
List<ContainerResourceChangeRequest> decreaseRequests,
|
||||||
|
SchedulerApplicationAttempt attempt) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch(InterruptedException e) {
|
||||||
|
LOG.debug("Thread interrupted.");
|
||||||
|
}
|
||||||
|
super.decreaseContainers(decreaseRequests, attempt);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch(InterruptedException e) {
|
||||||
|
LOG.debug("Thread interrupted.");
|
||||||
|
}
|
||||||
|
super.allocateContainersToNode(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
conf = new YarnConfiguration();
|
conf = new YarnConfiguration();
|
||||||
|
@ -958,6 +1001,50 @@ public class TestContainerResizing {
|
||||||
rm1.close();
|
rm1.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test (timeout = 60000)
|
||||||
|
public void testDecreaseContainerWillNotDeadlockContainerAllocation()
|
||||||
|
throws Exception {
|
||||||
|
// create and start MockRM with our MyScheduler
|
||||||
|
MockRM rm = new MockRM() {
|
||||||
|
@Override
|
||||||
|
public ResourceScheduler createScheduler() {
|
||||||
|
CapacityScheduler cs = new MyScheduler();
|
||||||
|
cs.setConf(conf);
|
||||||
|
return cs;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
rm.start();
|
||||||
|
// register a node
|
||||||
|
MockNM nm = rm.registerNode("h1:1234", 20 * GB);
|
||||||
|
// submit an application -> app1
|
||||||
|
RMApp app1 = rm.submitApp(3 * GB, "app", "user", null, "default");
|
||||||
|
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm);
|
||||||
|
// making sure resource is allocated
|
||||||
|
checkUsedResource(rm, "default", 3 * GB, null);
|
||||||
|
FiCaSchedulerApp app = getFiCaSchedulerApp(rm, app1.getApplicationId());
|
||||||
|
Assert.assertEquals(3 * GB,
|
||||||
|
app.getAppAttemptResourceUsage().getUsed().getMemory());
|
||||||
|
// making sure container is launched
|
||||||
|
ContainerId containerId1 =
|
||||||
|
ContainerId.newContainerId(am1.getApplicationAttemptId(), 1);
|
||||||
|
sentRMContainerLaunched(rm, containerId1);
|
||||||
|
// submit allocation request for a new container
|
||||||
|
am1.allocate(Collections.singletonList(ResourceRequest.newInstance(
|
||||||
|
Priority.newInstance(1), "*", Resources.createResource(2 * GB), 1)),
|
||||||
|
null);
|
||||||
|
// nm reports status update and triggers container allocation
|
||||||
|
nm.nodeHeartbeat(true);
|
||||||
|
// *In the mean time*, am1 asks to decrease its AM container resource from
|
||||||
|
// 3GB to 1GB
|
||||||
|
AllocateResponse response = am1.sendContainerResizingRequest(null,
|
||||||
|
Collections.singletonList(ContainerResourceChangeRequest
|
||||||
|
.newInstance(containerId1, Resources.createResource(GB))));
|
||||||
|
// verify that the containe resource is decreased
|
||||||
|
verifyContainerDecreased(response, containerId1, GB);
|
||||||
|
|
||||||
|
rm.close();
|
||||||
|
}
|
||||||
|
|
||||||
private void checkPendingResource(MockRM rm, String queueName, int memory,
|
private void checkPendingResource(MockRM rm, String queueName, int memory,
|
||||||
String label) {
|
String label) {
|
||||||
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
|
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
|
||||||
|
|
Loading…
Reference in New Issue