YARN-8248. Job hangs when a job requests a resource that its queue does not have. (Szilard Nemeth via Haibo Chen)
This commit is contained in:
parent
3d2d9dbcaa
commit
f48fec83d0
|
@ -18,9 +18,14 @@
|
||||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import com.google.common.collect.Maps;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -40,6 +45,8 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
import org.apache.hadoop.yarn.exceptions.InvalidLabelResourceRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||||
|
import org.apache.hadoop.yarn.exceptions
|
||||||
|
.SchedulerInvalidResoureRequestException;
|
||||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
import org.apache.hadoop.yarn.security.AccessType;
|
import org.apache.hadoop.yarn.security.AccessType;
|
||||||
|
@ -61,12 +68,37 @@ import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
@Unstable
|
@Unstable
|
||||||
public class SchedulerUtils {
|
public class SchedulerUtils {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class contains invalid resource information along with its
|
||||||
|
* resource request.
|
||||||
|
*/
|
||||||
|
public static class MaxResourceValidationResult {
|
||||||
|
private ResourceRequest resourceRequest;
|
||||||
|
private List<ResourceInformation> invalidResources;
|
||||||
|
|
||||||
|
MaxResourceValidationResult(ResourceRequest resourceRequest,
|
||||||
|
List<ResourceInformation> invalidResources) {
|
||||||
|
this.resourceRequest = resourceRequest;
|
||||||
|
this.invalidResources = invalidResources;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isValid() {
|
||||||
|
return invalidResources.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "MaxResourceValidationResult{" + "resourceRequest="
|
||||||
|
+ resourceRequest + ", invalidResources=" + invalidResources + '}';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static final Log LOG = LogFactory.getLog(SchedulerUtils.class);
|
private static final Log LOG = LogFactory.getLog(SchedulerUtils.class);
|
||||||
|
|
||||||
private static final RecordFactory recordFactory =
|
private static final RecordFactory recordFactory =
|
||||||
RecordFactoryProvider.getRecordFactory(null);
|
RecordFactoryProvider.getRecordFactory(null);
|
||||||
|
|
||||||
public static final String RELEASED_CONTAINER =
|
public static final String RELEASED_CONTAINER =
|
||||||
"Container released by application";
|
"Container released by application";
|
||||||
|
|
||||||
public static final String UPDATED_CONTAINER =
|
public static final String UPDATED_CONTAINER =
|
||||||
|
@ -325,6 +357,22 @@ public class SchedulerUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static Map<String, ResourceInformation> getZeroResources(
|
||||||
|
Resource resource) {
|
||||||
|
Map<String, ResourceInformation> resourceInformations = Maps.newHashMap();
|
||||||
|
int maxLength = ResourceUtils.getNumberOfKnownResourceTypes();
|
||||||
|
|
||||||
|
for (int i = 0; i < maxLength; i++) {
|
||||||
|
ResourceInformation resourceInformation =
|
||||||
|
resource.getResourceInformation(i);
|
||||||
|
if (resourceInformation.getValue() == 0L) {
|
||||||
|
resourceInformations.put(resourceInformation.getName(),
|
||||||
|
resourceInformation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return resourceInformations;
|
||||||
|
}
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static void checkResourceRequestAgainstAvailableResource(Resource reqResource,
|
static void checkResourceRequestAgainstAvailableResource(Resource reqResource,
|
||||||
|
@ -339,49 +387,88 @@ public class SchedulerUtils {
|
||||||
reqResourceName);
|
reqResourceName);
|
||||||
}
|
}
|
||||||
|
|
||||||
final ResourceInformation availableRI =
|
boolean valid = checkResource(requestedRI, availableResource);
|
||||||
availableResource.getResourceInformation(reqResourceName);
|
if (!valid) {
|
||||||
|
|
||||||
long requestedResourceValue = requestedRI.getValue();
|
|
||||||
long availableResourceValue = availableRI.getValue();
|
|
||||||
int unitsRelation = UnitsConversionUtil
|
|
||||||
.compareUnits(requestedRI.getUnits(), availableRI.getUnits());
|
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Requested resource information: " + requestedRI);
|
|
||||||
LOG.debug("Available resource information: " + availableRI);
|
|
||||||
LOG.debug("Relation of units: " + unitsRelation);
|
|
||||||
}
|
|
||||||
|
|
||||||
// requested resource unit is less than available resource unit
|
|
||||||
// e.g. requestedUnit: "m", availableUnit: "K")
|
|
||||||
if (unitsRelation < 0) {
|
|
||||||
availableResourceValue =
|
|
||||||
UnitsConversionUtil.convert(availableRI.getUnits(),
|
|
||||||
requestedRI.getUnits(), availableRI.getValue());
|
|
||||||
|
|
||||||
// requested resource unit is greater than available resource unit
|
|
||||||
// e.g. requestedUnit: "G", availableUnit: "M")
|
|
||||||
} else if (unitsRelation > 0) {
|
|
||||||
requestedResourceValue =
|
|
||||||
UnitsConversionUtil.convert(requestedRI.getUnits(),
|
|
||||||
availableRI.getUnits(), requestedRI.getValue());
|
|
||||||
}
|
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Requested resource value after conversion: " +
|
|
||||||
requestedResourceValue);
|
|
||||||
LOG.info("Available resource value after conversion: " +
|
|
||||||
availableResourceValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (requestedResourceValue > availableResourceValue) {
|
|
||||||
throwInvalidResourceException(reqResource, availableResource,
|
throwInvalidResourceException(reqResource, availableResource,
|
||||||
reqResourceName);
|
reqResourceName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static MaxResourceValidationResult
|
||||||
|
validateResourceRequestsAgainstQueueMaxResource(
|
||||||
|
ResourceRequest resReq, Resource availableResource)
|
||||||
|
throws SchedulerInvalidResoureRequestException {
|
||||||
|
final Resource reqResource = resReq.getCapability();
|
||||||
|
Map<String, ResourceInformation> resourcesWithZeroAmount =
|
||||||
|
getZeroResources(availableResource);
|
||||||
|
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Resources with zero amount: "
|
||||||
|
+ Arrays.toString(resourcesWithZeroAmount.entrySet().toArray()));
|
||||||
|
}
|
||||||
|
|
||||||
|
List<ResourceInformation> invalidResources = Lists.newArrayList();
|
||||||
|
for (int i = 0; i < ResourceUtils.getNumberOfKnownResourceTypes(); i++) {
|
||||||
|
final ResourceInformation requestedRI =
|
||||||
|
reqResource.getResourceInformation(i);
|
||||||
|
final String reqResourceName = requestedRI.getName();
|
||||||
|
|
||||||
|
if (resourcesWithZeroAmount.containsKey(reqResourceName)
|
||||||
|
&& requestedRI.getValue() > 0) {
|
||||||
|
invalidResources.add(requestedRI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new MaxResourceValidationResult(resReq, invalidResources);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks requested ResouceInformation against available Resource.
|
||||||
|
* @param requestedRI
|
||||||
|
* @param availableResource
|
||||||
|
* @return true if request is valid, false otherwise.
|
||||||
|
*/
|
||||||
|
private static boolean checkResource(
|
||||||
|
ResourceInformation requestedRI, Resource availableResource) {
|
||||||
|
final ResourceInformation availableRI =
|
||||||
|
availableResource.getResourceInformation(requestedRI.getName());
|
||||||
|
|
||||||
|
long requestedResourceValue = requestedRI.getValue();
|
||||||
|
long availableResourceValue = availableRI.getValue();
|
||||||
|
int unitsRelation = UnitsConversionUtil.compareUnits(requestedRI.getUnits(),
|
||||||
|
availableRI.getUnits());
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Requested resource information: " + requestedRI);
|
||||||
|
LOG.debug("Available resource information: " + availableRI);
|
||||||
|
LOG.debug("Relation of units: " + unitsRelation);
|
||||||
|
}
|
||||||
|
|
||||||
|
// requested resource unit is less than available resource unit
|
||||||
|
// e.g. requestedUnit: "m", availableUnit: "K")
|
||||||
|
if (unitsRelation < 0) {
|
||||||
|
availableResourceValue =
|
||||||
|
UnitsConversionUtil.convert(availableRI.getUnits(),
|
||||||
|
requestedRI.getUnits(), availableRI.getValue());
|
||||||
|
|
||||||
|
// requested resource unit is greater than available resource unit
|
||||||
|
// e.g. requestedUnit: "G", availableUnit: "M")
|
||||||
|
} else if (unitsRelation > 0) {
|
||||||
|
requestedResourceValue =
|
||||||
|
UnitsConversionUtil.convert(requestedRI.getUnits(),
|
||||||
|
availableRI.getUnits(), requestedRI.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Requested resource value after conversion: "
|
||||||
|
+ requestedResourceValue);
|
||||||
|
LOG.info("Available resource value after conversion: "
|
||||||
|
+ availableResourceValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
return requestedResourceValue <= availableResourceValue;
|
||||||
|
}
|
||||||
|
|
||||||
private static void throwInvalidResourceException(Resource reqResource,
|
private static void throwInvalidResourceException(Resource reqResource,
|
||||||
Resource availableResource, String reqResourceName)
|
Resource availableResource, String reqResourceName)
|
||||||
throws InvalidResourceRequestException {
|
throws InvalidResourceRequestException {
|
||||||
|
|
|
@ -459,7 +459,6 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
// Add it to allContainers list.
|
// Add it to allContainers list.
|
||||||
addToNewlyAllocatedContainers(node, rmContainer);
|
addToNewlyAllocatedContainers(node, rmContainer);
|
||||||
liveContainers.put(container.getId(), rmContainer);
|
liveContainers.put(container.getId(), rmContainer);
|
||||||
|
|
||||||
// Update consumption and track allocations
|
// Update consumption and track allocations
|
||||||
ContainerRequest containerRequest = appSchedulingInfo.allocate(
|
ContainerRequest containerRequest = appSchedulingInfo.allocate(
|
||||||
type, node, schedulerKey, container);
|
type, node, schedulerKey, container);
|
||||||
|
@ -867,6 +866,12 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
if (reserved) {
|
if (reserved) {
|
||||||
unreserve(schedulerKey, node);
|
unreserve(schedulerKey, node);
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug(String.format(
|
||||||
|
"Resource ask %s fits in available node resources %s, " +
|
||||||
|
"but no container was allocated",
|
||||||
|
capability, available));
|
||||||
|
}
|
||||||
return Resources.none();
|
return Resources.none();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1096,7 +1101,8 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
} else if (!getQueue().fitsInMaxShare(resource)) {
|
} else if (!getQueue().fitsInMaxShare(resource)) {
|
||||||
// The requested container must fit in queue maximum share
|
// The requested container must fit in queue maximum share
|
||||||
updateAMDiagnosticMsg(resource,
|
updateAMDiagnosticMsg(resource,
|
||||||
" exceeds current queue or its parents maximum resource allowed).");
|
" exceeds current queue or its parents maximum resource allowed). " +
|
||||||
|
"Max share of queue: " + getQueue().getMaxShare());
|
||||||
|
|
||||||
ret = false;
|
ret = false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -182,6 +182,9 @@ public class FSParentQueue extends FSQueue {
|
||||||
|
|
||||||
// If this queue is over its limit, reject
|
// If this queue is over its limit, reject
|
||||||
if (!assignContainerPreCheck(node)) {
|
if (!assignContainerPreCheck(node)) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Assign container precheck on node " + node + " failed");
|
||||||
|
}
|
||||||
return assigned;
|
return assigned;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
|
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
|
||||||
|
@ -42,6 +43,8 @@ import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.SchedulingRequest;
|
import org.apache.hadoop.yarn.api.records.SchedulingRequest;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions
|
||||||
|
.SchedulerInvalidResoureRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes;
|
||||||
|
@ -73,6 +76,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils.MaxResourceValidationResult;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
|
||||||
|
@ -449,10 +453,7 @@ public class FairScheduler extends
|
||||||
String message =
|
String message =
|
||||||
"Reject application " + applicationId + " submitted by user " + user
|
"Reject application " + applicationId + " submitted by user " + user
|
||||||
+ " with an empty queue name.";
|
+ " with an empty queue name.";
|
||||||
LOG.info(message);
|
rejectApplicationWithMessage(applicationId, message);
|
||||||
rmContext.getDispatcher().getEventHandler().handle(
|
|
||||||
new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
|
|
||||||
message));
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,10 +462,7 @@ public class FairScheduler extends
|
||||||
"Reject application " + applicationId + " submitted by user " + user
|
"Reject application " + applicationId + " submitted by user " + user
|
||||||
+ " with an illegal queue name " + queueName + ". "
|
+ " with an illegal queue name " + queueName + ". "
|
||||||
+ "The queue name cannot start/end with period.";
|
+ "The queue name cannot start/end with period.";
|
||||||
LOG.info(message);
|
rejectApplicationWithMessage(applicationId, message);
|
||||||
rmContext.getDispatcher().getEventHandler().handle(
|
|
||||||
new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
|
|
||||||
message));
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -476,6 +474,31 @@ public class FairScheduler extends
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rmApp != null && rmApp.getAMResourceRequests() != null) {
|
||||||
|
// Resources.fitsIn would always return false when queueMaxShare is 0
|
||||||
|
// for any resource, but only using Resources.fitsIn is not enough
|
||||||
|
// is it would return false for such cases when the requested
|
||||||
|
// resource is smaller than the max resource but that max resource is
|
||||||
|
// not zero, e.g. requested vCores = 2, max vCores = 1.
|
||||||
|
// With this check, we only reject those applications where resource
|
||||||
|
// requested is greater than 0 and we have 0
|
||||||
|
// of that resource on the queue.
|
||||||
|
List<MaxResourceValidationResult> invalidAMResourceRequests =
|
||||||
|
validateResourceRequests(rmApp.getAMResourceRequests(), queue);
|
||||||
|
|
||||||
|
if (!invalidAMResourceRequests.isEmpty()) {
|
||||||
|
String msg = String.format(
|
||||||
|
"Cannot submit application %s to queue %s because "
|
||||||
|
+ "it has zero amount of resource for a requested "
|
||||||
|
+ "resource! Invalid requested AM resources: %s, "
|
||||||
|
+ "maximum queue resources: %s",
|
||||||
|
applicationId, queue.getName(),
|
||||||
|
invalidAMResourceRequests, queue.getMaxShare());
|
||||||
|
rejectApplicationWithMessage(applicationId, msg);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Enforce ACLs
|
// Enforce ACLs
|
||||||
UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(
|
UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(
|
||||||
user);
|
user);
|
||||||
|
@ -485,9 +508,7 @@ public class FairScheduler extends
|
||||||
String msg = "User " + userUgi.getUserName()
|
String msg = "User " + userUgi.getUserName()
|
||||||
+ " cannot submit applications to queue " + queue.getName()
|
+ " cannot submit applications to queue " + queue.getName()
|
||||||
+ "(requested queuename is " + queueName + ")";
|
+ "(requested queuename is " + queueName + ")";
|
||||||
LOG.info(msg);
|
rejectApplicationWithMessage(applicationId, msg);
|
||||||
rmContext.getDispatcher().getEventHandler().handle(
|
|
||||||
new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED, msg));
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -604,10 +625,7 @@ public class FairScheduler extends
|
||||||
}
|
}
|
||||||
|
|
||||||
if (appRejectMsg != null && rmApp != null) {
|
if (appRejectMsg != null && rmApp != null) {
|
||||||
LOG.error(appRejectMsg);
|
rejectApplicationWithMessage(rmApp.getApplicationId(), appRejectMsg);
|
||||||
rmContext.getDispatcher().getEventHandler().handle(
|
|
||||||
new RMAppEvent(rmApp.getApplicationId(),
|
|
||||||
RMAppEventType.APP_REJECTED, appRejectMsg));
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -834,7 +852,6 @@ public class FairScheduler extends
|
||||||
List<ResourceRequest> ask, List<SchedulingRequest> schedulingRequests,
|
List<ResourceRequest> ask, List<SchedulingRequest> schedulingRequests,
|
||||||
List<ContainerId> release, List<String> blacklistAdditions,
|
List<ContainerId> release, List<String> blacklistAdditions,
|
||||||
List<String> blacklistRemovals, ContainerUpdates updateRequests) {
|
List<String> blacklistRemovals, ContainerUpdates updateRequests) {
|
||||||
|
|
||||||
// Make sure this application exists
|
// Make sure this application exists
|
||||||
FSAppAttempt application = getSchedulerApp(appAttemptId);
|
FSAppAttempt application = getSchedulerApp(appAttemptId);
|
||||||
if (application == null) {
|
if (application == null) {
|
||||||
|
@ -854,6 +871,24 @@ public class FairScheduler extends
|
||||||
return EMPTY_ALLOCATION;
|
return EMPTY_ALLOCATION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ApplicationId applicationId = application.getApplicationId();
|
||||||
|
FSLeafQueue queue = application.getQueue();
|
||||||
|
List<MaxResourceValidationResult> invalidAsks =
|
||||||
|
validateResourceRequests(ask, queue);
|
||||||
|
|
||||||
|
// We need to be fail-fast here if any invalid ask is detected.
|
||||||
|
// If we would have thrown exception later, this could be problematic as
|
||||||
|
// tokens and promoted / demoted containers would have been lost because
|
||||||
|
// scheduler would clear them right away and AM
|
||||||
|
// would not get this information.
|
||||||
|
if (!invalidAsks.isEmpty()) {
|
||||||
|
throw new SchedulerInvalidResoureRequestException(String.format(
|
||||||
|
"Resource request is invalid for application %s because queue %s "
|
||||||
|
+ "has 0 amount of resource for a resource type! "
|
||||||
|
+ "Validation result: %s",
|
||||||
|
applicationId, queue.getName(), invalidAsks));
|
||||||
|
}
|
||||||
|
|
||||||
// Handle promotions and demotions
|
// Handle promotions and demotions
|
||||||
handleContainerUpdates(application, updateRequests);
|
handleContainerUpdates(application, updateRequests);
|
||||||
|
|
||||||
|
@ -912,6 +947,7 @@ public class FairScheduler extends
|
||||||
|
|
||||||
Resource headroom = application.getHeadroom();
|
Resource headroom = application.getHeadroom();
|
||||||
application.setApplicationHeadroomForMetrics(headroom);
|
application.setApplicationHeadroomForMetrics(headroom);
|
||||||
|
|
||||||
return new Allocation(newlyAllocatedContainers, headroom,
|
return new Allocation(newlyAllocatedContainers, headroom,
|
||||||
preemptionContainerIds, null, null,
|
preemptionContainerIds, null, null,
|
||||||
application.pullUpdatedNMTokens(), null, null,
|
application.pullUpdatedNMTokens(), null, null,
|
||||||
|
@ -920,6 +956,34 @@ public class FairScheduler extends
|
||||||
application.pullPreviousAttemptContainers());
|
application.pullPreviousAttemptContainers());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<MaxResourceValidationResult> validateResourceRequests(
|
||||||
|
List<ResourceRequest> requests, FSLeafQueue queue) {
|
||||||
|
List<MaxResourceValidationResult> validationResults = Lists.newArrayList();
|
||||||
|
|
||||||
|
for (ResourceRequest resourceRequest : requests) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Validating resource request: " + resourceRequest);
|
||||||
|
}
|
||||||
|
|
||||||
|
MaxResourceValidationResult validationResult =
|
||||||
|
SchedulerUtils.validateResourceRequestsAgainstQueueMaxResource(
|
||||||
|
resourceRequest, queue.getMaxShare());
|
||||||
|
if (!validationResult.isValid()) {
|
||||||
|
validationResults.add(validationResult);
|
||||||
|
LOG.warn(String.format("Queue %s cannot handle resource request" +
|
||||||
|
"because it has zero available amount of resource " +
|
||||||
|
"for a requested resource type, " +
|
||||||
|
"so the resource request is ignored!"
|
||||||
|
+ " Requested resources: %s, " +
|
||||||
|
"maximum queue resources: %s",
|
||||||
|
queue.getName(), resourceRequest.getCapability(),
|
||||||
|
queue.getMaxShare()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return validationResults;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void nodeUpdate(RMNode nm) {
|
protected void nodeUpdate(RMNode nm) {
|
||||||
try {
|
try {
|
||||||
|
@ -1060,9 +1124,14 @@ public class FairScheduler extends
|
||||||
Resource assignedResource = Resources.clone(Resources.none());
|
Resource assignedResource = Resources.clone(Resources.none());
|
||||||
Resource maxResourcesToAssign = Resources.multiply(
|
Resource maxResourcesToAssign = Resources.multiply(
|
||||||
node.getUnallocatedResource(), 0.5f);
|
node.getUnallocatedResource(), 0.5f);
|
||||||
|
|
||||||
while (node.getReservedContainer() == null) {
|
while (node.getReservedContainer() == null) {
|
||||||
Resource assignment = queueMgr.getRootQueue().assignContainer(node);
|
Resource assignment = queueMgr.getRootQueue().assignContainer(node);
|
||||||
|
|
||||||
if (assignment.equals(Resources.none())) {
|
if (assignment.equals(Resources.none())) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("No container is allocated on node " + node);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1254,9 +1323,7 @@ public class FairScheduler extends
|
||||||
String message = "Application " + applicationId
|
String message = "Application " + applicationId
|
||||||
+ " submitted to a reservation which is not yet "
|
+ " submitted to a reservation which is not yet "
|
||||||
+ "currently active: " + resQName;
|
+ "currently active: " + resQName;
|
||||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
rejectApplicationWithMessage(applicationId, message);
|
||||||
new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
|
|
||||||
message));
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (!queue.getParent().getQueueName().equals(queueName)) {
|
if (!queue.getParent().getQueueName().equals(queueName)) {
|
||||||
|
@ -1264,9 +1331,7 @@ public class FairScheduler extends
|
||||||
"Application: " + applicationId + " submitted to a reservation "
|
"Application: " + applicationId + " submitted to a reservation "
|
||||||
+ resQName + " which does not belong to the specified queue: "
|
+ resQName + " which does not belong to the specified queue: "
|
||||||
+ queueName;
|
+ queueName;
|
||||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
rejectApplicationWithMessage(applicationId, message);
|
||||||
new RMAppEvent(applicationId, RMAppEventType.APP_REJECTED,
|
|
||||||
message));
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
// use the reservation queue to run the app
|
// use the reservation queue to run the app
|
||||||
|
@ -1279,7 +1344,13 @@ public class FairScheduler extends
|
||||||
} finally {
|
} finally {
|
||||||
readLock.unlock();
|
readLock.unlock();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void rejectApplicationWithMessage(ApplicationId applicationId,
|
||||||
|
String msg) {
|
||||||
|
LOG.info(msg);
|
||||||
|
rmContext.getDispatcher().getEventHandler().handle(new RMAppEvent(
|
||||||
|
applicationId, RMAppEventType.APP_REJECTED, msg));
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getDefaultQueueForPlanQueue(String queueName) {
|
private String getDefaultQueueForPlanQueue(String queueName) {
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -57,6 +58,7 @@ import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class FairSchedulerTestBase {
|
public class FairSchedulerTestBase {
|
||||||
|
@ -163,37 +165,43 @@ public class FairSchedulerTestBase {
|
||||||
protected ApplicationAttemptId createSchedulingRequest(
|
protected ApplicationAttemptId createSchedulingRequest(
|
||||||
int memory, int vcores, String queueId, String userId, int numContainers,
|
int memory, int vcores, String queueId, String userId, int numContainers,
|
||||||
int priority) {
|
int priority) {
|
||||||
ApplicationAttemptId id = createAppAttemptId(this.APP_ID++,
|
ResourceRequest request = createResourceRequest(memory, vcores,
|
||||||
this.ATTEMPT_ID++);
|
ResourceRequest.ANY, priority, numContainers, true);
|
||||||
|
return createSchedulingRequest(Lists.newArrayList(request), queueId,
|
||||||
|
userId);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected ApplicationAttemptId createSchedulingRequest(
|
||||||
|
Collection<ResourceRequest> requests, String queueId, String userId) {
|
||||||
|
ApplicationAttemptId id =
|
||||||
|
createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++);
|
||||||
scheduler.addApplication(id.getApplicationId(), queueId, userId, false);
|
scheduler.addApplication(id.getApplicationId(), queueId, userId, false);
|
||||||
// This conditional is for testAclSubmitApplication where app is rejected
|
// This conditional is for testAclSubmitApplication where app is rejected
|
||||||
// and no app is added.
|
// and no app is added.
|
||||||
if (scheduler.getSchedulerApplications().
|
if (scheduler.getSchedulerApplications()
|
||||||
containsKey(id.getApplicationId())) {
|
.containsKey(id.getApplicationId())) {
|
||||||
scheduler.addApplicationAttempt(id, false, false);
|
scheduler.addApplicationAttempt(id, false, false);
|
||||||
}
|
}
|
||||||
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
|
||||||
ResourceRequest request = createResourceRequest(memory, vcores,
|
List<ResourceRequest> ask = new ArrayList<>(requests);
|
||||||
ResourceRequest.ANY, priority, numContainers, true);
|
|
||||||
ask.add(request);
|
|
||||||
|
|
||||||
RMApp rmApp = mock(RMApp.class);
|
RMApp rmApp = mock(RMApp.class);
|
||||||
RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class);
|
RMAppAttempt rmAppAttempt = mock(RMAppAttempt.class);
|
||||||
when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt);
|
when(rmApp.getCurrentAppAttempt()).thenReturn(rmAppAttempt);
|
||||||
when(rmAppAttempt.getRMAppAttemptMetrics()).thenReturn(
|
when(rmAppAttempt.getRMAppAttemptMetrics()).thenReturn(
|
||||||
new RMAppAttemptMetrics(id, resourceManager.getRMContext()));
|
new RMAppAttemptMetrics(id, resourceManager.getRMContext()));
|
||||||
ApplicationSubmissionContext submissionContext =
|
ApplicationSubmissionContext submissionContext =
|
||||||
mock(ApplicationSubmissionContext.class);
|
mock(ApplicationSubmissionContext.class);
|
||||||
when(submissionContext.getUnmanagedAM()).thenReturn(false);
|
when(submissionContext.getUnmanagedAM()).thenReturn(false);
|
||||||
when(rmAppAttempt.getSubmissionContext()).thenReturn(submissionContext);
|
when(rmAppAttempt.getSubmissionContext()).thenReturn(submissionContext);
|
||||||
when(rmApp.getApplicationSubmissionContext()).thenReturn(submissionContext);
|
when(rmApp.getApplicationSubmissionContext()).thenReturn(submissionContext);
|
||||||
Container container = mock(Container.class);
|
Container container = mock(Container.class);
|
||||||
when(rmAppAttempt.getMasterContainer()).thenReturn(container);
|
when(rmAppAttempt.getMasterContainer()).thenReturn(container);
|
||||||
resourceManager.getRMContext().getRMApps()
|
resourceManager.getRMContext().getRMApps()
|
||||||
.put(id.getApplicationId(), rmApp);
|
.put(id.getApplicationId(), rmApp);
|
||||||
|
|
||||||
scheduler.allocate(id, ask, null, new ArrayList<ContainerId>(),
|
scheduler.allocate(id, ask, null, new ArrayList<>(),
|
||||||
null, null, NULL_UPDATE_REQUESTS);
|
null, null, NULL_UPDATE_REQUESTS);
|
||||||
scheduler.update();
|
scheduler.update();
|
||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
@ -252,13 +260,36 @@ public class FairSchedulerTestBase {
|
||||||
|
|
||||||
protected void createApplicationWithAMResource(ApplicationAttemptId attId,
|
protected void createApplicationWithAMResource(ApplicationAttemptId attId,
|
||||||
String queue, String user, Resource amResource) {
|
String queue, String user, Resource amResource) {
|
||||||
|
createApplicationWithAMResourceInternal(attId, queue, user, amResource,
|
||||||
|
null);
|
||||||
|
ApplicationId appId = attId.getApplicationId();
|
||||||
|
addApplication(queue, user, appId);
|
||||||
|
addAppAttempt(attId);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void createApplicationWithAMResource(ApplicationAttemptId attId,
|
||||||
|
String queue, String user, Resource amResource,
|
||||||
|
List<ResourceRequest> amReqs) {
|
||||||
|
createApplicationWithAMResourceInternal(attId, queue, user, amResource,
|
||||||
|
amReqs);
|
||||||
|
ApplicationId appId = attId.getApplicationId();
|
||||||
|
addApplication(queue, user, appId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createApplicationWithAMResourceInternal(
|
||||||
|
ApplicationAttemptId attId, String queue, String user,
|
||||||
|
Resource amResource, List<ResourceRequest> amReqs) {
|
||||||
RMContext rmContext = resourceManager.getRMContext();
|
RMContext rmContext = resourceManager.getRMContext();
|
||||||
ApplicationId appId = attId.getApplicationId();
|
ApplicationId appId = attId.getApplicationId();
|
||||||
RMApp rmApp = new RMAppImpl(appId, rmContext, conf, null, user, null,
|
RMApp rmApp = new RMAppImpl(appId, rmContext, conf, null, user, null,
|
||||||
ApplicationSubmissionContext.newInstance(appId, null, queue, null,
|
ApplicationSubmissionContext.newInstance(appId, null, queue, null,
|
||||||
mock(ContainerLaunchContext.class), false, false, 0, amResource,
|
mock(ContainerLaunchContext.class), false, false, 0, amResource,
|
||||||
null), scheduler, null, 0, null, null, null);
|
null),
|
||||||
|
scheduler, null, 0, null, null, amReqs);
|
||||||
rmContext.getRMApps().put(appId, rmApp);
|
rmContext.getRMApps().put(appId, rmApp);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addApplication(String queue, String user, ApplicationId appId) {
|
||||||
RMAppEvent event = new RMAppEvent(appId, RMAppEventType.START);
|
RMAppEvent event = new RMAppEvent(appId, RMAppEventType.START);
|
||||||
resourceManager.getRMContext().getRMApps().get(appId).handle(event);
|
resourceManager.getRMContext().getRMApps().get(appId).handle(event);
|
||||||
event = new RMAppEvent(appId, RMAppEventType.APP_NEW_SAVED);
|
event = new RMAppEvent(appId, RMAppEventType.APP_NEW_SAVED);
|
||||||
|
@ -268,8 +299,11 @@ public class FairSchedulerTestBase {
|
||||||
AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(
|
AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(
|
||||||
appId, queue, user);
|
appId, queue, user);
|
||||||
scheduler.handle(appAddedEvent);
|
scheduler.handle(appAddedEvent);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addAppAttempt(ApplicationAttemptId attId) {
|
||||||
AppAttemptAddedSchedulerEvent attempAddedEvent =
|
AppAttemptAddedSchedulerEvent attempAddedEvent =
|
||||||
new AppAttemptAddedSchedulerEvent(attId, false);
|
new AppAttemptAddedSchedulerEvent(attId, false);
|
||||||
scheduler.handle(attempAddedEvent);
|
scheduler.handle(attempAddedEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,9 +41,11 @@ import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
|
@ -62,6 +64,7 @@ import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
|
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
@ -69,6 +72,8 @@ import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.event.Event;
|
import org.apache.hadoop.yarn.event.Event;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
import org.apache.hadoop.yarn.exceptions
|
||||||
|
.SchedulerInvalidResoureRequestException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.security.YarnAuthorizationProvider;
|
import org.apache.hadoop.yarn.security.YarnAuthorizationProvider;
|
||||||
|
@ -5414,4 +5419,204 @@ public class TestFairScheduler extends FairSchedulerTestBase {
|
||||||
SchedulerUtils.COMPLETED_APPLICATION),
|
SchedulerUtils.COMPLETED_APPLICATION),
|
||||||
RMContainerEventType.EXPIRE);
|
RMContainerEventType.EXPIRE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAppRejectedToQueueWithZeroCapacityOfVcores()
|
||||||
|
throws IOException {
|
||||||
|
testAppRejectedToQueueWithZeroCapacityOfResource(
|
||||||
|
ResourceInformation.VCORES_URI);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAppRejectedToQueueWithZeroCapacityOfMemory()
|
||||||
|
throws IOException {
|
||||||
|
testAppRejectedToQueueWithZeroCapacityOfResource(
|
||||||
|
ResourceInformation.MEMORY_URI);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testAppRejectedToQueueWithZeroCapacityOfResource(String resource)
|
||||||
|
throws IOException {
|
||||||
|
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
|
||||||
|
generateAllocationFileWithZeroResource(resource);
|
||||||
|
|
||||||
|
final List<Event> recordedEvents = Lists.newArrayList();
|
||||||
|
|
||||||
|
RMContext spyContext = Mockito.spy(resourceManager.getRMContext());
|
||||||
|
Dispatcher mockDispatcher = mock(AsyncDispatcher.class);
|
||||||
|
when(mockDispatcher.getEventHandler()).thenReturn((EventHandler) event -> {
|
||||||
|
if (event instanceof RMAppEvent) {
|
||||||
|
recordedEvents.add(event);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
Mockito.doReturn(mockDispatcher).when(spyContext).getDispatcher();
|
||||||
|
((AsyncDispatcher) mockDispatcher).start();
|
||||||
|
|
||||||
|
scheduler.setRMContext(spyContext);
|
||||||
|
|
||||||
|
scheduler.init(conf);
|
||||||
|
scheduler.start();
|
||||||
|
scheduler.reinitialize(conf, resourceManager.getRMContext());
|
||||||
|
|
||||||
|
// submit app with queue name (queueA)
|
||||||
|
ApplicationAttemptId appAttemptId1 = createAppAttemptId(1, 1);
|
||||||
|
|
||||||
|
ResourceRequest amReqs = ResourceRequest.newBuilder()
|
||||||
|
.capability(Resource.newInstance(5 * GB, 3)).build();
|
||||||
|
createApplicationWithAMResource(appAttemptId1, "queueA", "user1",
|
||||||
|
Resource.newInstance(GB, 1), Lists.newArrayList(amReqs));
|
||||||
|
scheduler.update();
|
||||||
|
|
||||||
|
assertEquals("Exactly one APP_REJECTED event is expected", 1,
|
||||||
|
recordedEvents.size());
|
||||||
|
Event event = recordedEvents.get(0);
|
||||||
|
RMAppEvent rmAppEvent = (RMAppEvent) event;
|
||||||
|
assertEquals(RMAppEventType.APP_REJECTED, rmAppEvent.getType());
|
||||||
|
assertTrue("Diagnostic message does not match: " +
|
||||||
|
rmAppEvent.getDiagnosticMsg(),
|
||||||
|
rmAppEvent.getDiagnosticMsg()
|
||||||
|
.matches("Cannot submit application application[\\d_]+ to queue "
|
||||||
|
+ "root.queueA because it has zero amount of resource "
|
||||||
|
+ "for a requested resource! " +
|
||||||
|
"Invalid requested AM resources: .+, "
|
||||||
|
+ "maximum queue resources: .+"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void generateAllocationFileWithZeroResource(String resource)
|
||||||
|
throws IOException {
|
||||||
|
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
|
||||||
|
out.println("<?xml version=\"1.0\"?>");
|
||||||
|
out.println("<allocations>");
|
||||||
|
out.println("<queue name=\"queueA\">");
|
||||||
|
|
||||||
|
String resources = "";
|
||||||
|
if (resource.equals(ResourceInformation.MEMORY_URI)) {
|
||||||
|
resources = "0 mb,2vcores";
|
||||||
|
} else if (resource.equals(ResourceInformation.VCORES_URI)) {
|
||||||
|
resources = "10000 mb,0vcores";
|
||||||
|
}
|
||||||
|
out.println("<minResources>" + resources + "</minResources>");
|
||||||
|
out.println("<maxResources>" + resources + "</maxResources>");
|
||||||
|
out.println("<weight>2.0</weight>");
|
||||||
|
out.println("</queue>");
|
||||||
|
out.println("<queue name=\"queueB\">");
|
||||||
|
out.println("<minResources>1 mb 1 vcores</minResources>");
|
||||||
|
out.println("<weight>0.0</weight>");
|
||||||
|
out.println("</queue>");
|
||||||
|
out.println("</allocations>");
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSchedulingRejectedToQueueWithZeroCapacityOfMemory()
|
||||||
|
throws IOException {
|
||||||
|
// This request is not valid as queue will have 0 capacity of memory and
|
||||||
|
// the requests asks 2048M
|
||||||
|
ResourceRequest invalidRequest =
|
||||||
|
createResourceRequest(2048, 2, ResourceRequest.ANY, 1, 2, true);
|
||||||
|
|
||||||
|
ResourceRequest validRequest =
|
||||||
|
createResourceRequest(0, 0, ResourceRequest.ANY, 1, 2, true);
|
||||||
|
testSchedulingRejectedToQueueZeroCapacityOfResource(
|
||||||
|
ResourceInformation.MEMORY_URI,
|
||||||
|
Lists.newArrayList(invalidRequest, validRequest));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSchedulingAllowedToQueueWithZeroCapacityOfMemory()
|
||||||
|
throws IOException {
|
||||||
|
testSchedulingAllowedToQueueZeroCapacityOfResource(
|
||||||
|
ResourceInformation.MEMORY_URI, 0, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSchedulingRejectedToQueueWithZeroCapacityOfVcores()
|
||||||
|
throws IOException {
|
||||||
|
// This request is not valid as queue will have 0 capacity of vCores and
|
||||||
|
// the requests asks 1
|
||||||
|
ResourceRequest invalidRequest =
|
||||||
|
createResourceRequest(0, 1, ResourceRequest.ANY, 1, 2, true);
|
||||||
|
|
||||||
|
ResourceRequest validRequest =
|
||||||
|
createResourceRequest(0, 0, ResourceRequest.ANY, 1, 2, true);
|
||||||
|
|
||||||
|
testSchedulingRejectedToQueueZeroCapacityOfResource(
|
||||||
|
ResourceInformation.VCORES_URI,
|
||||||
|
Lists.newArrayList(invalidRequest, validRequest));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSchedulingAllowedToQueueWithZeroCapacityOfVcores()
|
||||||
|
throws IOException {
|
||||||
|
testSchedulingAllowedToQueueZeroCapacityOfResource(
|
||||||
|
ResourceInformation.VCORES_URI, 2048, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSchedulingRejectedToQueueZeroCapacityOfResource(
|
||||||
|
String resource, Collection<ResourceRequest> requests)
|
||||||
|
throws IOException {
|
||||||
|
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
|
||||||
|
generateAllocationFileWithZeroResource(resource);
|
||||||
|
|
||||||
|
scheduler.init(conf);
|
||||||
|
scheduler.start();
|
||||||
|
scheduler.reinitialize(conf, resourceManager.getRMContext());
|
||||||
|
|
||||||
|
// Add a node
|
||||||
|
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(2048, 2));
|
||||||
|
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
|
||||||
|
scheduler.handle(nodeEvent1);
|
||||||
|
|
||||||
|
try {
|
||||||
|
createSchedulingRequest(requests, "queueA", "user1");
|
||||||
|
fail("Exception is expected because the queue has zero capacity of "
|
||||||
|
+ resource + " and requested resource capabilities are: "
|
||||||
|
+ requests.stream().map(ResourceRequest::getCapability)
|
||||||
|
.collect(Collectors.toList()));
|
||||||
|
} catch (SchedulerInvalidResoureRequestException e) {
|
||||||
|
assertTrue(
|
||||||
|
"The thrown exception is not the expected one. Exception message: "
|
||||||
|
+ e.getMessage(),
|
||||||
|
e.getMessage()
|
||||||
|
.matches("Resource request is invalid for application "
|
||||||
|
+ "application[\\d_]+ because queue root\\.queueA has 0 "
|
||||||
|
+ "amount of resource for a resource type! "
|
||||||
|
+ "Validation result:.*"));
|
||||||
|
|
||||||
|
List<ApplicationAttemptId> appsInQueue =
|
||||||
|
scheduler.getAppsInQueue("queueA");
|
||||||
|
assertEquals("Number of apps in queue 'queueA' should be one!", 1,
|
||||||
|
appsInQueue.size());
|
||||||
|
|
||||||
|
ApplicationAttemptId appAttemptId =
|
||||||
|
scheduler.getAppsInQueue("queueA").get(0);
|
||||||
|
assertNotNull(
|
||||||
|
"Scheduler app for appAttemptId " + appAttemptId
|
||||||
|
+ " should not be null!",
|
||||||
|
scheduler.getSchedulerApp(appAttemptId));
|
||||||
|
|
||||||
|
FSAppAttempt schedulerApp = scheduler.getSchedulerApp(appAttemptId);
|
||||||
|
assertNotNull("Scheduler app queueInfo for appAttemptId " + appAttemptId
|
||||||
|
+ " should not be null!", schedulerApp.getAppSchedulingInfo());
|
||||||
|
|
||||||
|
assertTrue("There should be no requests accepted", schedulerApp
|
||||||
|
.getAppSchedulingInfo().getAllResourceRequests().isEmpty());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testSchedulingAllowedToQueueZeroCapacityOfResource(
|
||||||
|
String resource, int memory, int vCores) throws IOException {
|
||||||
|
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
|
||||||
|
generateAllocationFileWithZeroResource(resource);
|
||||||
|
|
||||||
|
scheduler.init(conf);
|
||||||
|
scheduler.start();
|
||||||
|
scheduler.reinitialize(conf, resourceManager.getRMContext());
|
||||||
|
|
||||||
|
// Add a node
|
||||||
|
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(2048, 2));
|
||||||
|
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
|
||||||
|
scheduler.handle(nodeEvent1);
|
||||||
|
|
||||||
|
createSchedulingRequest(memory, vCores, "queueA", "user1", 1, 2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue