YARN-9664. Improve response of scheduler/app activities for better understanding. Contributed by Tao Yang.

This commit is contained in:
Weiwei Yang 2019-08-29 18:14:39 +08:00
parent c749f62470
commit 8c0759d02a
26 changed files with 1322 additions and 614 deletions

View File

@ -30,10 +30,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
import java.util.function.Supplier;
/**
* Utility for logging scheduler activities
*/
// FIXME: make sure CandidateNodeSet works with this class
public class ActivitiesLogger {
private static final Logger LOG =
LoggerFactory.getLogger(ActivitiesLogger.class);
@ -52,9 +53,9 @@ public class ActivitiesLogger {
ActivitiesManager activitiesManager, SchedulerNode node,
SchedulerApplicationAttempt application,
SchedulerRequestKey requestKey,
String diagnostic) {
String diagnostic, ActivityLevel level) {
recordAppActivityWithoutAllocation(activitiesManager, node, application,
requestKey, diagnostic, ActivityState.SKIPPED);
requestKey, diagnostic, ActivityState.SKIPPED, level);
}
/*
@ -72,7 +73,7 @@ public class ActivitiesLogger {
if (activitiesManager.shouldRecordThisNode(nodeId)) {
recordActivity(activitiesManager, nodeId, application.getQueueName(),
application.getApplicationId().toString(), priority,
ActivityState.REJECTED, diagnostic, "app");
ActivityState.REJECTED, diagnostic, ActivityLevel.APP);
}
finishSkippedAppAllocationRecording(activitiesManager,
application.getApplicationId(), ActivityState.REJECTED, diagnostic);
@ -87,50 +88,55 @@ public class ActivitiesLogger {
ActivitiesManager activitiesManager, SchedulerNode node,
SchedulerApplicationAttempt application,
SchedulerRequestKey schedulerKey,
String diagnostic, ActivityState appState) {
String diagnostic, ActivityState appState, ActivityLevel level) {
if (activitiesManager == null) {
return;
}
NodeId nodeId = getRecordingNodeId(activitiesManager, node);
if (activitiesManager.shouldRecordThisNode(nodeId)) {
if (schedulerKey != null) {
String allocationRequestId =
String.valueOf(schedulerKey.getAllocationRequestId());
String priorityStr = getPriorityStr(schedulerKey);
String requestName = getRequestName(priorityStr, allocationRequestId);
String type = "container";
// Add application-container activity into specific node allocation.
activitiesManager.addSchedulingActivityForNode(nodeId,
requestName, null,
priorityStr, appState, diagnostic, type,
null);
type = "request";
// Add application-container activity into specific node allocation.
activitiesManager.addSchedulingActivityForNode(nodeId,
application.getApplicationId().toString(), requestName,
priorityStr, appState,
ActivityDiagnosticConstant.EMPTY, type, allocationRequestId);
String requestName = null;
Integer priority = null;
Long allocationRequestId = null;
if (level == ActivityLevel.NODE || level == ActivityLevel.REQUEST) {
if (schedulerKey == null) {
LOG.warn("Request key should not be null at " + level + " level.");
return;
}
priority = getPriority(schedulerKey);
allocationRequestId = schedulerKey.getAllocationRequestId();
requestName = getRequestName(priority, allocationRequestId);
}
switch (level) {
case NODE:
recordSchedulerActivityAtNodeLevel(activitiesManager, application,
requestName, priority, allocationRequestId, null, nodeId,
appState, diagnostic);
break;
case REQUEST:
recordSchedulerActivityAtRequestLevel(activitiesManager, application,
requestName, priority, allocationRequestId, nodeId, appState,
diagnostic);
break;
case APP:
recordSchedulerActivityAtAppLevel(activitiesManager, application,
nodeId, appState, diagnostic);
break;
default:
LOG.warn("Doesn't handle app activities at " + level + " level.");
break;
}
// Add queue-application activity into specific node allocation.
activitiesManager.addSchedulingActivityForNode(nodeId,
application.getQueueName(),
application.getApplicationId().toString(),
application.getPriority().toString(), appState,
schedulerKey != null ? ActivityDiagnosticConstant.EMPTY :
diagnostic, "app", null);
}
// Add application-container activity into specific application allocation
// Under this condition, it fails to allocate a container to this
// application, so containerId is null.
if (activitiesManager.shouldRecordThisApp(
application.getApplicationId())) {
String type = "container";
activitiesManager.addSchedulingActivityForApp(
application.getApplicationId(), null,
getPriorityStr(schedulerKey), appState,
diagnostic, type, nodeId,
getPriority(schedulerKey), appState,
diagnostic, level, nodeId,
schedulerKey == null ?
null : String.valueOf(schedulerKey.getAllocationRequestId()));
null : schedulerKey.getAllocationRequestId());
}
}
@ -150,49 +156,68 @@ public class ActivitiesLogger {
nodeId = updatedContainer.getNodeId();
}
if (activitiesManager.shouldRecordThisNode(nodeId)) {
String containerPriorityStr =
updatedContainer.getContainer().getPriority().toString();
String allocationRequestId = String
.valueOf(updatedContainer.getContainer().getAllocationRequestId());
Integer containerPriority =
updatedContainer.getContainer().getPriority().getPriority();
Long allocationRequestId =
updatedContainer.getContainer().getAllocationRequestId();
String requestName =
getRequestName(containerPriorityStr, allocationRequestId);
String type = "container";
// Add application-container activity into specific node allocation.
activitiesManager.addSchedulingActivityForNode(nodeId,
requestName,
updatedContainer.getContainer().toString(),
containerPriorityStr,
activityState, ActivityDiagnosticConstant.EMPTY, type, null);
type = "request";
// Add application-container activity into specific node allocation.
activitiesManager.addSchedulingActivityForNode(nodeId,
application.getApplicationId().toString(),
requestName, containerPriorityStr,
activityState, ActivityDiagnosticConstant.EMPTY, type,
allocationRequestId);
type = "app";
// Add queue-application activity into specific node allocation.
activitiesManager.addSchedulingActivityForNode(nodeId,
application.getQueueName(),
application.getApplicationId().toString(),
application.getPriority().toString(), ActivityState.ACCEPTED,
ActivityDiagnosticConstant.EMPTY, type, null);
getRequestName(containerPriority, allocationRequestId);
// Add node,request,app level activities into scheduler activities.
recordSchedulerActivityAtNodeLevel(activitiesManager, application,
requestName, containerPriority, allocationRequestId,
updatedContainer.getContainer().toString(), nodeId, activityState,
ActivityDiagnosticConstant.EMPTY);
}
// Add application-container activity into specific application allocation
if (activitiesManager.shouldRecordThisApp(
application.getApplicationId())) {
String type = "container";
activitiesManager.addSchedulingActivityForApp(
application.getApplicationId(),
updatedContainer.getContainerId(),
updatedContainer.getContainer().getPriority().toString(),
activityState, ActivityDiagnosticConstant.EMPTY, type, nodeId,
String.valueOf(
updatedContainer.getContainer().getAllocationRequestId()));
updatedContainer.getContainer().getPriority().getPriority(),
activityState, ActivityDiagnosticConstant.EMPTY,
ActivityLevel.NODE, nodeId,
updatedContainer.getContainer().getAllocationRequestId());
}
}
@SuppressWarnings("parameternumber")
private static void recordSchedulerActivityAtNodeLevel(
ActivitiesManager activitiesManager, SchedulerApplicationAttempt app,
String requestName, Integer priority, Long allocationRequestId,
String containerId, NodeId nodeId, ActivityState state,
String diagnostic) {
activitiesManager
.addSchedulingActivityForNode(nodeId, requestName, containerId, null,
state, diagnostic, ActivityLevel.NODE, null);
// Record request level activity additionally.
recordSchedulerActivityAtRequestLevel(activitiesManager, app, requestName,
priority, allocationRequestId, nodeId, state,
ActivityDiagnosticConstant.EMPTY);
}
@SuppressWarnings("parameternumber")
private static void recordSchedulerActivityAtRequestLevel(
ActivitiesManager activitiesManager, SchedulerApplicationAttempt app,
String requestName, Integer priority, Long allocationRequestId,
NodeId nodeId, ActivityState state, String diagnostic) {
activitiesManager.addSchedulingActivityForNode(nodeId,
app.getApplicationId().toString(), requestName, priority,
state, diagnostic, ActivityLevel.REQUEST,
allocationRequestId);
// Record app level activity additionally.
recordSchedulerActivityAtAppLevel(activitiesManager, app, nodeId, state,
ActivityDiagnosticConstant.EMPTY);
}
private static void recordSchedulerActivityAtAppLevel(
ActivitiesManager activitiesManager, SchedulerApplicationAttempt app,
NodeId nodeId, ActivityState state, String diagnostic) {
activitiesManager.addSchedulingActivityForNode(nodeId, app.getQueueName(),
app.getApplicationId().toString(), app.getPriority().getPriority(),
state, diagnostic, ActivityLevel.APP, null);
}
/*
* Invoked when scheduler starts to look at this application within one node
* update.
@ -252,13 +277,20 @@ public class ActivitiesLogger {
public static void recordQueueActivity(ActivitiesManager activitiesManager,
SchedulerNode node, String parentQueueName, String queueName,
ActivityState state, String diagnostic) {
recordQueueActivity(activitiesManager, node, parentQueueName, queueName,
state, () -> diagnostic);
}
public static void recordQueueActivity(ActivitiesManager activitiesManager,
SchedulerNode node, String parentQueueName, String queueName,
ActivityState state, Supplier<String> diagnosticSupplier) {
if (activitiesManager == null) {
return;
}
NodeId nodeId = getRecordingNodeId(activitiesManager, node);
if (activitiesManager.shouldRecordThisNode(nodeId)) {
recordActivity(activitiesManager, nodeId, parentQueueName, queueName,
null, state, diagnostic, null);
null, state, diagnosticSupplier.get(), ActivityLevel.QUEUE);
}
}
}
@ -299,11 +331,11 @@ public class ActivitiesLogger {
* Invoked when node heartbeat finishes
*/
public static void finishNodeUpdateRecording(
ActivitiesManager activitiesManager, NodeId nodeID) {
ActivitiesManager activitiesManager, NodeId nodeID, String partition) {
if (activitiesManager == null) {
return;
}
activitiesManager.finishNodeUpdateRecording(nodeID);
activitiesManager.finishNodeUpdateRecording(nodeID, partition);
}
/*
@ -320,11 +352,11 @@ public class ActivitiesLogger {
// Add queue, application or container activity into specific node allocation.
private static void recordActivity(ActivitiesManager activitiesManager,
NodeId nodeId, String parentName, String childName,
Priority priority, ActivityState state, String diagnostic, String type) {
NodeId nodeId, String parentName, String childName, Priority priority,
ActivityState state, String diagnostic, ActivityLevel level) {
activitiesManager.addSchedulingActivityForNode(nodeId, parentName,
childName, priority != null ? priority.toString() : null, state,
diagnostic, type, null);
childName, priority != null ? priority.getPriority() : null, state,
diagnostic, level, null);
}
private static NodeId getRecordingNodeId(ActivitiesManager activitiesManager,
@ -333,16 +365,16 @@ public class ActivitiesLogger {
activitiesManager.getRecordingNodeId(node);
}
private static String getRequestName(String priority,
String allocationRequestId) {
private static String getRequestName(Integer priority,
Long allocationRequestId) {
return "request_"
+ (priority == null ? "" : priority)
+ "_" + (allocationRequestId == null ? "" : allocationRequestId);
}
private static String getPriorityStr(SchedulerRequestKey schedulerKey) {
private static Integer getPriority(SchedulerRequestKey schedulerKey) {
Priority priority = schedulerKey == null ?
null : schedulerKey.getPriority();
return priority == null ? null : priority.toString();
return priority == null ? null : priority.getPriority();
}
}

View File

@ -124,7 +124,7 @@ public class ActivitiesManager extends AbstractService {
}
public AppActivitiesInfo getAppActivitiesInfo(ApplicationId applicationId,
Set<String> requestPriorities, Set<String> allocationRequestIds,
Set<Integer> requestPriorities, Set<Long> allocationRequestIds,
RMWSConsts.ActivitiesGroupBy groupBy, int limit, boolean summarize,
double maxTimeInSeconds) {
RMApp app = rmContext.getRMApps().get(applicationId);
@ -186,20 +186,18 @@ public class ActivitiesManager extends AbstractService {
}
List<ActivityNode> activityNodes = appAllocation.getAllocationAttempts();
for (ActivityNode an : activityNodes) {
if (an.getNodeId() != null) {
nodeActivities.putIfAbsent(
an.getRequestPriority() + "_" + an.getAllocationRequestId() + "_"
+ an.getNodeId(), an);
}
nodeActivities.putIfAbsent(
an.getRequestPriority() + "_" + an.getAllocationRequestId() + "_"
+ an.getNodeId(), an);
}
}
AppAllocation lastAppAllocation = allocations.get(allocations.size() - 1);
AppAllocation summarizedAppAllocation =
new AppAllocation(lastAppAllocation.getPriority(), null,
lastAppAllocation.getQueueName());
summarizedAppAllocation
.updateAppContainerStateAndTime(null, lastAppAllocation.getAppState(),
lastAppAllocation.getTime(), lastAppAllocation.getDiagnostic());
summarizedAppAllocation.updateAppContainerStateAndTime(null,
lastAppAllocation.getActivityState(), lastAppAllocation.getTime(),
lastAppAllocation.getDiagnostic());
summarizedAppAllocation
.setAllocationAttempts(new ArrayList<>(nodeActivities.values()));
return summarizedAppAllocation;
@ -282,7 +280,7 @@ public class ActivitiesManager extends AbstractService {
Map.Entry<NodeId, List<NodeAllocation>> nodeAllocation = ite.next();
List<NodeAllocation> allocations = nodeAllocation.getValue();
if (allocations.size() > 0
&& curTS - allocations.get(0).getTimeStamp()
&& curTS - allocations.get(0).getTimestamp()
> schedulerActivitiesTTL) {
ite.remove();
}
@ -383,26 +381,26 @@ public class ActivitiesManager extends AbstractService {
// Add queue, application or container activity into specific node allocation.
void addSchedulingActivityForNode(NodeId nodeId, String parentName,
String childName, String priority, ActivityState state, String diagnostic,
String type, String allocationRequestId) {
String childName, Integer priority, ActivityState state,
String diagnostic, ActivityLevel level, Long allocationRequestId) {
if (shouldRecordThisNode(nodeId)) {
NodeAllocation nodeAllocation = getCurrentNodeAllocation(nodeId);
nodeAllocation.addAllocationActivity(parentName, childName, priority,
state, diagnostic, type, nodeId, allocationRequestId);
state, diagnostic, level, nodeId, allocationRequestId);
}
}
// Add queue, application or container activity into specific application
// allocation.
void addSchedulingActivityForApp(ApplicationId applicationId,
ContainerId containerId, String priority, ActivityState state,
String diagnostic, String type, NodeId nodeId,
String allocationRequestId) {
ContainerId containerId, Integer priority, ActivityState state,
String diagnostic, ActivityLevel level, NodeId nodeId,
Long allocationRequestId) {
if (shouldRecordThisApp(applicationId)) {
AppAllocation appAllocation = appsAllocation.get().get(applicationId);
appAllocation.addAppAllocationActivity(containerId == null ?
"Container-Id-Not-Assigned" :
containerId.toString(), priority, state, diagnostic, type, nodeId,
containerId.toString(), priority, state, diagnostic, level, nodeId,
allocationRequestId);
}
}
@ -450,16 +448,17 @@ public class ActivitiesManager extends AbstractService {
}
}
void finishNodeUpdateRecording(NodeId nodeID) {
void finishNodeUpdateRecording(NodeId nodeID, String partition) {
List<NodeAllocation> value = recordingNodesAllocation.get().get(nodeID);
long timeStamp = SystemClock.getInstance().getTime();
long timestamp = SystemClock.getInstance().getTime();
if (value != null) {
if (value.size() > 0) {
lastAvailableNodeActivities = value;
for (NodeAllocation allocation : lastAvailableNodeActivities) {
allocation.transformToTree();
allocation.setTimeStamp(timeStamp);
allocation.setTimestamp(timestamp);
allocation.setPartition(partition);
}
if (recordNextAvailableNode) {
recordNextAvailableNode = false;

View File

@ -40,11 +40,11 @@ public final class ActivitiesUtils {
}
if (groupBy == RMWSConsts.ActivitiesGroupBy.DIAGNOSTIC) {
Map<ActivityState, Map<String, List<String>>> groupingResults =
activityNodes.stream().collect(Collectors
.groupingBy(ActivityNode::getState, Collectors
activityNodes.stream()
.filter(e -> e.getNodeId() != null)
.collect(Collectors.groupingBy(ActivityNode::getState, Collectors
.groupingBy(ActivityNode::getShortDiagnostic,
Collectors.mapping(e -> e.getNodeId() == null ?
"" :
Collectors.mapping(e -> e.getNodeId() == null ? "" :
e.getNodeId().toString(), Collectors.toList()))));
return groupingResults.entrySet().stream().flatMap(
stateMap -> stateMap.getValue().entrySet().stream().map(
@ -53,8 +53,8 @@ public final class ActivitiesUtils {
diagMap.getValue())))
.collect(Collectors.toList());
} else {
return activityNodes.stream().map(
e -> new ActivityNodeInfo(e.getName(), e.getState(),
return activityNodes.stream().filter(e -> e.getNodeId() != null)
.map(e -> new ActivityNodeInfo(e.getName(), e.getState(),
e.getDiagnostic(), e.getNodeId())).collect(Collectors.toList());
}
}

View File

@ -26,51 +26,81 @@ public class ActivityDiagnosticConstant {
// In order not to show "diagnostic" line in frontend,
// we set the value to null.
public final static String EMPTY = null;
public final static String NOT_ABLE_TO_ACCESS_PARTITION =
"Not able to access partition";
/*
* Initial check diagnostics
*/
public final static String INIT_CHECK_SINGLE_NODE_REMOVED =
"Initial check: node has been removed from scheduler";
public final static String INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT =
"Initial check: node resource is insufficient for minimum allocation";
public final static String INIT_CHECK_PARTITION_RESOURCE_INSUFFICIENT =
"Initial check: insufficient resource in partition";
/*
* Queue level diagnostics
*/
public final static String QUEUE_NOT_ABLE_TO_ACCESS_PARTITION =
"Queue is not able to access partition";
public final static String QUEUE_HIT_MAX_CAPACITY_LIMIT =
"Queue hits max-capacity limit";
public final static String QUEUE_HIT_USER_MAX_CAPACITY_LIMIT =
"Queue hits user max-capacity limit";
public final static String QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM =
"Queue does not have enough headroom for inner highest-priority request";
public final static String QUEUE_DO_NOT_NEED_MORE_RESOURCE =
"Queue does not need more resource";
public final static String QUEUE_MAX_CAPACITY_LIMIT =
"Hit queue max-capacity limit";
public final static String USER_CAPACITY_MAXIMUM_LIMIT =
"Hit user capacity maximum limit";
public final static String SKIP_BLACK_LISTED_NODE = "Skip black listed node";
public final static String PRIORITY_SKIPPED = "Priority skipped";
public final static String PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST =
"Priority skipped because off-switch request is null";
public final static String SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY =
"Priority skipped because of relax locality is not allowed";
public final static String SKIP_IN_IGNORE_EXCLUSIVITY_MODE =
"Skipping assigning to Node in Ignore Exclusivity mode";
public final static String DO_NOT_NEED_ALLOCATIONATTEMPTINFOS =
"Doesn't need containers based on reservation algo!";
public final static String QUEUE_SKIPPED_HEADROOM =
"Queue skipped because of headroom";
public final static String NON_PARTITIONED_PARTITION_FIRST =
"Non-partitioned resource request should be scheduled to "
+ "non-partitioned partition first";
public final static String SKIP_NODE_LOCAL_REQUEST =
"Skip node-local request";
public final static String SKIP_RACK_LOCAL_REQUEST =
"Skip rack-local request";
public final static String SKIP_OFF_SWITCH_REQUEST =
"Skip offswitch request";
public final static String REQUEST_CAN_NOT_ACCESS_NODE_LABEL =
"Resource request can not access the label";
public final static String NOT_SUFFICIENT_RESOURCE =
"Node does not have sufficient resource for request";
public final static String LOCALITY_SKIPPED = "Locality skipped";
public final static String FAIL_TO_ALLOCATE = "Fail to allocate";
public final static String COULD_NOT_GET_CONTAINER =
"Couldn't get container for allocation";
public final static String QUEUE_SKIPPED_TO_RESPECT_FIFO = "Queue skipped "
+ "to respect FIFO of applications";
public final static String QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESERVED =
"Queue skipped because node has been reserved";
public final static String
QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESOURCE_INSUFFICIENT =
"Queue skipped because node resource is insufficient";
/*
* Application level diagnostics
*/
public final static String APPLICATION_FAIL_TO_ALLOCATE =
"Application fails to allocate";
public final static String APPLICATION_COULD_NOT_GET_CONTAINER =
"Application couldn't get container for allocation";
public final static String APPLICATION_DO_NOT_NEED_RESOURCE =
"Application does not need more resource";
public final static String APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE =
"Application priority does not need more resource";
public final static String SKIPPED_ALL_PRIORITIES =
"All priorities are skipped of the app";
public final static String RESPECT_FIFO = "To respect FIFO of applications, "
+ "skipped following applications in the queue";
/*
* Request level diagnostics
*/
public final static String REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST =
"Request skipped because off-switch request is null";
public final static String REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE =
"Request skipped in Ignore Exclusivity mode for AM allocation";
public final static String REQUEST_SKIPPED_BECAUSE_OF_RESERVATION =
"Request skipped based on reservation algo";
public final static String
REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST =
"Request skipped because non-partitioned resource request should be "
+ "scheduled to non-partitioned partition first";
public final static String REQUEST_DO_NOT_NEED_RESOURCE =
"Request does not need more resource";
/*
* Node level diagnostics
*/
public final static String
NODE_SKIPPED_BECAUSE_OF_NO_OFF_SWITCH_AND_LOCALITY_VIOLATION =
"Node skipped because node/rack locality cannot be satisfied";
public final static String NODE_SKIPPED_BECAUSE_OF_OFF_SWITCH_DELAY =
"Node skipped because of off-switch delay";
public final static String NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY =
"Node skipped because relax locality is not allowed";
public final static String NODE_TOTAL_RESOURCE_INSUFFICIENT_FOR_REQUEST =
"Node's total resource is insufficient for request";
public final static String NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE =
"Node does not have sufficient resource for request";
public final static String NODE_IS_BLACKLISTED = "Node is blacklisted";
public final static String
NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS =
"Node does not match partition or placement constraints";

View File

@ -0,0 +1,29 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities;
/**
* Collection of activity operation levels.
*/
public enum ActivityLevel {
QUEUE,
APP,
REQUEST,
NODE
}

View File

@ -32,36 +32,36 @@ import java.util.List;
public class ActivityNode {
private String activityNodeName;
private String parentName;
private String appPriority;
private String requestPriority;
private Integer appPriority;
private Integer requestPriority;
private ActivityState state;
private String diagnostic;
private NodeId nodeId;
private String allocationRequestId;
private Long allocationRequestId;
private List<ActivityNode> childNode;
public ActivityNode(String activityNodeName, String parentName,
String priority, ActivityState state, String diagnostic, String type) {
this(activityNodeName, parentName, priority, state, diagnostic, type, null,
null);
}
public ActivityNode(String activityNodeName, String parentName,
String priority, ActivityState state, String diagnostic, String type,
NodeId nodeId, String allocationRequestId) {
Integer priority, ActivityState state, String diagnostic,
ActivityLevel level, NodeId nodeId, Long allocationRequestId) {
this.activityNodeName = activityNodeName;
this.parentName = parentName;
if (type != null) {
if (type.equals("app")) {
if (level != null) {
switch (level) {
case APP:
this.appPriority = priority;
} else if (type.equals("request")) {
break;
case REQUEST:
this.requestPriority = priority;
this.allocationRequestId = allocationRequestId;
} else if (type.equals("container")) {
break;
case NODE:
this.requestPriority = priority;
this.allocationRequestId = allocationRequestId;
this.nodeId = nodeId;
break;
default:
break;
}
}
this.state = state;
@ -93,11 +93,11 @@ public class ActivityNode {
return this.diagnostic;
}
public String getAppPriority() {
public Integer getAppPriority() {
return appPriority;
}
public String getRequestPriority() {
public Integer getRequestPriority() {
return requestPriority;
}
@ -105,7 +105,7 @@ public class ActivityNode {
return nodeId;
}
public String getAllocationRequestId() {
public Long getAllocationRequestId() {
return allocationRequestId;
}

View File

@ -30,29 +30,37 @@ import org.slf4j.LoggerFactory;
public class AllocationActivity {
private String childName = null;
private String parentName = null;
private String appPriority = null;
private String requestPriority = null;
private Integer appPriority = null;
private Integer requestPriority = null;
private ActivityState state;
private String diagnostic = null;
private NodeId nodeId;
private String allocationRequestId;
private Long allocationRequestId;
private ActivityLevel level;
private static final Logger LOG =
LoggerFactory.getLogger(AllocationActivity.class);
public AllocationActivity(String parentName, String queueName,
String priority, ActivityState state, String diagnostic, String type,
NodeId nodeId, String allocationRequestId) {
Integer priority, ActivityState state, String diagnostic,
ActivityLevel level, NodeId nodeId, Long allocationRequestId) {
this.childName = queueName;
this.parentName = parentName;
if (type != null) {
if (type.equals("app")) {
if (level != null) {
this.level = level;
switch (level) {
case APP:
this.appPriority = priority;
} else if (type.equals("request")) {
break;
case REQUEST:
this.requestPriority = priority;
this.allocationRequestId = allocationRequestId;
} else if (type.equals("container")) {
break;
case NODE:
this.nodeId = nodeId;
break;
default:
break;
}
}
this.state = state;
@ -60,21 +68,11 @@ public class AllocationActivity {
}
public ActivityNode createTreeNode() {
if (appPriority != null) {
return new ActivityNode(this.childName, this.parentName, this.appPriority,
this.state, this.diagnostic, "app");
} else if (requestPriority != null) {
return new ActivityNode(this.childName, this.parentName,
this.requestPriority, this.state, this.diagnostic, "request", null,
allocationRequestId);
} else if (nodeId != null) {
return new ActivityNode(this.childName, this.parentName,
this.requestPriority, this.state, this.diagnostic, "container",
this.nodeId, null);
} else {
return new ActivityNode(this.childName, this.parentName, null, this.state,
this.diagnostic, null);
}
return new ActivityNode(this.childName, this.parentName,
this.level == ActivityLevel.APP ?
this.appPriority : this.requestPriority,
this.state, this.diagnostic, this.level,
this.nodeId, this.allocationRequestId);
}
public String getName() {

View File

@ -29,18 +29,18 @@ import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
/*
/**
* It contains allocation information for one application within a period of
* time.
* Each application allocation may have several allocation attempts.
*/
public class AppAllocation {
private Priority priority = null;
private Priority priority;
private NodeId nodeId;
private ContainerId containerId = null;
private ActivityState appState = null;
private String diagnostic = null;
private String queueName = null;
private ContainerId containerId;
private ActivityState activityState;
private String diagnostic;
private String queueName;
private List<ActivityNode> allocationAttempts;
private long timestamp;
@ -51,24 +51,24 @@ public class AppAllocation {
this.queueName = queueName;
}
public void updateAppContainerStateAndTime(ContainerId containerId,
public void updateAppContainerStateAndTime(ContainerId cId,
ActivityState appState, long ts, String diagnostic) {
this.timestamp = ts;
this.containerId = containerId;
this.appState = appState;
this.containerId = cId;
this.activityState = appState;
this.diagnostic = diagnostic;
}
public void addAppAllocationActivity(String containerId, String priority,
ActivityState state, String diagnose, String type, NodeId nId,
String allocationRequestId) {
ActivityNode container = new ActivityNode(containerId, null, priority,
state, diagnose, type, nId, allocationRequestId);
public void addAppAllocationActivity(String cId, Integer reqPriority,
ActivityState state, String diagnose, ActivityLevel level, NodeId nId,
Long allocationRequestId) {
ActivityNode container = new ActivityNode(cId, null, reqPriority,
state, diagnose, level, nId, allocationRequestId);
this.allocationAttempts.add(container);
if (state == ActivityState.REJECTED) {
this.appState = ActivityState.SKIPPED;
this.activityState = ActivityState.SKIPPED;
} else {
this.appState = state;
this.activityState = state;
}
}
@ -80,8 +80,8 @@ public class AppAllocation {
return queueName;
}
public ActivityState getAppState() {
return appState;
public ActivityState getActivityState() {
return activityState;
}
public Priority getPriority() {
@ -107,11 +107,11 @@ public class AppAllocation {
return allocationAttempts;
}
public AppAllocation filterAllocationAttempts(Set<String> requestPriorities,
Set<String> allocationRequestIds) {
public AppAllocation filterAllocationAttempts(Set<Integer> requestPriorities,
Set<Long> allocationRequestIds) {
AppAllocation appAllocation =
new AppAllocation(this.priority, this.nodeId, this.queueName);
appAllocation.appState = this.appState;
appAllocation.activityState = this.activityState;
appAllocation.containerId = this.containerId;
appAllocation.timestamp = this.timestamp;
appAllocation.diagnostic = this.diagnostic;

View File

@ -39,10 +39,11 @@ import java.util.concurrent.ConcurrentHashMap;
*/
public class NodeAllocation {
private NodeId nodeId;
private long timeStamp;
private long timestamp;
private ContainerId containerId = null;
private AllocationState containerState = AllocationState.DEFAULT;
private List<AllocationActivity> allocationOperations;
private String partition;
private ActivityNode root = null;
@ -55,10 +56,10 @@ public class NodeAllocation {
}
public void addAllocationActivity(String parentName, String childName,
String priority, ActivityState state, String diagnostic, String type,
NodeId nId, String allocationRequestId) {
Integer priority, ActivityState state, String diagnostic,
ActivityLevel level, NodeId nId, Long allocationRequestId) {
AllocationActivity allocate = new AllocationActivity(parentName, childName,
priority, state, diagnostic, type, nId, allocationRequestId);
priority, state, diagnostic, level, nId, allocationRequestId);
this.allocationOperations.add(allocate);
}
@ -113,12 +114,12 @@ public class NodeAllocation {
}
}
public void setTimeStamp(long timeStamp) {
this.timeStamp = timeStamp;
public void setTimestamp(long timestamp) {
this.timestamp = timestamp;
}
public long getTimeStamp() {
return this.timeStamp;
public long getTimestamp() {
return this.timestamp;
}
public AllocationState getFinalAllocationState() {
@ -138,4 +139,12 @@ public class NodeAllocation {
public NodeId getNodeId() {
return nodeId;
}
public String getPartition() {
return partition;
}
public void setPartition(String partition) {
this.partition = partition;
}
}

View File

@ -1518,6 +1518,11 @@ public class CapacityScheduler extends
if (getNode(node.getNodeID()) != node) {
LOG.error("Trying to schedule on a removed node, please double check, "
+ "nodeId=" + node.getNodeID());
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
"", getRootQueue().getQueueName(), ActivityState.REJECTED,
ActivityDiagnosticConstant.INIT_CHECK_SINGLE_NODE_REMOVED);
ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
node);
return null;
}
@ -1527,12 +1532,9 @@ public class CapacityScheduler extends
RMContainer reservedContainer = node.getReservedContainer();
if (reservedContainer != null) {
allocateFromReservedContainer(node, withNodeHeartbeat, reservedContainer);
}
// Do not schedule if there are any reservations to fulfill on the node
if (node.getReservedContainer() != null) {
// Do not schedule if there are any reservations to fulfill on the node
LOG.debug("Skipping scheduling since node {} is reserved by"
+ " application {}", node.getNodeID(), node.getReservedContainer().
+ " application {}", node.getNodeID(), reservedContainer.
getContainerId().getApplicationAttemptId());
return null;
}
@ -1543,8 +1545,14 @@ public class CapacityScheduler extends
if (calculator.computeAvailableContainers(Resources
.add(node.getUnallocatedResource(), node.getTotalKillableResources()),
minimumAllocation) <= 0) {
LOG.debug("This node or node partition doesn't have available or" +
" preemptible resource");
LOG.debug("This node " + node.getNodeID() + " doesn't have sufficient "
+ "available or preemptible resource for minimum allocation");
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
"", getRootQueue().getQueueName(), ActivityState.REJECTED,
ActivityDiagnosticConstant.
INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT);
ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
node);
return null;
}
@ -1594,12 +1602,12 @@ public class CapacityScheduler extends
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
node, reservedContainer.getContainerId(),
AllocationState.ALLOCATED_FROM_RESERVED);
} else{
} else if (assignment.getAssignmentInformation().getNumReservations() > 0) {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
queue.getParent().getQueueName(), queue.getQueueName(),
ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY);
ActivityState.RE_RESERVED, ActivityDiagnosticConstant.EMPTY);
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(activitiesManager,
node, reservedContainer.getContainerId(), AllocationState.SKIPPED);
node, reservedContainer.getContainerId(), AllocationState.RESERVED);
}
assignment.setSchedulingMode(
@ -1685,12 +1693,14 @@ public class CapacityScheduler extends
allocateFromReservedContainer(node, false, reservedContainer);
}
}
LOG.debug("This node or this node partition doesn't have available or "
+ "killable resource");
LOG.debug("This partition '{}' doesn't have available or "
+ "killable resource", candidates.getPartition());
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, null,
"", getRootQueue().getQueueName(), ActivityState.REJECTED,
ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " "
+ candidates.getPartition());
ActivityDiagnosticConstant.
INIT_CHECK_PARTITION_RESOURCE_INSUFFICIENT);
ActivitiesLogger.NODE
.finishSkippedNodeAllocation(activitiesManager, null);
return null;
}
@ -1721,13 +1731,13 @@ public class CapacityScheduler extends
assignment = allocateContainerOnSingleNode(candidates,
node, withNodeHeartbeat);
ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager,
node.getNodeID());
node.getNodeID(), candidates.getPartition());
} else{
ActivitiesLogger.NODE.startNodeUpdateRecording(activitiesManager,
ActivitiesManager.EMPTY_NODE_ID);
assignment = allocateContainersOnMultiNodes(candidates);
ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager,
ActivitiesManager.EMPTY_NODE_ID);
ActivitiesManager.EMPTY_NODE_ID, candidates.getPartition());
}
if (assignment != null && assignment.getAssignmentInformation() != null

View File

@ -1074,8 +1074,7 @@ public class LeafQueue extends AbstractCSQueue {
&& !accessibleToPartition(candidates.getPartition())) {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParent().getQueueName(), getQueueName(), ActivityState.REJECTED,
ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION + " "
+ candidates.getPartition());
ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION);
return CSAssignment.NULL_ASSIGNMENT;
}
@ -1113,10 +1112,11 @@ public class LeafQueue extends AbstractCSQueue {
schedulingMode)) {
ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue(
activitiesManager, node, application, application.getPriority(),
ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT);
ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT);
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.EMPTY);
getParent().getQueueName(), getQueueName(),
ActivityState.REJECTED,
ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT);
return CSAssignment.NULL_ASSIGNMENT;
}
// If there was no reservation and canAssignToThisQueue returned
@ -1157,7 +1157,7 @@ public class LeafQueue extends AbstractCSQueue {
"User capacity has reached its maximum limit.");
ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue(
activitiesManager, node, application, application.getPriority(),
ActivityDiagnosticConstant.USER_CAPACITY_MAXIMUM_LIMIT);
ActivityDiagnosticConstant.QUEUE_HIT_USER_MAX_CAPACITY_LIMIT);
continue;
}
@ -1189,15 +1189,16 @@ public class LeafQueue extends AbstractCSQueue {
} else if (assignment.getSkippedType()
== CSAssignment.SkippedType.QUEUE_LIMIT) {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM);
getParent().getQueueName(), getQueueName(), ActivityState.REJECTED,
() -> ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM
+ " from " + application.getApplicationId());
return assignment;
} else{
// If we don't allocate anything, and it is not skipped by application,
// we will return to respect FIFO of applications
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParent().getQueueName(), getQueueName(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.RESPECT_FIFO);
ActivityDiagnosticConstant.QUEUE_SKIPPED_TO_RESPECT_FIFO);
ActivitiesLogger.APP.finishSkippedAppAllocationRecording(
activitiesManager, application.getApplicationId(),
ActivityState.SKIPPED, ActivityDiagnosticConstant.EMPTY);

View File

@ -559,8 +559,7 @@ public class ParentQueue extends AbstractCSQueue {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParentName(), getQueueName(), ActivityState.REJECTED,
ActivityDiagnosticConstant.NOT_ABLE_TO_ACCESS_PARTITION
+ candidates.getPartition());
ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION);
if (rootQueue) {
ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
node);
@ -613,8 +612,8 @@ public class ParentQueue extends AbstractCSQueue {
getMetrics().getReservedVirtualCores()), schedulingMode)) {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParentName(), getQueueName(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.QUEUE_MAX_CAPACITY_LIMIT);
getParentName(), getQueueName(), ActivityState.REJECTED,
ActivityDiagnosticConstant.QUEUE_HIT_MAX_CAPACITY_LIMIT);
if (rootQueue) {
ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
node);
@ -648,22 +647,13 @@ public class ParentQueue extends AbstractCSQueue {
assignedToChild.getAssignmentInformation().getReservationDetails()
!= null && !assignedToChild.getAssignmentInformation()
.getReservationDetails().isEmpty();
if (node != null && !isReserved) {
if (rootQueue) {
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(
activitiesManager, node,
assignedToChild.getAssignmentInformation()
.getFirstAllocatedOrReservedContainerId(),
AllocationState.ALLOCATED);
}
} else{
if (rootQueue) {
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(
activitiesManager, node,
assignedToChild.getAssignmentInformation()
.getFirstAllocatedOrReservedContainerId(),
AllocationState.RESERVED);
}
if (rootQueue) {
ActivitiesLogger.NODE.finishAllocatedNodeAllocation(
activitiesManager, node,
assignedToChild.getAssignmentInformation()
.getFirstAllocatedOrReservedContainerId(),
isReserved ?
AllocationState.RESERVED : AllocationState.ALLOCATED);
}
// Track resource utilization in this pass of the scheduler
@ -735,10 +725,24 @@ public class ParentQueue extends AbstractCSQueue {
// Two conditions need to meet when trying to allocate:
// 1) Node doesn't have reserved container
// 2) Node's available-resource + killable-resource should > 0
return node.getReservedContainer() == null && Resources.greaterThanOrEqual(
resourceCalculator, clusterResource, Resources
boolean accept = node.getReservedContainer() == null && Resources
.greaterThanOrEqual(resourceCalculator, clusterResource, Resources
.add(node.getUnallocatedResource(),
node.getTotalKillableResources()), minimumAllocation);
if (!accept) {
ActivitiesLogger.QUEUE.recordQueueActivity(activitiesManager, node,
getParentName(), getQueueName(), ActivityState.REJECTED,
() -> node.getReservedContainer() != null ?
ActivityDiagnosticConstant.
QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESERVED :
ActivityDiagnosticConstant.
QUEUE_SKIPPED_BECAUSE_SINGLE_NODE_RESOURCE_INSUFFICIENT);
if (rootQueue) {
ActivitiesLogger.NODE.finishSkippedNodeAllocation(activitiesManager,
node);
}
}
return accept;
}
private ResourceLimits getResourceLimitsOfChild(CSQueue child,

View File

@ -24,6 +24,7 @@ import java.util.List;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityLevel;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.DiagnosticsCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -107,7 +108,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
if (offswitchPendingAsk.getCount() <= 0) {
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST);
ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE,
ActivityLevel.REQUEST);
return ContainerAllocation.PRIORITY_SKIPPED;
}
@ -118,7 +120,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
if (application.getOutstandingAsksCount(schedulerKey) <= 0) {
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE);
ActivityDiagnosticConstant.REQUEST_DO_NOT_NEED_RESOURCE,
ActivityLevel.REQUEST);
return ContainerAllocation.PRIORITY_SKIPPED;
}
@ -133,7 +136,9 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
"Skipping assigning to Node in Ignore Exclusivity mode. ");
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_IN_IGNORE_EXCLUSIVITY_MODE);
ActivityDiagnosticConstant.
REQUEST_SKIPPED_IN_IGNORE_EXCLUSIVITY_MODE,
ActivityLevel.REQUEST);
return ContainerAllocation.APP_SKIPPED;
}
}
@ -148,7 +153,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.
NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS
+ ActivitiesManager.getDiagnostics(dcOpt));
+ ActivitiesManager.getDiagnostics(dcOpt),
ActivityLevel.NODE);
return ContainerAllocation.PRIORITY_SKIPPED;
}
@ -157,7 +163,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
LOG.debug("doesn't need containers based on reservation algo!");
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.DO_NOT_NEED_ALLOCATIONATTEMPTINFOS);
ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_OF_RESERVATION,
ActivityLevel.REQUEST);
return ContainerAllocation.PRIORITY_SKIPPED;
}
}
@ -166,9 +173,11 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
node.getPartition())) {
LOG.debug("cannot allocate required resource={} because of headroom",
required);
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.QUEUE_SKIPPED_HEADROOM);
ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM,
ActivityState.REJECTED,
ActivityLevel.REQUEST);
return ContainerAllocation.QUEUE_SKIPPED;
}
@ -183,7 +192,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// thread.
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST);
ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST,
ActivityLevel.REQUEST);
return ContainerAllocation.PRIORITY_SKIPPED;
}
String requestPartition =
@ -213,7 +223,9 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
}
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.NON_PARTITIONED_PARTITION_FIRST);
ActivityDiagnosticConstant.
REQUEST_SKIPPED_BECAUSE_NON_PARTITIONED_PARTITION_FIRST,
ActivityLevel.REQUEST);
return ContainerAllocation.APP_SKIPPED;
}
}
@ -228,7 +240,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_BLACK_LISTED_NODE);
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_BLACK_LISTED_NODE);
ActivityDiagnosticConstant.NODE_IS_BLACKLISTED,
ActivityLevel.NODE);
return ContainerAllocation.APP_SKIPPED;
}
@ -366,9 +379,6 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
}
// Skip node-local request, go to rack-local request
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_NODE_LOCAL_REQUEST);
return ContainerAllocation.LOCALITY_SKIPPED;
}
@ -384,9 +394,6 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
}
// Skip rack-local request, go to off-switch request
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_RACK_LOCAL_REQUEST);
return ContainerAllocation.LOCALITY_SKIPPED;
}
@ -405,7 +412,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_DUE_TO_LOCALITY);
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_OFF_SWITCH_REQUEST);
ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_OFF_SWITCH_DELAY,
ActivityLevel.NODE);
return ContainerAllocation.APP_SKIPPED;
}
@ -439,7 +447,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
if (!appInfo.canDelayTo(schedulerKey, node.getRackName())) {
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY);
ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY,
ActivityLevel.NODE);
return ContainerAllocation.PRIORITY_SKIPPED;
}
@ -465,7 +474,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
if (!appInfo.canDelayTo(schedulerKey, ResourceRequest.ANY)) {
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.SKIP_PRIORITY_BECAUSE_OF_RELAX_LOCALITY);
ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY,
ActivityLevel.NODE);
return ContainerAllocation.PRIORITY_SKIPPED;
}
@ -489,7 +499,9 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
}
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.PRIORITY_SKIPPED);
ActivityDiagnosticConstant.
NODE_SKIPPED_BECAUSE_OF_NO_OFF_SWITCH_AND_LOCALITY_VIOLATION,
ActivityLevel.NODE);
return ContainerAllocation.PRIORITY_SKIPPED;
}
@ -516,8 +528,10 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// Skip this locality request
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE
+ getResourceDiagnostics(capability, totalResource));
ActivityDiagnosticConstant.
NODE_TOTAL_RESOURCE_INSUFFICIENT_FOR_REQUEST
+ getResourceDiagnostics(capability, totalResource),
ActivityLevel.NODE);
return ContainerAllocation.LOCALITY_SKIPPED;
}
@ -597,7 +611,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.
NODE_CAN_NOT_FIND_CONTAINER_TO_BE_UNRESERVED_WHEN_NEEDED);
NODE_CAN_NOT_FIND_CONTAINER_TO_BE_UNRESERVED_WHEN_NEEDED,
ActivityLevel.NODE);
return ContainerAllocation.LOCALITY_SKIPPED;
}
}
@ -622,18 +637,20 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// Skip the locality request
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE
+ getResourceDiagnostics(capability, availableForDC));
ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE
+ getResourceDiagnostics(capability, availableForDC),
ActivityLevel.NODE);
return ContainerAllocation.LOCALITY_SKIPPED;
}
}
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE
ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE
+ getResourceDiagnostics(capability, availableForDC),
rmContainer == null ?
ActivityState.RESERVED : ActivityState.RE_RESERVED);
ActivityState.RESERVED : ActivityState.RE_RESERVED,
ActivityLevel.NODE);
ContainerAllocation result = new ContainerAllocation(null,
pendingAsk.getPerAllocationResource(), AllocationState.RESERVED);
result.containerNodeType = type;
@ -643,8 +660,9 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
// Skip the locality request
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, schedulerKey,
ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE
+ getResourceDiagnostics(capability, availableForDC));
ActivityDiagnosticConstant.NODE_DO_NOT_HAVE_SUFFICIENT_RESOURCE
+ getResourceDiagnostics(capability, availableForDC),
ActivityLevel.NODE);
return ContainerAllocation.LOCALITY_SKIPPED;
}
}
@ -719,7 +737,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
null, AllocationState.APP_SKIPPED);
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager,
node, application, schedulerKey,
ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, ActivityState.REJECTED);
ActivityDiagnosticConstant.APPLICATION_FAIL_TO_ALLOCATE,
ActivityState.REJECTED, ActivityLevel.APP);
return ret;
}
@ -741,8 +760,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
LOG.warn("Couldn't get container for allocation!");
ActivitiesLogger.APP.recordAppActivityWithoutAllocation(activitiesManager,
node, application, schedulerKey,
ActivityDiagnosticConstant.COULD_NOT_GET_CONTAINER,
ActivityState.REJECTED);
ActivityDiagnosticConstant.APPLICATION_COULD_NOT_GET_CONTAINER,
ActivityState.REJECTED, ActivityLevel.APP);
return ContainerAllocation.APP_SKIPPED;
}
@ -765,8 +784,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
.recordAppActivityWithoutAllocation(activitiesManager, node,
application, schedulerKey,
ActivityDiagnosticConstant.
PRIORITY_SKIPPED_BECAUSE_NULL_ANY_REQUEST,
ActivityState.REJECTED);
REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST,
ActivityState.REJECTED, ActivityLevel.REQUEST);
return ContainerAllocation.PRIORITY_SKIPPED;
}
updatedContainer = new RMContainerImpl(container, schedulerKey,
@ -827,8 +846,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
if (schedulingPS == null) {
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, null, application, schedulerKey,
ActivityDiagnosticConstant.
APPLICATION_PRIORITY_DO_NOT_NEED_RESOURCE);
ActivityDiagnosticConstant.REQUEST_SKIPPED_BECAUSE_NULL_ANY_REQUEST,
ActivityLevel.REQUEST);
return new ContainerAllocation(reservedContainer, null,
AllocationState.PRIORITY_SKIPPED);
}
@ -888,7 +907,8 @@ public class RegularContainerAllocator extends AbstractContainerAllocator {
}
ActivitiesLogger.APP.recordSkippedAppActivityWithoutAllocation(
activitiesManager, node, application, null,
ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE);
ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE,
ActivityLevel.APP);
return CSAssignment.SKIP_ASSIGNMENT;
}

View File

@ -55,7 +55,8 @@ public class JAXBContextResolver implements ContextResolver<JAXBContext> {
UsersInfo.class, UserInfo.class, ApplicationStatisticsInfo.class,
StatisticsItemInfo.class, CapacitySchedulerHealthInfo.class,
FairSchedulerQueueInfoList.class, AppTimeoutsInfo.class,
AppTimeoutInfo.class, ResourceInformationsInfo.class };
AppTimeoutInfo.class, ResourceInformationsInfo.class,
ActivitiesInfo.class, AppActivitiesInfo.class};
// these dao classes need root unwrapping
final Class[] rootUnwrappedTypes =
{ NewApplication.class, ApplicationSubmissionContextInfo.class,

View File

@ -36,6 +36,7 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentMap;
import java.util.stream.Collectors;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
@ -756,11 +757,29 @@ public class RMWebServices extends WebServices implements RMWebServiceProtocol {
Set<RMWSConsts.AppActivitiesRequiredAction> requiredActions;
try {
requiredActions = parseAppActivitiesRequiredActions(actions);
requiredActions =
parseAppActivitiesRequiredActions(getFlatSet(actions));
} catch (IllegalArgumentException e) {
return new AppActivitiesInfo(e.getMessage(), appId);
}
Set<Integer> parsedRequestPriorities;
try {
parsedRequestPriorities = getFlatSet(requestPriorities).stream()
.map(e -> Integer.valueOf(e)).collect(Collectors.toSet());
} catch (NumberFormatException e) {
return new AppActivitiesInfo("request priorities must be integers!",
appId);
}
Set<Long> parsedAllocationRequestIds;
try {
parsedAllocationRequestIds = getFlatSet(allocationRequestIds).stream()
.map(e -> Long.valueOf(e)).collect(Collectors.toSet());
} catch (NumberFormatException e) {
return new AppActivitiesInfo(
"allocation request Ids must be integers!", appId);
}
int limitNum = -1;
if (limit != null) {
try {
@ -795,12 +814,13 @@ public class RMWebServices extends WebServices implements RMWebServiceProtocol {
if (requiredActions
.contains(RMWSConsts.AppActivitiesRequiredAction.GET)) {
AppActivitiesInfo appActivitiesInfo = activitiesManager
.getAppActivitiesInfo(applicationId, requestPriorities,
allocationRequestIds, activitiesGroupBy, limitNum,
.getAppActivitiesInfo(applicationId, parsedRequestPriorities,
parsedAllocationRequestIds, activitiesGroupBy, limitNum,
summarize, maxTime);
return appActivitiesInfo;
}
return new AppActivitiesInfo("Successfully notified actions: "
return new AppActivitiesInfo("Successfully received "
+ (actions.size() == 1 ? "action: " : "actions: ")
+ StringUtils.join(',', actions), appId);
} catch (Exception e) {
String errMessage = "Cannot find application with given appId";
@ -812,6 +832,15 @@ public class RMWebServices extends WebServices implements RMWebServiceProtocol {
return null;
}
private Set<String> getFlatSet(Set<String> set) {
if (set == null) {
return null;
}
return set.stream()
.flatMap(e -> Arrays.asList(e.split(StringUtils.COMMA_STR)).stream())
.collect(Collectors.toSet());
}
private Set<RMWSConsts.AppActivitiesRequiredAction>
parseAppActivitiesRequiredActions(Set<String> actions) {
Set<RMWSConsts.AppActivitiesRequiredAction> requiredActions =

View File

@ -32,16 +32,17 @@ import java.util.Date;
import java.util.List;
import java.util.ArrayList;
/*
* DAO object to display node allocation activity.
/**
* DAO object to display allocation activities.
*/
@XmlRootElement
@XmlRootElement(name = "activities")
@XmlAccessorType(XmlAccessType.FIELD)
public class ActivitiesInfo {
protected String nodeId;
protected String timeStamp;
protected String diagnostic = null;
protected List<NodeAllocationInfo> allocations;
private String nodeId;
private Long timestamp;
private String dateTime;
private String diagnostic;
private List<NodeAllocationInfo> allocations;
private static final Logger LOG =
LoggerFactory.getLogger(ActivitiesInfo.class);
@ -73,9 +74,10 @@ public class ActivitiesInfo {
this.nodeId = nodeAllocations.get(0).getNodeId().toString();
}
this.timestamp = nodeAllocations.get(0).getTimestamp();
Date date = new Date();
date.setTime(nodeAllocations.get(0).getTimeStamp());
this.timeStamp = date.toString();
date.setTime(this.timestamp);
this.dateTime = date.toString();
for (int i = 0; i < nodeAllocations.size(); i++) {
NodeAllocation nodeAllocation = nodeAllocations.get(i);
@ -86,4 +88,24 @@ public class ActivitiesInfo {
}
}
}
public String getNodeId() {
return nodeId;
}
public Long getTimestamp() {
return timestamp;
}
public String getDateTime() {
return dateTime;
}
public String getDiagnostic() {
return diagnostic;
}
public List<NodeAllocationInfo> getAllocations() {
return allocations;
}
}

View File

@ -38,16 +38,16 @@ import java.util.stream.Collectors;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class ActivityNodeInfo {
protected String name; // The name for activity node
protected String appPriority;
protected String requestPriority;
protected String allocationState;
protected String diagnostic;
private String name; // The name for activity node
private Integer appPriority;
private Integer requestPriority;
private Long allocationRequestId;
private String allocationState;
private String diagnostic;
private String nodeId;
private String allocationRequestId;
// Used for groups of activities
private String count;
private Integer count;
private List<String> nodeIds;
protected List<ActivityNodeInfo> children;
@ -55,19 +55,19 @@ public class ActivityNodeInfo {
ActivityNodeInfo() {
}
public ActivityNodeInfo(String name, ActivityState allocationState,
public ActivityNodeInfo(String name, ActivityState activityState,
String diagnostic, NodeId nId) {
this.name = name;
this.allocationState = allocationState.name();
this.allocationState = activityState.name();
this.diagnostic = diagnostic;
setNodeId(nId);
}
public ActivityNodeInfo(ActivityState groupAllocationState,
public ActivityNodeInfo(ActivityState groupActivityState,
String groupDiagnostic, List<String> groupNodeIds) {
this.allocationState = groupAllocationState.name();
this.allocationState = groupActivityState.name();
this.diagnostic = groupDiagnostic;
this.count = String.valueOf(groupNodeIds.size());
this.count = groupNodeIds.size();
this.nodeIds = groupNodeIds;
}
@ -113,11 +113,11 @@ public class ActivityNodeInfo {
this.nodeIds = nodeIds;
}
public String getAllocationRequestId() {
public Long getAllocationRequestId() {
return allocationRequestId;
}
public String getCount() {
public Integer getCount() {
return count;
}
@ -128,4 +128,24 @@ public class ActivityNodeInfo {
public List<ActivityNodeInfo> getChildren() {
return children;
}
public String getAllocationState() {
return allocationState;
}
public String getName() {
return name;
}
public Integer getAppPriority() {
return appPriority;
}
public Integer getRequestPriority() {
return requestPriority;
}
public String getDiagnostic() {
return diagnostic;
}
}

View File

@ -33,16 +33,17 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/*
/**
* DAO object to display application activity.
*/
@XmlRootElement
@XmlRootElement(name = "appActivities")
@XmlAccessorType(XmlAccessType.FIELD)
public class AppActivitiesInfo {
protected String applicationId;
protected String diagnostic;
protected String timeStamp;
protected List<AppAllocationInfo> allocations;
private String applicationId;
private String diagnostic;
private Long timestamp;
private String dateTime;
private List<AppAllocationInfo> allocations;
private static final Logger LOG =
LoggerFactory.getLogger(AppActivitiesInfo.class);
@ -53,10 +54,7 @@ public class AppActivitiesInfo {
public AppActivitiesInfo(String errorMessage, String applicationId) {
this.diagnostic = errorMessage;
this.applicationId = applicationId;
Date date = new Date();
date.setTime(SystemClock.getInstance().getTime());
this.timeStamp = date.toString();
setTime(SystemClock.getInstance().getTime());
}
public AppActivitiesInfo(List<AppAllocation> appAllocations,
@ -67,10 +65,7 @@ public class AppActivitiesInfo {
if (appAllocations == null) {
diagnostic = "waiting for display";
Date date = new Date();
date.setTime(SystemClock.getInstance().getTime());
this.timeStamp = date.toString();
setTime(SystemClock.getInstance().getTime());
} else {
for (int i = appAllocations.size() - 1; i > -1; i--) {
AppAllocation appAllocation = appAllocations.get(i);
@ -81,8 +76,29 @@ public class AppActivitiesInfo {
}
}
private void setTime(long ts) {
this.timestamp = ts;
this.dateTime = new Date(ts).toString();
}
@VisibleForTesting
public List<AppAllocationInfo> getAllocations() {
return allocations;
}
public Long getTimestamp() {
return timestamp;
}
public String getDateTime() {
return dateTime;
}
public String getApplicationId() {
return applicationId;
}
public String getDiagnostic() {
return diagnostic;
}
}

View File

@ -38,27 +38,27 @@ import java.util.stream.Collectors;
@XmlAccessorType(XmlAccessType.FIELD)
public class AppAllocationInfo {
private String nodeId;
private String queueName;
private String appPriority;
private long timestamp;
private Long timestamp;
private String dateTime;
private String queueName;
private Integer appPriority;
private String allocationState;
private String diagnostic;
private List<AppRequestAllocationInfo> requestAllocation;
private List<AppRequestAllocationInfo> children;
AppAllocationInfo() {
}
AppAllocationInfo(AppAllocation allocation,
RMWSConsts.ActivitiesGroupBy groupBy) {
this.requestAllocation = new ArrayList<>();
this.children = new ArrayList<>();
this.nodeId = allocation.getNodeId();
this.queueName = allocation.getQueueName();
this.appPriority = allocation.getPriority() == null ?
null : allocation.getPriority().toString();
null : allocation.getPriority().getPriority();
this.timestamp = allocation.getTime();
this.dateTime = new Date(allocation.getTime()).toString();
this.allocationState = allocation.getAppState().name();
this.allocationState = allocation.getActivityState().name();
this.diagnostic = allocation.getDiagnostic();
Map<String, List<ActivityNode>> requestToActivityNodes =
allocation.getAllocationAttempts().stream().collect(Collectors
@ -68,7 +68,7 @@ public class AppAllocationInfo {
.values()) {
AppRequestAllocationInfo requestAllocationInfo =
new AppRequestAllocationInfo(requestActivityNodes, groupBy);
this.requestAllocation.add(requestAllocationInfo);
this.children.add(requestAllocationInfo);
}
}
@ -80,11 +80,11 @@ public class AppAllocationInfo {
return queueName;
}
public String getAppPriority() {
public Integer getAppPriority() {
return appPriority;
}
public long getTimestamp() {
public Long getTimestamp() {
return timestamp;
}
@ -96,8 +96,8 @@ public class AppAllocationInfo {
return allocationState;
}
public List<AppRequestAllocationInfo> getRequestAllocation() {
return requestAllocation;
public List<AppRequestAllocationInfo> getChildren() {
return children;
}
public String getDiagnostic() {

View File

@ -34,10 +34,11 @@ import java.util.List;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class AppRequestAllocationInfo {
private String requestPriority;
private String allocationRequestId;
private Integer requestPriority;
private Long allocationRequestId;
private String allocationState;
private List<ActivityNodeInfo> allocationAttempt;
private String diagnostic;
private List<ActivityNodeInfo> children;
AppRequestAllocationInfo() {
}
@ -48,15 +49,19 @@ public class AppRequestAllocationInfo {
this.requestPriority = lastActivityNode.getRequestPriority();
this.allocationRequestId = lastActivityNode.getAllocationRequestId();
this.allocationState = lastActivityNode.getState().name();
this.allocationAttempt = ActivitiesUtils
if (lastActivityNode.isRequestType()
&& lastActivityNode.getDiagnostic() != null) {
this.diagnostic = lastActivityNode.getDiagnostic();
}
this.children = ActivitiesUtils
.getRequestActivityNodeInfos(activityNodes, groupBy);
}
public String getRequestPriority() {
public Integer getRequestPriority() {
return requestPriority;
}
public String getAllocationRequestId() {
public Long getAllocationRequestId() {
return allocationRequestId;
}
@ -64,7 +69,11 @@ public class AppRequestAllocationInfo {
return allocationState;
}
public List<ActivityNodeInfo> getAllocationAttempt() {
return allocationAttempt;
public List<ActivityNodeInfo> getChildren() {
return children;
}
public String getDiagnostic() {
return diagnostic;
}
}

View File

@ -33,9 +33,10 @@ import javax.xml.bind.annotation.XmlRootElement;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class NodeAllocationInfo {
protected String allocatedContainerId;
protected String finalAllocationState;
protected ActivityNodeInfo root = null;
private String partition;
private String updatedContainerId;
private String finalAllocationState;
private ActivityNodeInfo root = null;
private static final Logger LOG =
LoggerFactory.getLogger(NodeAllocationInfo.class);
@ -45,10 +46,25 @@ public class NodeAllocationInfo {
NodeAllocationInfo(NodeAllocation allocation,
RMWSConsts.ActivitiesGroupBy groupBy) {
this.allocatedContainerId = allocation.getContainerId();
this.partition = allocation.getPartition();
this.updatedContainerId = allocation.getContainerId();
this.finalAllocationState = allocation.getFinalAllocationState().name();
root = new ActivityNodeInfo(allocation.getRoot(), groupBy);
}
public String getPartition() {
return partition;
}
public String getUpdatedContainerId() {
return updatedContainerId;
}
public String getFinalAllocationState() {
return finalAllocationState;
}
public ActivityNodeInfo getRoot() {
return root;
}
}

View File

@ -150,10 +150,10 @@ public class TestActivitiesManager {
.recordAppActivityWithoutAllocation(activitiesManager, node,
randomApp,
new SchedulerRequestKey(Priority.newInstance(0), 0, null),
ActivityDiagnosticConstant.FAIL_TO_ALLOCATE,
ActivityState.REJECTED);
ActivityDiagnosticConstant.NODE_IS_BLACKLISTED,
ActivityState.REJECTED, ActivityLevel.NODE);
ActivitiesLogger.NODE
.finishNodeUpdateRecording(activitiesManager, node.getNodeID());
.finishNodeUpdateRecording(activitiesManager, node.getNodeID(), "");
return null;
};
futures.add(threadPoolExecutor.submit(task));
@ -195,10 +195,10 @@ public class TestActivitiesManager {
.recordAppActivityWithoutAllocation(activitiesManager, node,
randomApp,
new SchedulerRequestKey(Priority.newInstance(0), 0, null),
ActivityDiagnosticConstant.FAIL_TO_ALLOCATE,
ActivityState.REJECTED);
ActivityDiagnosticConstant.NODE_IS_BLACKLISTED,
ActivityState.REJECTED, ActivityLevel.NODE);
ActivitiesLogger.NODE.finishNodeUpdateRecording(activitiesManager,
ActivitiesManager.EMPTY_NODE_ID);
ActivitiesManager.EMPTY_NODE_ID, "");
return null;
};
futures.add(threadPoolExecutor.submit(task));
@ -236,13 +236,13 @@ public class TestActivitiesManager {
.recordAppActivityWithoutAllocation(activitiesManager, node,
randomApp,
new SchedulerRequestKey(Priority.newInstance(0), 0, null),
ActivityDiagnosticConstant.FAIL_TO_ALLOCATE,
ActivityState.REJECTED);
ActivityDiagnosticConstant.NODE_IS_BLACKLISTED,
ActivityState.REJECTED, ActivityLevel.NODE);
}
ActivitiesLogger.APP
.finishAllocatedAppAllocationRecording(activitiesManager,
randomApp.getApplicationId(), null, ActivityState.SKIPPED,
ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES);
.finishSkippedAppAllocationRecording(activitiesManager,
randomApp.getApplicationId(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.EMPTY);
return null;
};
futures.add(threadPoolExecutor.submit(task));
@ -285,12 +285,12 @@ public class TestActivitiesManager {
ActivitiesLogger.APP
.recordAppActivityWithoutAllocation(newActivitiesManager, node, app,
new SchedulerRequestKey(Priority.newInstance(0), 0, null),
ActivityDiagnosticConstant.FAIL_TO_ALLOCATE,
ActivityState.REJECTED);
ActivityDiagnosticConstant.NODE_IS_BLACKLISTED,
ActivityState.REJECTED, ActivityLevel.NODE);
ActivitiesLogger.APP
.finishAllocatedAppAllocationRecording(newActivitiesManager,
app.getApplicationId(), null, ActivityState.SKIPPED,
ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES);
.finishSkippedAppAllocationRecording(newActivitiesManager,
app.getApplicationId(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.EMPTY);
}
AppActivitiesInfo appActivitiesInfo = newActivitiesManager
.getAppActivitiesInfo(app.getApplicationId(), null, null, null, -1,
@ -322,15 +322,15 @@ public class TestActivitiesManager {
for (int i = 0; i < numNodes; i++) {
NodeId nodeId = NodeId.newInstance("host" + i, 0);
activitiesManager
.addSchedulingActivityForApp(app.getApplicationId(), null, "0",
.addSchedulingActivityForApp(app.getApplicationId(), null, 0,
ActivityState.SKIPPED,
ActivityDiagnosticConstant.FAIL_TO_ALLOCATE, "container",
nodeId, "0");
ActivityDiagnosticConstant.NODE_IS_BLACKLISTED,
ActivityLevel.NODE, nodeId, 0L);
}
ActivitiesLogger.APP
.finishAllocatedAppAllocationRecording(activitiesManager,
app.getApplicationId(), null, ActivityState.SKIPPED,
ActivityDiagnosticConstant.SKIPPED_ALL_PRIORITIES);
.finishSkippedAppAllocationRecording(activitiesManager,
app.getApplicationId(), ActivityState.SKIPPED,
ActivityDiagnosticConstant.EMPTY);
}
// It often take a longer time for the first query, ignore this distraction
@ -346,11 +346,11 @@ public class TestActivitiesManager {
Assert.assertEquals(numActivities,
appActivitiesInfo.getAllocations().size());
Assert.assertEquals(1,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
appActivitiesInfo.getAllocations().get(0).getChildren()
.size());
Assert.assertEquals(numNodes,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
.get(0).getAllocationAttempt().size());
appActivitiesInfo.getAllocations().get(0).getChildren()
.get(0).getChildren().size());
return null;
};
testManyTimes("Getting normal app activities", normalSupplier,
@ -364,14 +364,14 @@ public class TestActivitiesManager {
Assert.assertEquals(numActivities,
appActivitiesInfo.getAllocations().size());
Assert.assertEquals(1,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
appActivitiesInfo.getAllocations().get(0).getChildren()
.size());
Assert.assertEquals(1,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
.get(0).getAllocationAttempt().size());
appActivitiesInfo.getAllocations().get(0).getChildren()
.get(0).getChildren().size());
Assert.assertEquals(numNodes,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
.get(0).getAllocationAttempt().get(0).getNodeIds().size());
appActivitiesInfo.getAllocations().get(0).getChildren()
.get(0).getChildren().get(0).getNodeIds().size());
return null;
};
testManyTimes("Getting aggregated app activities", aggregatedSupplier,
@ -384,14 +384,14 @@ public class TestActivitiesManager {
RMWSConsts.ActivitiesGroupBy.DIAGNOSTIC, -1, true, 100);
Assert.assertEquals(1, appActivitiesInfo.getAllocations().size());
Assert.assertEquals(1,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
appActivitiesInfo.getAllocations().get(0).getChildren()
.size());
Assert.assertEquals(1,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
.get(0).getAllocationAttempt().size());
appActivitiesInfo.getAllocations().get(0).getChildren()
.get(0).getChildren().size());
Assert.assertEquals(numNodes,
appActivitiesInfo.getAllocations().get(0).getRequestAllocation()
.get(0).getAllocationAttempt().get(0).getNodeIds().size());
appActivitiesInfo.getAllocations().get(0).getChildren()
.get(0).getChildren().get(0).getNodeIds().size());
return null;
};
testManyTimes("Getting summarized app activities", summarizedSupplier,

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import com.google.common.collect.Lists;
import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import org.apache.hadoop.http.JettyUtils;
@ -43,6 +44,7 @@ import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
@ -51,8 +53,8 @@ import static org.junit.Assert.assertEquals;
*/
public final class ActivitiesTestUtils {
public static final String INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX =
ActivityDiagnosticConstant.NOT_SUFFICIENT_RESOURCE
public static final String TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX =
ActivityDiagnosticConstant.NODE_TOTAL_RESOURCE_INSUFFICIENT_FOR_REQUEST
+ ", " + GenericDiagnosticsCollector.RESOURCE_DIAGNOSTICS_PREFIX;
public static final String UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX =
@ -60,12 +62,51 @@ public final class ActivitiesTestUtils {
NODE_DO_NOT_MATCH_PARTITION_OR_PLACEMENT_CONSTRAINTS + ", "
+ GenericDiagnosticsCollector.PLACEMENT_CONSTRAINT_DIAGNOSTICS_PREFIX;
/*
* Field names in response of scheduler/app activities.
*/
public static final String FN_ACT_ALLOCATIONS = "allocations";
public static final String FN_ACT_DIAGNOSTIC = "diagnostic";
public static final String FN_ACT_ALLOCATION_STATE = "allocationState";
public static final String FN_ACT_FINAL_ALLOCATION_STATE =
"finalAllocationState";
public static final String FN_ACT_NODE_ID = "nodeId";
public static final String FN_ACT_NODE_IDS = "nodeIds";
public static final String FN_ACT_COUNT = "count";
public static final String FN_ACT_APP_PRIORITY = "appPriority";
public static final String FN_ACT_REQUEST_PRIORITY = "requestPriority";
public static final String FN_ACT_ALLOCATION_REQUEST_ID =
"allocationRequestId";
public static final String FN_APP_ACT_CHILDREN = "children";
public static final String FN_APP_ACT_ROOT = "appActivities";
public static final String FN_SCHEDULER_ACT_NAME = "name";
public static final String FN_SCHEDULER_ACT_ALLOCATIONS_ROOT = "root";
public static final String FN_SCHEDULER_ACT_CHILDREN = "children";
public static final String FN_SCHEDULER_ACT_ROOT = "activities";
private ActivitiesTestUtils(){}
public static List<JSONObject> findInAllocations(JSONObject allocationObj,
Predicate p) throws JSONException {
List<JSONObject> target = new ArrayList<>();
recursiveFindObj(allocationObj.getJSONObject("root"), p, target);
recursiveFindObj(allocationObj.getJSONObject(
FN_SCHEDULER_ACT_ALLOCATIONS_ROOT), p,
target);
return target;
}
@ -74,14 +115,14 @@ public final class ActivitiesTestUtils {
if (p.test(obj)) {
target.add(obj);
}
if (obj.has("children")) {
JSONArray childrenObjs = obj.optJSONArray("children");
if (obj.has(FN_SCHEDULER_ACT_CHILDREN)) {
JSONArray childrenObjs = obj.optJSONArray(FN_SCHEDULER_ACT_CHILDREN);
if (childrenObjs != null) {
for (int i = 0; i < childrenObjs.length(); i++) {
recursiveFindObj(childrenObjs.getJSONObject(i), p, target);
}
} else {
JSONObject childrenObj = obj.optJSONObject("children");
JSONObject childrenObj = obj.optJSONObject(FN_SCHEDULER_ACT_CHILDREN);
recursiveFindObj(childrenObj, p, target);
}
}
@ -103,17 +144,18 @@ public final class ActivitiesTestUtils {
public static void verifyNumberOfNodes(JSONObject allocation, int expectValue)
throws Exception {
if (allocation.isNull("root")) {
if (allocation.isNull(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT)) {
assertEquals("State of allocation is wrong", expectValue, 0);
} else {
assertEquals("State of allocation is wrong", expectValue,
1 + getNumberOfNodes(allocation.getJSONObject("root")));
1 + getNumberOfNodes(
allocation.getJSONObject(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT)));
}
}
public static int getNumberOfNodes(JSONObject allocation) throws Exception {
if (!allocation.isNull("children")) {
Object object = allocation.get("children");
if (!allocation.isNull(FN_SCHEDULER_ACT_CHILDREN)) {
Object object = allocation.get(FN_SCHEDULER_ACT_CHILDREN);
if (object.getClass() == JSONObject.class) {
return 1 + getNumberOfNodes((JSONObject) object);
} else {
@ -137,10 +179,18 @@ public final class ActivitiesTestUtils {
public static void verifyNumberOfAllocations(JSONObject json, int expectValue)
throws Exception {
if (json.isNull("allocations")) {
JSONObject activitiesJson;
if (json.has(FN_APP_ACT_ROOT)) {
activitiesJson = json.getJSONObject(FN_APP_ACT_ROOT);
} else if (json.has(FN_SCHEDULER_ACT_ROOT)) {
activitiesJson = json.getJSONObject(FN_SCHEDULER_ACT_ROOT);
} else {
throw new IllegalArgumentException("Can't parse allocations!");
}
if (activitiesJson.isNull(FN_ACT_ALLOCATIONS)) {
assertEquals("Number of allocations is wrong", expectValue, 0);
} else {
Object object = json.get("allocations");
Object object = activitiesJson.get(FN_ACT_ALLOCATIONS);
if (object.getClass() == JSONObject.class) {
assertEquals("Number of allocations is wrong", expectValue, 1);
} else if (object.getClass() == JSONArray.class) {
@ -153,31 +203,32 @@ public final class ActivitiesTestUtils {
public static void verifyQueueOrder(JSONObject json, String expectOrder)
throws Exception {
String order = "";
if (!json.isNull("root")) {
JSONObject root = json.getJSONObject("root");
order = root.getString("name") + "-" + getQueueOrder(root);
if (!json.isNull(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT)) {
JSONObject root = json.getJSONObject(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT);
order = root.getString(FN_SCHEDULER_ACT_NAME) + "-" + getQueueOrder(root);
}
assertEquals("Order of queue is wrong", expectOrder,
order.substring(0, order.length() - 1));
}
public static String getQueueOrder(JSONObject node) throws Exception {
if (!node.isNull("children")) {
Object children = node.get("children");
if (!node.isNull(FN_SCHEDULER_ACT_CHILDREN)) {
Object children = node.get(FN_SCHEDULER_ACT_CHILDREN);
if (children.getClass() == JSONObject.class) {
if (!((JSONObject) children).isNull("appPriority")) {
if (!((JSONObject) children).isNull(FN_ACT_APP_PRIORITY)) {
return "";
}
return ((JSONObject) children).getString("name") + "-" + getQueueOrder(
(JSONObject) children);
return ((JSONObject) children).getString(FN_SCHEDULER_ACT_NAME) + "-"
+ getQueueOrder((JSONObject) children);
} else if (children.getClass() == JSONArray.class) {
String order = "";
for (int i = 0; i < ((JSONArray) children).length(); i++) {
JSONObject child = (JSONObject) ((JSONArray) children).get(i);
if (!child.isNull("appPriority")) {
if (!child.isNull(FN_ACT_APP_PRIORITY)) {
return "";
}
order += (child.getString("name") + "-" + getQueueOrder(child));
order += (child.getString(FN_SCHEDULER_ACT_NAME) + "-"
+ getQueueOrder(child));
}
return order;
}
@ -185,12 +236,52 @@ public final class ActivitiesTestUtils {
return "";
}
public static JSONObject getFirstSubNodeFromJson(JSONObject json,
String... hierarchicalFieldNames) {
return getSubNodesFromJson(json, hierarchicalFieldNames).get(0);
}
public static List<JSONObject> getSubNodesFromJson(JSONObject json,
String... hierarchicalFieldNames) {
List<JSONObject> results = Lists.newArrayList(json);
for (String fieldName : hierarchicalFieldNames) {
results = results.stream().filter(e -> e.has(fieldName))
.flatMap(e -> getJSONObjects(e, fieldName).stream())
.collect(Collectors.toList());
if (results.isEmpty()) {
throw new IllegalArgumentException("Can't find hierarchical fields "
+ Arrays.toString(hierarchicalFieldNames));
}
}
return results;
}
private static List<JSONObject> getJSONObjects(JSONObject json,
String fieldName) {
List<JSONObject> objects = new ArrayList<>();
if (json.has(fieldName)) {
try {
Object tmpObj = json.get(fieldName);
if (tmpObj.getClass() == JSONObject.class) {
objects.add((JSONObject) tmpObj);
} else if (tmpObj.getClass() == JSONArray.class) {
for (int i = 0; i < ((JSONArray) tmpObj).length(); i++) {
objects.add(((JSONArray) tmpObj).getJSONObject(i));
}
}
} catch (JSONException e) {
throw new RuntimeException(e);
}
}
return objects;
}
public static void verifyNumberOfAllocationAttempts(JSONObject allocation,
int expectValue) throws Exception {
if (allocation.isNull("allocationAttempt")) {
if (allocation.isNull(FN_APP_ACT_CHILDREN)) {
assertEquals("Number of allocation attempts is wrong", expectValue, 0);
} else {
Object object = allocation.get("allocationAttempt");
Object object = allocation.get(FN_APP_ACT_CHILDREN);
if (object.getClass() == JSONObject.class) {
assertEquals("Number of allocations attempts is wrong", expectValue, 1);
} else if (object.getClass() == JSONArray.class) {

View File

@ -18,26 +18,47 @@
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_STATE;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getFirstSubNodeFromJson;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;
import javax.ws.rs.core.MediaType;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import org.apache.hadoop.http.JettyUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
@ -242,6 +263,82 @@ public class TestRMWebServicesForCSWithPartitions extends JerseyTestBase {
verifySchedulerInfoXML(dom);
}
@Test
public void testPartitionInSchedulerActivities() throws Exception {
rm.start();
rm.getRMContext().getNodeLabelManager().addLabelsToNode(ImmutableMap
.of(NodeId.newInstance("127.0.0.1", 0), Sets.newHashSet(LABEL_LX)));
MockNM nm1 = new MockNM("127.0.0.1:1234", 2 * 1024,
rm.getResourceTrackerService());
MockNM nm2 = new MockNM("127.0.0.2:1234", 2 * 1024,
rm.getResourceTrackerService());
nm1.registerNode();
nm2.registerNode();
try {
RMApp app1 = rm.submitApp(1024, "app1", "user1", null, QUEUE_B, LABEL_LX);
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
am1.allocate(Arrays.asList(
ResourceRequest.newBuilder().priority(Priority.UNDEFINED)
.resourceName("*").nodeLabelExpression(LABEL_LX)
.capability(Resources.createResource(2048)).numContainers(1)
.build()), null);
WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH)
.path(RMWSConsts.SCHEDULER_ACTIVITIES);
ActivitiesTestUtils.requestWebResource(sr, null);
nm1.nodeHeartbeat(true);
Thread.sleep(1000);
JSONObject schedulerActivitiesJson =
ActivitiesTestUtils.requestWebResource(sr, null);
/*
* verify scheduler activities
*/
verifyNumberOfAllocations(schedulerActivitiesJson, 1);
// verify queue Qb
Predicate<JSONObject> findQueueBPred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_B);
List<JSONObject> queueBObj = ActivitiesTestUtils.findInAllocations(
getFirstSubNodeFromJson(schedulerActivitiesJson,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findQueueBPred);
assertEquals(1, queueBObj.size());
assertEquals(ActivityState.REJECTED.name(),
queueBObj.get(0).optString(FN_ACT_ALLOCATION_STATE));
assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM
+ " from " + am1.getApplicationAttemptId().getApplicationId(),
queueBObj.get(0).optString(FN_ACT_DIAGNOSTIC));
// verify queue Qa
Predicate<JSONObject> findQueueAPred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_A);
List<JSONObject> queueAObj = ActivitiesTestUtils.findInAllocations(
getFirstSubNodeFromJson(schedulerActivitiesJson,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findQueueAPred);
assertEquals(1, queueAObj.size());
assertEquals(ActivityState.REJECTED.name(),
queueAObj.get(0).optString(FN_ACT_ALLOCATION_STATE));
assertEquals(
ActivityDiagnosticConstant.QUEUE_NOT_ABLE_TO_ACCESS_PARTITION,
queueAObj.get(0).optString(FN_ACT_DIAGNOSTIC));
// verify queue Qc
Predicate<JSONObject> findQueueCPred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals(QUEUE_C);
List<JSONObject> queueCObj = ActivitiesTestUtils.findInAllocations(
getFirstSubNodeFromJson(schedulerActivitiesJson,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findQueueCPred);
assertEquals(1, queueCObj.size());
assertEquals(ActivityState.SKIPPED.name(),
queueCObj.get(0).optString(FN_ACT_ALLOCATION_STATE));
assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_NEED_MORE_RESOURCE,
queueCObj.get(0).optString(FN_ACT_DIAGNOSTIC));
} finally {
rm.stop();
}
}
private void verifySchedulerInfoXML(Document dom) throws Exception {
NodeList scheduler = dom.getElementsByTagName("scheduler");
assertEquals("incorrect number of elements", 1, scheduler.getLength());

View File

@ -22,6 +22,8 @@ import com.sun.jersey.api.client.ClientResponse;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.core.util.MultivaluedMapImpl;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityDiagnosticConstant;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.activities.ActivityState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.http.JettyUtils;
@ -54,17 +56,37 @@ import java.util.function.Predicate;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.NODE;
import static org.apache.hadoop.yarn.api.resource.PlacementConstraints.PlacementTargets.allocationTag;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_REQUEST_ID;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_STATE;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_FINAL_ALLOCATION_STATE;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_ID;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_IDS;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_REQUEST_PRIORITY;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_CHILDREN;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_ROOT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_CHILDREN;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ALLOCATIONS_ROOT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getFirstSubNodeFromJson;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getSubNodesFromJson;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocationAttempts;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfNodes;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyQueueOrder;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyStateOfAllocations;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
/**
* Tests for scheduler/app activities.
*/
public class TestRMWebServicesSchedulerActivities
extends TestRMWebServicesCapacitySched {
@ -117,9 +139,12 @@ public class TestRMWebServicesSchedulerActivities
// Collection logic of scheduler activities changed after YARN-9313,
// only one allocation should be recorded for all scenarios.
verifyNumberOfAllocations(json, 1);
verifyStateOfAllocations(json.getJSONObject("allocations"),
"finalAllocationState", "ALLOCATED");
verifyQueueOrder(json.getJSONObject("allocations"), "root-a-b-b2-b3-b1");
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyStateOfAllocations(allocation,
FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED");
verifyQueueOrder(allocation,
"root-a-b-b2-b3-b1");
} finally {
rm.stop();
}
@ -167,7 +192,14 @@ public class TestRMWebServicesSchedulerActivities
response.getType().toString());
json = response.getEntity(JSONObject.class);
verifyNumberOfAllocations(json, 0);
// verify scheduler activities
verifyNumberOfAllocations(json, 1);
JSONObject rootObj = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS)
.getJSONObject(FN_SCHEDULER_ACT_ALLOCATIONS_ROOT);
assertTrue(rootObj.optString(FN_ACT_DIAGNOSTIC).startsWith(
ActivityDiagnosticConstant.
INIT_CHECK_SINGLE_NODE_RESOURCE_INSUFFICIENT));
} finally {
rm.stop();
}
@ -301,10 +333,12 @@ public class TestRMWebServicesSchedulerActivities
verifyNumberOfAllocations(json, 1);
verifyQueueOrder(json.getJSONObject("allocations"), "root-a-b-b3-b1");
JSONObject allocations = json.getJSONObject("allocations");
verifyStateOfAllocations(allocations, "finalAllocationState", "RESERVED");
JSONObject allocations = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyQueueOrder(allocations,
"root-a-b-b3-b1");
verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE,
"RESERVED");
// Do a node heartbeat again without releasing container from app2
r = resource();
@ -329,10 +363,11 @@ public class TestRMWebServicesSchedulerActivities
verifyNumberOfAllocations(json, 1);
verifyQueueOrder(json.getJSONObject("allocations"), "b1");
allocations = json.getJSONObject("allocations");
verifyStateOfAllocations(allocations, "finalAllocationState", "SKIPPED");
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyQueueOrder(allocation, "b1");
verifyStateOfAllocations(allocation, FN_ACT_FINAL_ALLOCATION_STATE,
"RESERVED");
// Finish application 2
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
@ -365,10 +400,10 @@ public class TestRMWebServicesSchedulerActivities
verifyNumberOfAllocations(json, 1);
verifyQueueOrder(json.getJSONObject("allocations"), "b1");
allocations = json.getJSONObject("allocations");
verifyStateOfAllocations(allocations, "finalAllocationState",
allocations = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyQueueOrder(allocations, "b1");
verifyStateOfAllocations(allocations, FN_ACT_FINAL_ALLOCATION_STATE,
"ALLOCATED_FROM_RESERVED");
} finally {
rm.stop();
@ -411,14 +446,15 @@ public class TestRMWebServicesSchedulerActivities
verifyNumberOfAllocations(json, 1);
JSONObject allocations = json.getJSONObject("allocations");
verifyStateOfAllocations(allocations, "finalAllocationState",
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyStateOfAllocations(allocation, FN_ACT_FINAL_ALLOCATION_STATE,
"ALLOCATED");
// Increase number of nodes to 6 since request node has been added
verifyNumberOfNodes(allocations, 6);
verifyNumberOfNodes(allocation, 6);
verifyQueueOrder(json.getJSONObject("allocations"), "root-b-b1");
verifyQueueOrder(allocation, "root-b-b1");
} finally {
rm.stop();
}
@ -451,22 +487,27 @@ public class TestRMWebServicesSchedulerActivities
//Check app activities
verifyNumberOfAllocations(json, 1);
JSONObject allocations = json.getJSONObject("allocations");
verifyStateOfAllocations(allocations, "allocationState", "ALLOCATED");
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyStateOfAllocations(allocation, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
//Check request allocation
JSONObject requestAllocationObj =
allocations.getJSONObject("requestAllocation");
verifyStateOfAllocations(requestAllocationObj, "allocationState",
getFirstSubNodeFromJson(allocation, FN_APP_ACT_CHILDREN);
verifyStateOfAllocations(requestAllocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
assertEquals("0", requestAllocationObj.optString("requestPriority"));
assertEquals("-1", requestAllocationObj.optString("allocationRequestId"));
assertEquals(0,
requestAllocationObj.optInt(FN_ACT_REQUEST_PRIORITY));
assertEquals(-1,
requestAllocationObj.optLong(FN_ACT_ALLOCATION_REQUEST_ID));
//Check allocation attempts
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
JSONObject allocationAttemptObj =
requestAllocationObj.getJSONObject("allocationAttempt");
verifyStateOfAllocations(allocationAttemptObj, "allocationState",
"ALLOCATED");
assertNotNull(allocationAttemptObj.get("nodeId"));
List<JSONObject> allocationAttempts =
getSubNodesFromJson(requestAllocationObj, FN_APP_ACT_CHILDREN);
assertEquals(1, allocationAttempts.size());
verifyStateOfAllocations(allocationAttempts.get(0),
FN_ACT_ALLOCATION_STATE, "ALLOCATED");
assertNotNull(allocationAttempts.get(0).get(FN_ACT_NODE_ID));
} finally {
rm.stop();
}
@ -508,10 +549,11 @@ public class TestRMWebServicesSchedulerActivities
verifyNumberOfAllocations(json, 10);
JSONArray allocations = json.getJSONArray("allocations");
for (int i = 0; i < allocations.length(); i++) {
verifyStateOfAllocations(allocations.getJSONObject(i),
"allocationState", "ALLOCATED");
List<JSONObject> allocations =
getSubNodesFromJson(json, FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
for (int i = 0; i < allocations.size(); i++) {
verifyStateOfAllocations(allocations.get(i),
FN_ACT_ALLOCATION_STATE, "ALLOCATED");
}
} finally {
rm.stop();
@ -643,8 +685,7 @@ public class TestRMWebServicesSchedulerActivities
}
@Test (timeout=30000)
public void testInsufficientResourceDiagnostic()
throws Exception {
public void testInsufficientResourceDiagnostic() throws Exception {
rm.start();
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
@ -664,7 +705,8 @@ public class TestRMWebServicesSchedulerActivities
response.getType().toString());
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("waiting for next allocation",
json.getString("diagnostic"));
getFirstSubNodeFromJson(json, FN_SCHEDULER_ACT_ROOT)
.optString(FN_ACT_DIAGNOSTIC));
am1.allocate(Arrays.asList(ResourceRequest
.newInstance(Priority.UNDEFINED, "*",
@ -682,24 +724,26 @@ public class TestRMWebServicesSchedulerActivities
json = response.getEntity(JSONObject.class);
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
// check diagnostics
Predicate<JSONObject> findReqPred =
(obj) -> obj.optString("name").equals("request_-1_-1");
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1");
List<JSONObject> app2ReqObjs =
ActivitiesTestUtils.findInAllocations(allocationObj, findReqPred);
assertEquals(1, app2ReqObjs.size());
JSONObject reqChild = app2ReqObjs.get(0).getJSONObject("children");
assertTrue(reqChild.getString("diagnostic")
.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
List<JSONObject> reqAllocations =
getSubNodesFromJson(app2ReqObjs.get(0), FN_SCHEDULER_ACT_CHILDREN);
assertEquals(1, reqAllocations.size());
assertTrue(reqAllocations.get(0).getString(FN_ACT_DIAGNOSTIC)
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
} finally {
rm.stop();
}
}
@Test (timeout=30000)
public void testPlacementConstraintDiagnostic()
throws Exception {
public void testPlacementConstraintDiagnostic() throws Exception {
rm.start();
CapacityScheduler cs = (CapacityScheduler)rm.getResourceScheduler();
@ -728,7 +772,8 @@ public class TestRMWebServicesSchedulerActivities
response.getType().toString());
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("waiting for next allocation",
json.getString("diagnostic"));
getFirstSubNodeFromJson(json, FN_SCHEDULER_ACT_ROOT)
.optString(FN_ACT_DIAGNOSTIC));
// trigger scheduling
cs.handle(new NodeUpdateSchedulerEvent(
@ -742,15 +787,17 @@ public class TestRMWebServicesSchedulerActivities
json = response.getEntity(JSONObject.class);
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
// check diagnostics
Predicate<JSONObject> findReqPred =
(obj) -> obj.optString("name").equals("request_1_1");
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_1");
List<JSONObject> reqObjs =
ActivitiesTestUtils.findInAllocations(allocationObj, findReqPred);
assertEquals(1, reqObjs.size());
JSONObject reqChild = reqObjs.get(0).getJSONObject("children");
assertTrue(reqChild.getString("diagnostic")
JSONObject reqChild =
getFirstSubNodeFromJson(reqObjs.get(0), FN_SCHEDULER_ACT_CHILDREN);
assertTrue(reqChild.getString(FN_ACT_DIAGNOSTIC)
.contains(UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX));
} finally {
rm.stop();
@ -758,8 +805,7 @@ public class TestRMWebServicesSchedulerActivities
}
@Test (timeout=30000)
public void testAppInsufficientResourceDiagnostic()
throws Exception {
public void testAppInsufficientResourceDiagnostic() throws Exception {
rm.start();
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
@ -776,7 +822,7 @@ public class TestRMWebServicesSchedulerActivities
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// am1 asks for 1 * 5GB container
am1.allocate(Arrays.asList(ResourceRequest
@ -788,24 +834,24 @@ public class TestRMWebServicesSchedulerActivities
json = ActivitiesTestUtils.requestWebResource(r, params);
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
JSONObject requestAllocationObj =
allocationObj.getJSONObject("requestAllocation");
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
JSONObject allocationAttemptObj =
requestAllocationObj.getJSONObject("allocationAttempt");
verifyStateOfAllocations(allocationAttemptObj, "allocationState",
JSONObject allocationAttemptObj = getFirstSubNodeFromJson(
requestAllocationObj, FN_APP_ACT_CHILDREN);
verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ALLOCATION_STATE,
"SKIPPED");
assertTrue(allocationAttemptObj.optString("diagnostic")
.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC)
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
} finally {
rm.stop();
}
}
@Test (timeout=30000)
public void testAppPlacementConstraintDiagnostic()
throws Exception {
@Test(timeout=30000)
public void testAppPlacementConstraintDiagnostic() throws Exception {
rm.start();
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
@ -822,7 +868,7 @@ public class TestRMWebServicesSchedulerActivities
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// am1 asks for 1 * 5GB container with PC expression: in,node,foo
PlacementConstraint pcExpression = PlacementConstraints
@ -840,15 +886,16 @@ public class TestRMWebServicesSchedulerActivities
json = ActivitiesTestUtils.requestWebResource(r, params);
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
JSONObject requestAllocationObj =
allocationObj.getJSONObject("requestAllocation");
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
JSONObject allocationAttemptObj =
requestAllocationObj.getJSONObject("allocationAttempt");
verifyStateOfAllocations(allocationAttemptObj, "allocationState",
JSONObject allocationAttemptObj = getFirstSubNodeFromJson(
requestAllocationObj, FN_APP_ACT_CHILDREN);
verifyStateOfAllocations(allocationAttemptObj, FN_ACT_ALLOCATION_STATE,
"SKIPPED");
assertTrue(allocationAttemptObj.optString("diagnostic")
assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC)
.contains(UNMATCHED_PARTITION_OR_PC_DIAGNOSTIC_PREFIX));
} finally {
rm.stop();
@ -873,7 +920,7 @@ public class TestRMWebServicesSchedulerActivities
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// am1 asks for 1 * 1GB container with requestPriority=-1
// and allocationRequestId=1
@ -919,23 +966,22 @@ public class TestRMWebServicesSchedulerActivities
cs.handle(new NodeUpdateSchedulerEvent(
rm.getRMContext().getRMNodes().get(nm1.getNodeId())));
// query app activities with requestPriorities={0,1}
// query app activities with requestPriorities={0,-1}
MultivaluedMapImpl filterParams1 = new MultivaluedMapImpl(params);
filterParams1.add(RMWSConsts.REQUEST_PRIORITIES, "-1");
filterParams1.add(RMWSConsts.REQUEST_PRIORITIES, "0");
filterParams1.add(RMWSConsts.REQUEST_PRIORITIES, "0,-1");
json = ActivitiesTestUtils.requestWebResource(r, filterParams1);
verifyNumberOfAllocations(json, 4);
// query app activities with requestPriorities=0
// query app activities with requestPriorities=-1
MultivaluedMapImpl filterParams2 = new MultivaluedMapImpl(params);
filterParams2.add(RMWSConsts.REQUEST_PRIORITIES, "-1");
json = ActivitiesTestUtils.requestWebResource(r, filterParams2);
verifyNumberOfAllocations(json, 2);
JSONArray allocations = json.getJSONArray("allocations");
JSONArray allocations =
json.getJSONObject(FN_APP_ACT_ROOT).getJSONArray(FN_ACT_ALLOCATIONS);
for (int i=0; i<allocations.length(); i++) {
assertEquals("-1",
allocations.getJSONObject(i).getJSONObject("requestAllocation")
.optString("requestPriority"));
assertEquals("-1", getFirstSubNodeFromJson(allocations.getJSONObject(i),
FN_APP_ACT_CHILDREN).optString(FN_ACT_REQUEST_PRIORITY));
}
// query app activities with allocationRequestId=1
@ -943,11 +989,11 @@ public class TestRMWebServicesSchedulerActivities
filterParams3.add(RMWSConsts.ALLOCATION_REQUEST_IDS, "1");
json = ActivitiesTestUtils.requestWebResource(r, filterParams3);
verifyNumberOfAllocations(json, 2);
allocations = json.getJSONArray("allocations");
allocations =
json.getJSONObject(FN_APP_ACT_ROOT).getJSONArray(FN_ACT_ALLOCATIONS);
for (int i = 0; i < allocations.length(); i++) {
assertEquals("1",
allocations.getJSONObject(i).getJSONObject("requestAllocation")
.optString("allocationRequestId"));
assertEquals("1", getFirstSubNodeFromJson(allocations.getJSONObject(i),
FN_APP_ACT_CHILDREN).optString(FN_ACT_ALLOCATION_REQUEST_ID));
}
// query app activities with requestPriorities=0 and allocationRequestId=1
@ -956,11 +1002,34 @@ public class TestRMWebServicesSchedulerActivities
filterParams4.add(RMWSConsts.ALLOCATION_REQUEST_IDS, "1");
json = ActivitiesTestUtils.requestWebResource(r, filterParams4);
verifyNumberOfAllocations(json, 1);
JSONObject allocation = json.getJSONObject("allocations");
assertEquals("0", allocation.getJSONObject("requestAllocation")
.optString("requestPriority"));
assertEquals("1", allocation.getJSONObject("requestAllocation")
.optString("allocationRequestId"));
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
JSONObject request =
getFirstSubNodeFromJson(allocation, FN_APP_ACT_CHILDREN);
assertEquals("0", request.optString(FN_ACT_REQUEST_PRIORITY));
assertEquals("1", request.optString(FN_ACT_ALLOCATION_REQUEST_ID));
// query app activities with requestPriorities=-1
// and allocationRequestId={1,2}
MultivaluedMapImpl filterParams5 = new MultivaluedMapImpl(params);
filterParams5.add(RMWSConsts.REQUEST_PRIORITIES, "-1");
filterParams5.add(RMWSConsts.ALLOCATION_REQUEST_IDS, "1,2");
json = ActivitiesTestUtils.requestWebResource(r, filterParams5);
verifyNumberOfAllocations(json, 2);
allocations =
json.getJSONObject(FN_APP_ACT_ROOT).getJSONArray(FN_ACT_ALLOCATIONS);
for (int i = 0; i < allocations.length(); i++) {
assertEquals("-1", getFirstSubNodeFromJson(allocations.getJSONObject(i),
FN_APP_ACT_CHILDREN).optString(FN_ACT_REQUEST_PRIORITY));
}
// query app activities with requestPriorities=-1
// and allocationRequestId={-1,1}
MultivaluedMapImpl filterParams6 = new MultivaluedMapImpl(params);
filterParams6.add(RMWSConsts.REQUEST_PRIORITIES, "-1");
filterParams6.add(RMWSConsts.ALLOCATION_REQUEST_IDS, "-1,1");
json = ActivitiesTestUtils.requestWebResource(r, filterParams6);
verifyNumberOfAllocations(json, 1);
} finally {
rm.stop();
}
@ -982,7 +1051,7 @@ public class TestRMWebServicesSchedulerActivities
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// am1 asks for 1 * 5GB container
am1.allocate("*", 5120, 1, new ArrayList<>());
@ -1016,19 +1085,20 @@ public class TestRMWebServicesSchedulerActivities
// query all app activities with invalid limit
params.putSingle(RMWSConsts.LIMIT, "STRING");
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("limit must be integer!", json.getString("diagnostic"));
assertEquals("limit must be integer!",
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// query all app activities with limit = 0
params.putSingle(RMWSConsts.LIMIT, "0");
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("limit must be greater than 0!",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// query all app activities with limit < 0
params.putSingle(RMWSConsts.LIMIT, "-3");
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("limit must be greater than 0!",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
} finally {
rm.stop();
}
@ -1052,17 +1122,18 @@ public class TestRMWebServicesSchedulerActivities
params.add("maxTime", 1); //only last for 1 second
// testing invalid action
params.add(RMWSConsts.ACTIONS, "get");
params.add(RMWSConsts.ACTIONS, "invalid-action");
params.add(RMWSConsts.ACTIONS, "get,invalid-action");
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertTrue(json.getString("diagnostic").startsWith("Got invalid action"));
assertTrue(json.getJSONObject(FN_APP_ACT_ROOT)
.getString(FN_ACT_DIAGNOSTIC).startsWith("Got invalid action"));
/*
* testing get action
*/
params.putSingle(RMWSConsts.ACTIONS, "get");
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display", json.getString("diagnostic"));
assertEquals("waiting for display",
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// trigger scheduling
cs.handle(new NodeUpdateSchedulerEvent(
@ -1071,7 +1142,8 @@ public class TestRMWebServicesSchedulerActivities
// app activities won't be recorded
params.putSingle(RMWSConsts.ACTIONS, "get");
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display", json.getString("diagnostic"));
assertEquals("waiting for display",
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// trigger scheduling
cs.handle(new NodeUpdateSchedulerEvent(
@ -1082,8 +1154,8 @@ public class TestRMWebServicesSchedulerActivities
*/
params.putSingle(RMWSConsts.ACTIONS, "refresh");
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("Successfully notified actions: refresh",
json.getString("diagnostic"));
assertEquals("Successfully received action: refresh",
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
// trigger scheduling
cs.handle(new NodeUpdateSchedulerEvent(
@ -1104,8 +1176,7 @@ public class TestRMWebServicesSchedulerActivities
* testing update and get actions
*/
params.remove(RMWSConsts.ACTIONS);
params.add(RMWSConsts.ACTIONS, "refresh");
params.add(RMWSConsts.ACTIONS, "get");
params.add(RMWSConsts.ACTIONS, "refresh,get");
json = ActivitiesTestUtils.requestWebResource(r, params);
verifyNumberOfAllocations(json, 1);
@ -1149,7 +1220,7 @@ public class TestRMWebServicesSchedulerActivities
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display",
json.getString("diagnostic"));
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
// am1 asks for 1 * 5GB container
@ -1170,23 +1241,191 @@ public class TestRMWebServicesSchedulerActivities
// verify that response contains an allocation summary for all nodes
verifyNumberOfAllocations(json, 1);
JSONObject allocation = json.getJSONObject("allocations");
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
JSONObject reqestAllocation =
allocation.getJSONObject("requestAllocation");
JSONArray attempts = reqestAllocation.getJSONArray("allocationAttempt");
getFirstSubNodeFromJson(allocation, FN_APP_ACT_CHILDREN);
JSONArray attempts = reqestAllocation.getJSONArray(FN_APP_ACT_CHILDREN);
assertEquals(2, attempts.length());
for (int i = 0; i < attempts.length(); i++) {
JSONObject attempt = attempts.getJSONObject(i);
if (attempt.getString("allocationState").equals("SKIPPED")) {
JSONArray nodeIds = attempt.optJSONArray("nodeIds");
assertEquals(2, nodeIds.length());
} else if (attempt.getString("allocationState").equals("RESERVED")) {
if (attempt.getString(FN_ACT_ALLOCATION_STATE)
.equals(ActivityState.SKIPPED.name())) {
assertEquals(2, attempt.getJSONArray(FN_ACT_NODE_IDS).length());
} else if (attempt.getString(FN_ACT_ALLOCATION_STATE)
.equals(ActivityState.RESERVED.name())) {
assertEquals(1, attempt.getJSONArray(FN_ACT_NODE_IDS).length());
assertEquals(nm1.getNodeId().toString(),
attempt.getString("nodeIds"));
attempt.getJSONArray(FN_ACT_NODE_IDS).getString(0));
}
}
} finally {
rm.stop();
}
}
@Test
public void testNodeSkippedBecauseOfRelaxLocality() throws Exception {
//Start RM so that it accepts app submissions
rm.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 4 * 1024,
rm.getResourceTrackerService());
MockNM nm2 = new MockNM("127.0.0.2:1234", 4 * 1024,
rm.getResourceTrackerService());
nm1.registerNode();
nm2.registerNode();
try {
RMApp app1 = rm.submitApp(10, "app1", "user1", null, "b1");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
am1.allocate(Arrays.asList(
ResourceRequest.newBuilder().priority(Priority.UNDEFINED)
.resourceName("127.0.0.2")
.capability(Resources.createResource(1024)).numContainers(1)
.build(),
ResourceRequest.newBuilder().priority(Priority.UNDEFINED)
.resourceName("/default-rack")
.capability(Resources.createResource(1024)).numContainers(1)
.relaxLocality(false)
.build(),
ResourceRequest.newBuilder().priority(Priority.UNDEFINED)
.resourceName("*")
.capability(Resources.createResource(1024)).numContainers(1)
.relaxLocality(false)
.build()), null);
WebResource r = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH)
.path(ActivitiesTestUtils.format(RMWSConsts.SCHEDULER_APP_ACTIVITIES,
app1.getApplicationId().toString()));
ActivitiesTestUtils.requestWebResource(r, null);
WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH)
.path(RMWSConsts.SCHEDULER_ACTIVITIES);
ActivitiesTestUtils.requestWebResource(sr, null);
nm1.nodeHeartbeat(true);
Thread.sleep(1000);
JSONObject appActivitiesJson =
ActivitiesTestUtils.requestWebResource(r, null);
JSONObject schedulerActivitiesJson =
ActivitiesTestUtils.requestWebResource(sr, null);
// verify app activities
verifyNumberOfAllocations(appActivitiesJson, 1);
List<JSONObject> allocationAttempts = ActivitiesTestUtils
.getSubNodesFromJson(appActivitiesJson, FN_APP_ACT_ROOT,
FN_ACT_ALLOCATIONS, FN_APP_ACT_CHILDREN, FN_APP_ACT_CHILDREN);
assertEquals(1, allocationAttempts.size());
assertEquals(
ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY,
allocationAttempts.get(0).optString(FN_ACT_DIAGNOSTIC));
/*
* verify scheduler activities
*/
verifyNumberOfAllocations(schedulerActivitiesJson, 1);
// verify request activity
Predicate<JSONObject> findA1AQueuePred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1");
List<JSONObject> reqObjs = ActivitiesTestUtils.findInAllocations(
getFirstSubNodeFromJson(schedulerActivitiesJson,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS),
findA1AQueuePred);
assertEquals(1, reqObjs.size());
assertEquals(ActivityState.SKIPPED.name(),
reqObjs.get(0).optString(FN_ACT_ALLOCATION_STATE));
// verify node activity
JSONObject nodeObj =
getFirstSubNodeFromJson(reqObjs.get(0), FN_SCHEDULER_ACT_CHILDREN);
assertEquals(nm1.getNodeId().toString(),
nodeObj.optString(FN_ACT_NODE_ID));
assertEquals(
ActivityDiagnosticConstant.NODE_SKIPPED_BECAUSE_OF_RELAX_LOCALITY,
nodeObj.optString(FN_ACT_DIAGNOSTIC));
} finally {
rm.stop();
}
}
@Test
public void testQueueSkippedBecauseOfHeadroom() throws Exception {
//Start RM so that it accepts app submissions
rm.start();
MockNM nm1 = new MockNM("127.0.0.1:1234", 4 * 1024,
rm.getResourceTrackerService());
MockNM nm2 = new MockNM("127.0.0.2:1234", 4 * 1024,
rm.getResourceTrackerService());
nm1.registerNode();
nm2.registerNode();
try {
RMApp app1 = rm.submitApp(10, "app1", "user1", null, "a1a");
MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1);
am1.allocate(Arrays.asList(
ResourceRequest.newBuilder().priority(Priority.UNDEFINED)
.resourceName("*").capability(Resources.createResource(3072))
.numContainers(1).relaxLocality(false).build()), null);
WebResource r = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH)
.path(ActivitiesTestUtils.format(RMWSConsts.SCHEDULER_APP_ACTIVITIES,
app1.getApplicationId().toString()));
ActivitiesTestUtils.requestWebResource(r, null);
WebResource sr = resource().path(RMWSConsts.RM_WEB_SERVICE_PATH)
.path(RMWSConsts.SCHEDULER_ACTIVITIES);
ActivitiesTestUtils.requestWebResource(sr, null);
nm1.nodeHeartbeat(true);
Thread.sleep(1000);
JSONObject appActivitiesJson =
ActivitiesTestUtils.requestWebResource(r, null);
JSONObject schedulerActivitiesJson =
ActivitiesTestUtils.requestWebResource(sr, null);
// verify app activities: diagnostic should be attached at request level
// and there should be no allocation attempts at node level
verifyNumberOfAllocations(appActivitiesJson, 1);
List<JSONObject> requestAllocations = ActivitiesTestUtils
.getSubNodesFromJson(appActivitiesJson, FN_APP_ACT_ROOT,
FN_ACT_ALLOCATIONS, FN_APP_ACT_CHILDREN);
assertEquals(1, requestAllocations.size());
assertEquals(ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM,
requestAllocations.get(0).optString(FN_ACT_DIAGNOSTIC));
assertFalse(requestAllocations.get(0).has(FN_APP_ACT_CHILDREN));
// verify scheduler activities: diagnostic should be attached at request
// level and queue level
verifyNumberOfAllocations(schedulerActivitiesJson, 1);
// verify at queue level
Predicate<JSONObject> findA1AQueuePred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("a1a");
List<JSONObject> a1aQueueObj = ActivitiesTestUtils.findInAllocations(
getFirstSubNodeFromJson(schedulerActivitiesJson,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findA1AQueuePred);
assertEquals(1, a1aQueueObj.size());
assertEquals(ActivityState.REJECTED.name(),
a1aQueueObj.get(0).optString(FN_ACT_ALLOCATION_STATE));
assertTrue(a1aQueueObj.get(0).optString(FN_ACT_DIAGNOSTIC).startsWith(
ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM));
// verify at request level
Predicate<JSONObject> findReqPred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_-1_-1");
List<JSONObject> reqObj = ActivitiesTestUtils.findInAllocations(
getFirstSubNodeFromJson(schedulerActivitiesJson,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS), findReqPred);
assertEquals(1, reqObj.size());
assertEquals(ActivityState.REJECTED.name(),
reqObj.get(0).optString(FN_ACT_ALLOCATION_STATE));
assertTrue(reqObj.get(0).optString(FN_ACT_DIAGNOSTIC).startsWith(
ActivityDiagnosticConstant.QUEUE_DO_NOT_HAVE_ENOUGH_HEADROOM));
} finally {
rm.stop();
}
}
}

View File

@ -57,8 +57,21 @@ import java.util.Arrays;
import java.util.List;
import java.util.function.Predicate;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATIONS;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_ALLOCATION_STATE;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_COUNT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_DIAGNOSTIC;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_FINAL_ALLOCATION_STATE;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_ACT_NODE_IDS;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_CHILDREN;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_APP_ACT_ROOT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_CHILDREN;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_NAME;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.FN_SCHEDULER_ACT_ROOT;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.findInAllocations;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getFirstSubNodeFromJson;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.getSubNodesFromJson;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocationAttempts;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyNumberOfAllocations;
import static org.apache.hadoop.yarn.server.resourcemanager.webapp.ActivitiesTestUtils.verifyStateOfAllocations;
@ -184,9 +197,10 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
verifyNumberOfAllocations(json, 1);
JSONObject allocations = json.getJSONObject("allocations");
JSONObject allocations = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyStateOfAllocations(allocations,
"finalAllocationState", "ALLOCATED");
FN_ACT_FINAL_ALLOCATION_STATE, "ALLOCATED");
} finally {
rm.stop();
}
@ -225,9 +239,10 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
JSONObject json = response.getEntity(JSONObject.class);
verifyNumberOfAllocations(json, 1);
JSONObject allocations = json.getJSONObject("allocations");
verifyStateOfAllocations(allocations,
"finalAllocationState", "SKIPPED");
JSONObject allocation = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyStateOfAllocations(allocation,
FN_ACT_FINAL_ALLOCATION_STATE, "SKIPPED");
} finally {
rm.stop();
}
@ -254,7 +269,8 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
app1.getApplicationId().toString()));
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display", json.getString("diagnostic"));
assertEquals("waiting for display",
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
//Trigger scheduling for this app
CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler();
@ -267,22 +283,24 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
JSONObject requestAllocationObj =
allocationObj.getJSONObject("requestAllocation");
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 2);
verifyStateOfAllocations(requestAllocationObj, "allocationState",
verifyStateOfAllocations(requestAllocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
JSONArray allocationAttemptArray =
requestAllocationObj.getJSONArray("allocationAttempt");
requestAllocationObj.getJSONArray(FN_APP_ACT_CHILDREN);
JSONObject allocationAttempt1 = allocationAttemptArray.getJSONObject(0);
verifyStateOfAllocations(allocationAttempt1, "allocationState",
verifyStateOfAllocations(allocationAttempt1, FN_ACT_ALLOCATION_STATE,
"SKIPPED");
assertTrue(allocationAttempt1.optString("diagnostic")
.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
assertTrue(allocationAttempt1.optString(FN_ACT_DIAGNOSTIC)
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
JSONObject allocationAttempt2 = allocationAttemptArray.getJSONObject(1);
verifyStateOfAllocations(allocationAttempt2, "allocationState",
verifyStateOfAllocations(allocationAttempt2, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
} finally {
rm.stop();
@ -313,7 +331,8 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
assertEquals(MediaType.APPLICATION_JSON_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
JSONObject json = response.getEntity(JSONObject.class);
assertEquals("waiting for next allocation", json.getString("diagnostic"));
assertEquals("waiting for next allocation",
json.getJSONObject(FN_SCHEDULER_ACT_ROOT).getString("diagnostic"));
//Request a container for am2, will reserve a container on nm1
am2.allocate("*", 4096, 1, new ArrayList<>());
@ -329,29 +348,32 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
//Check app activities
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
//Check diagnostic for request of app1
Predicate<JSONObject> findApp1Pred = (obj) -> obj.optString("name")
.equals(app1.getApplicationId().toString());
Predicate<JSONObject> findApp1Pred =
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME)
.equals(app1.getApplicationId().toString());
JSONObject app1Obj =
findInAllocations(allocationObj, findApp1Pred).get(0);
assertEquals("SKIPPED", app1Obj.optString("allocationState"));
assertEquals("SKIPPED", app1Obj.optString(FN_ACT_ALLOCATION_STATE));
assertEquals(ActivityDiagnosticConstant.APPLICATION_DO_NOT_NEED_RESOURCE,
app1Obj.optString("diagnostic"));
app1Obj.optString(FN_ACT_DIAGNOSTIC));
//Check diagnostic for request of app2
Predicate<JSONObject> findApp2ReqPred =
(obj) -> obj.optString("name").equals("request_1_-1");
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_-1");
List<JSONObject> app2ReqObjs =
findInAllocations(allocationObj, findApp2ReqPred);
assertEquals(1, app2ReqObjs.size());
JSONArray app2ReqChildren = app2ReqObjs.get(0).getJSONArray("children");
JSONArray app2ReqChildren =
app2ReqObjs.get(0).getJSONArray(FN_SCHEDULER_ACT_CHILDREN);
assertEquals(4, app2ReqChildren.length());
for (int i = 0; i < app2ReqChildren.length(); i++) {
JSONObject reqChild = app2ReqChildren.getJSONObject(i);
if (reqChild.getString("allocationState").equals("SKIPPED")) {
String diagnostic = reqChild.getString("diagnostic");
assertTrue(
diagnostic.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
if (reqChild.getString(FN_ACT_ALLOCATION_STATE).equals("SKIPPED")) {
String diagnostic = reqChild.getString(FN_ACT_DIAGNOSTIC);
assertTrue(diagnostic
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
}
}
} finally {
@ -378,7 +400,8 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
app1.getApplicationId().toString()));
MultivaluedMapImpl params = new MultivaluedMapImpl();
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display", json.getString("diagnostic"));
assertEquals("waiting for display",
json.getJSONObject(FN_APP_ACT_ROOT).getString(FN_ACT_DIAGNOSTIC));
//Request two containers with different priority for am1
am1.allocate(Arrays.asList(ResourceRequest
@ -398,32 +421,35 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
//Check app activities
json = ActivitiesTestUtils.requestWebResource(r, params);
verifyNumberOfAllocations(json, 2);
JSONArray allocationArray = json.getJSONArray("allocations");
JSONArray allocationArray =
json.getJSONObject(FN_APP_ACT_ROOT).getJSONArray(FN_ACT_ALLOCATIONS);
//Check first activity is for second allocation with RESERVED state
JSONObject allocationObj = allocationArray.getJSONObject(0);
verifyStateOfAllocations(allocationObj, "allocationState", "RESERVED");
verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE,
"RESERVED");
JSONObject requestAllocationObj =
allocationObj.getJSONObject("requestAllocation");
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 4);
JSONArray allocationAttemptArray =
requestAllocationObj.getJSONArray("allocationAttempt");
requestAllocationObj.getJSONArray(FN_APP_ACT_CHILDREN);
for (int i=0; i<allocationAttemptArray.length(); i++) {
JSONObject allocationAttemptObj =
allocationAttemptArray.getJSONObject(i);
if (i != allocationAttemptArray.length()-1) {
assertTrue(allocationAttemptObj.optString("diagnostic")
.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC)
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
}
}
// check second activity is for first allocation with ALLOCATED state
allocationObj = allocationArray.getJSONObject(1);
verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED");
requestAllocationObj = allocationObj.getJSONObject("requestAllocation");
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
verifyStateOfAllocations(requestAllocationObj, "allocationState",
verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
}
finally {
requestAllocationObj =
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
verifyStateOfAllocations(requestAllocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
} finally {
rm.stop();
}
}
@ -451,8 +477,8 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
*/
params.add(RMWSConsts.GROUP_BY, "NON-EXIST-GROUP-BY");
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
Assert.assertTrue(json.getString("diagnostic")
.startsWith("Got invalid groupBy:"));
Assert.assertTrue(json.getJSONObject(FN_SCHEDULER_ACT_ROOT)
.getString(FN_ACT_DIAGNOSTIC).startsWith("Got invalid groupBy:"));
params.remove(RMWSConsts.GROUP_BY);
/*
@ -461,7 +487,9 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
params.add(RMWSConsts.GROUP_BY, RMWSConsts.ActivitiesGroupBy.
DIAGNOSTIC.name().toLowerCase());
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for next allocation", json.getString("diagnostic"));
assertEquals("waiting for next allocation",
json.getJSONObject(FN_SCHEDULER_ACT_ROOT)
.getString(FN_ACT_DIAGNOSTIC));
//Request a container for am2, will reserve a container on nm1
am1.allocate("*", 4096, 1, new ArrayList<>());
@ -472,27 +500,29 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
//Check activities
verifyNumberOfAllocations(json, 1);
JSONObject allocationObj = json.getJSONObject("allocations");
JSONObject allocationObj = getFirstSubNodeFromJson(json,
FN_SCHEDULER_ACT_ROOT, FN_ACT_ALLOCATIONS);
//Check diagnostic for request of app1
Predicate<JSONObject> findReqPred =
(obj) -> obj.optString("name").equals("request_1_-1");
(obj) -> obj.optString(FN_SCHEDULER_ACT_NAME).equals("request_1_-1");
List<JSONObject> reqObjs =
findInAllocations(allocationObj, findReqPred);
assertEquals(1, reqObjs.size());
JSONArray reqChildren = reqObjs.get(0).getJSONArray("children");
JSONArray reqChildren =
reqObjs.get(0).getJSONArray(FN_SCHEDULER_ACT_CHILDREN);
assertEquals(2, reqChildren.length());
for (int i = 0; i < reqChildren.length(); i++) {
JSONObject reqChild = reqChildren.getJSONObject(i);
if (reqChild.getString("allocationState")
if (reqChild.getString(FN_ACT_ALLOCATION_STATE)
.equals(AllocationState.SKIPPED.name())) {
assertEquals("3", reqChild.getString("count"));
assertEquals(3, reqChild.getJSONArray("nodeIds").length());
assertTrue(reqChild.optString("diagnostic")
.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
} else if (reqChild.getString("allocationState")
assertEquals("3", reqChild.getString(FN_ACT_COUNT));
assertEquals(3, reqChild.getJSONArray(FN_ACT_NODE_IDS).length());
assertTrue(reqChild.optString(FN_ACT_DIAGNOSTIC)
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
} else if (reqChild.getString(FN_ACT_ALLOCATION_STATE)
.equals(AllocationState.RESERVED.name())) {
assertEquals("1", reqChild.getString("count"));
assertNotNull(reqChild.getString("nodeIds"));
assertEquals("1", reqChild.getString(FN_ACT_COUNT));
assertNotNull(reqChild.getString(FN_ACT_NODE_IDS));
} else {
Assert.fail("Allocation state should be "
+ AllocationState.SKIPPED.name() + " or "
@ -528,7 +558,8 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
*/
params.add(RMWSConsts.GROUP_BY, "NON-EXIST-GROUP-BY");
JSONObject json = ActivitiesTestUtils.requestWebResource(r, params);
Assert.assertTrue(json.getString("diagnostic")
Assert.assertTrue(json.getJSONObject(FN_APP_ACT_ROOT)
.getString(FN_ACT_DIAGNOSTIC)
.startsWith("Got invalid groupBy:"));
params.remove(RMWSConsts.GROUP_BY);
@ -538,7 +569,8 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
params.add(RMWSConsts.GROUP_BY, RMWSConsts.ActivitiesGroupBy.
DIAGNOSTIC.name().toLowerCase());
json = ActivitiesTestUtils.requestWebResource(r, params);
assertEquals("waiting for display", json.getString("diagnostic"));
assertEquals("waiting for display", json.getJSONObject(FN_APP_ACT_ROOT)
.getString(FN_ACT_DIAGNOSTIC));
//Request two containers with different priority for am1
am1.allocate(Arrays.asList(ResourceRequest
@ -559,29 +591,31 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
//Check app activities
verifyNumberOfAllocations(json, 2);
JSONArray allocationArray = json.getJSONArray("allocations");
List<JSONObject> allocations = getSubNodesFromJson(json,
FN_APP_ACT_ROOT, FN_ACT_ALLOCATIONS);
//Check first activity is for second allocation with RESERVED state
JSONObject allocationObj = allocationArray.getJSONObject(0);
verifyStateOfAllocations(allocationObj, "allocationState", "RESERVED");
JSONObject allocationObj = allocations.get(0);
verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE,
"RESERVED");
JSONObject requestAllocationObj =
allocationObj.getJSONObject("requestAllocation");
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 2);
JSONArray allocationAttemptArray =
requestAllocationObj.getJSONArray("allocationAttempt");
requestAllocationObj.getJSONArray(FN_APP_ACT_CHILDREN);
for (int i=0; i<allocationAttemptArray.length(); i++) {
JSONObject allocationAttemptObj =
allocationAttemptArray.getJSONObject(i);
if (allocationAttemptObj.getString("allocationState")
if (allocationAttemptObj.getString(FN_ACT_ALLOCATION_STATE)
.equals(AllocationState.SKIPPED.name())) {
assertEquals("3", allocationAttemptObj.getString("count"));
assertEquals("3", allocationAttemptObj.getString(FN_ACT_COUNT));
assertEquals(3,
allocationAttemptObj.getJSONArray("nodeIds").length());
assertTrue(allocationAttemptObj.optString("diagnostic")
.contains(INSUFFICIENT_RESOURCE_DIAGNOSTIC_PREFIX));
} else if (allocationAttemptObj.getString("allocationState")
allocationAttemptObj.getJSONArray(FN_ACT_NODE_IDS).length());
assertTrue(allocationAttemptObj.optString(FN_ACT_DIAGNOSTIC)
.contains(TOTAL_RESOURCE_INSUFFICIENT_DIAGNOSTIC_PREFIX));
} else if (allocationAttemptObj.getString(FN_ACT_ALLOCATION_STATE)
.equals(AllocationState.RESERVED.name())) {
assertEquals("1", allocationAttemptObj.getString("count"));
assertNotNull(allocationAttemptObj.getString("nodeIds"));
assertEquals("1", allocationAttemptObj.getString(FN_ACT_COUNT));
assertNotNull(allocationAttemptObj.getString(FN_ACT_NODE_IDS));
} else {
Assert.fail("Allocation state should be "
+ AllocationState.SKIPPED.name() + " or "
@ -589,16 +623,18 @@ public class TestRMWebServicesSchedulerActivitiesWithMultiNodesEnabled
}
}
// check second activity is for first allocation with ALLOCATED state
allocationObj = allocationArray.getJSONObject(1);
verifyStateOfAllocations(allocationObj, "allocationState", "ALLOCATED");
requestAllocationObj = allocationObj.getJSONObject("requestAllocation");
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
verifyStateOfAllocations(requestAllocationObj, "allocationState",
allocationObj = allocations.get(1);
verifyStateOfAllocations(allocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
JSONObject allocationAttemptObj =
requestAllocationObj.getJSONObject("allocationAttempt");
assertEquals("1", allocationAttemptObj.getString("count"));
assertNotNull(allocationAttemptObj.getString("nodeIds"));
requestAllocationObj =
getFirstSubNodeFromJson(allocationObj, FN_APP_ACT_CHILDREN);
verifyNumberOfAllocationAttempts(requestAllocationObj, 1);
verifyStateOfAllocations(requestAllocationObj, FN_ACT_ALLOCATION_STATE,
"ALLOCATED");
JSONObject allocationAttemptObj = getFirstSubNodeFromJson(
requestAllocationObj, FN_APP_ACT_CHILDREN);
assertEquals("1", allocationAttemptObj.getString(FN_ACT_COUNT));
assertNotNull(allocationAttemptObj.getString(FN_ACT_NODE_IDS));
} finally {
rm.stop();
}