YARN-4719. Add a helper library to maintain node state and allows common queries. (kasha)
commit 20d389ce61 (parent 5644137ada)
AbstractYarnScheduler.java:
@@ -27,11 +27,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.Timer;
 import java.util.TimerTask;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentMap;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -92,22 +88,10 @@ public abstract class AbstractYarnScheduler
 
   private static final Log LOG = LogFactory.getLog(AbstractYarnScheduler.class);
 
-  // Nodes in the cluster, indexed by NodeId
-  protected Map<NodeId, N> nodes = new ConcurrentHashMap<NodeId, N>();
+  protected final ClusterNodeTracker<N> nodeTracker =
+      new ClusterNodeTracker<>();
 
-  // Whole capacity of the cluster
-  protected Resource clusterResource = Resource.newInstance(0, 0);
-
   protected Resource minimumAllocation;
-  protected Resource maximumAllocation;
-  private Resource configuredMaximumAllocation;
-  private int maxNodeMemory = -1;
-  private int maxNodeVCores = -1;
-  private final ReadLock maxAllocReadLock;
-  private final WriteLock maxAllocWriteLock;
-
-  private boolean useConfiguredMaximumAllocationOnly = true;
-  private long configuredMaximumAllocationWaitTime;
 
   protected RMContext rmContext;
 
@@ -132,9 +116,6 @@ public abstract class AbstractYarnScheduler
    */
   public AbstractYarnScheduler(String name) {
     super(name);
-    ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
-    this.maxAllocReadLock = lock.readLock();
-    this.maxAllocWriteLock = lock.writeLock();
   }
 
   @Override
@@ -142,14 +123,21 @@ public abstract class AbstractYarnScheduler
     nmExpireInterval =
         conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
           YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
-    configuredMaximumAllocationWaitTime =
+    long configuredMaximumAllocationWaitTime =
         conf.getLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS,
           YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS);
+    nodeTracker.setConfiguredMaxAllocationWaitTime(
+        configuredMaximumAllocationWaitTime);
     maxClusterLevelAppPriority = getMaxPriorityFromConf(conf);
     createReleaseCache();
     super.serviceInit(conf);
   }
 
+  @VisibleForTesting
+  public ClusterNodeTracker getNodeTracker() {
+    return nodeTracker;
+  }
+
   public List<Container> getTransferredContainers(
       ApplicationAttemptId currentAttempt) {
     ApplicationId appId = currentAttempt.getApplicationId();
@@ -184,20 +172,21 @@ public abstract class AbstractYarnScheduler
    * Add blacklisted NodeIds to the list that is passed.
    *
    * @param app application attempt.
-   * @param blacklistNodeIdList the list to store blacklisted NodeIds.
    */
-  public void addBlacklistedNodeIdsToList(SchedulerApplicationAttempt app,
-      List<NodeId> blacklistNodeIdList) {
-    for (Map.Entry<NodeId, N> nodeEntry : nodes.entrySet()) {
-      if (SchedulerAppUtils.isBlacklisted(app, nodeEntry.getValue(), LOG)) {
-        blacklistNodeIdList.add(nodeEntry.getKey());
+  public List<N> getBlacklistedNodes(final SchedulerApplicationAttempt app) {
+
+    NodeFilter nodeFilter = new NodeFilter() {
+      @Override
+      public boolean accept(SchedulerNode node) {
+        return SchedulerAppUtils.isBlacklisted(app, node, LOG);
       }
-    }
+    };
+
+    return nodeTracker.getNodes(nodeFilter);
   }
 
   @Override
   public Resource getClusterResource() {
-    return clusterResource;
+    return nodeTracker.getClusterCapacity();
   }
 
   @Override
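Note (illustrative sketch, not part of this commit): getBlacklistedNodes() is one instance of the new pattern where the predicate is handed to the tracker instead of the scheduler iterating its own node map. Any other criterion over SchedulerNode can be pushed down the same way; the fragment below uses a made-up capacity check and assumes it runs inside a scheduler that holds the nodeTracker and minimumAllocation fields shown above.

    // Sketch only: select nodes that can still fit the scheduler's minimum allocation.
    List<N> nodesWithRoom = nodeTracker.getNodes(new NodeFilter() {
      @Override
      public boolean accept(SchedulerNode node) {
        return Resources.fitsIn(minimumAllocation, node.getUnallocatedResource());
      }
    });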
@@ -207,22 +196,7 @@ public abstract class AbstractYarnScheduler
 
   @Override
   public Resource getMaximumResourceCapability() {
-    Resource maxResource;
-    maxAllocReadLock.lock();
-    try {
-      if (useConfiguredMaximumAllocationOnly) {
-        if (System.currentTimeMillis() - ResourceManager.getClusterTimeStamp()
-            > configuredMaximumAllocationWaitTime) {
-          useConfiguredMaximumAllocationOnly = false;
-        }
-        maxResource = Resources.clone(configuredMaximumAllocation);
-      } else {
-        maxResource = Resources.clone(maximumAllocation);
-      }
-    } finally {
-      maxAllocReadLock.unlock();
-    }
-    return maxResource;
+    return nodeTracker.getMaxAllowedAllocation();
   }
 
   @Override
@@ -231,15 +205,7 @@ public abstract class AbstractYarnScheduler
   }
 
   protected void initMaximumResourceCapability(Resource maximumAllocation) {
-    maxAllocWriteLock.lock();
-    try {
-      if (this.configuredMaximumAllocation == null) {
-        this.configuredMaximumAllocation = Resources.clone(maximumAllocation);
-        this.maximumAllocation = Resources.clone(maximumAllocation);
-      }
-    } finally {
-      maxAllocWriteLock.unlock();
-    }
+    nodeTracker.setConfiguredMaxAllocation(maximumAllocation);
   }
 
   protected synchronized void containerLaunchedOnNode(
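Note (illustrative sketch, not part of this commit): the maximum-allocation bookkeeping that used to live in the scheduler now sits behind ClusterNodeTracker.getMaxAllowedAllocation(), which clamps the configured maximum to the largest registered node once the configured wait time has elapsed. With made-up numbers, assuming the largest NodeManager registered so far offers 4096 MB / 4 vcores:

    // Hypothetical values, for illustration only.
    nodeTracker.setConfiguredMaxAllocation(Resources.createResource(8192, 8));
    // Before the wait time elapses (or before any node registers), the configured
    // maximum <memory:8192, vCores:8> is returned unchanged. Afterwards the value
    // is clamped to the biggest node seen so far:
    Resource max = nodeTracker.getMaxAllowedAllocation();  // <memory:4096, vCores:4>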
@@ -332,8 +298,7 @@ public abstract class AbstractYarnScheduler
 
   @Override
   public SchedulerNodeReport getNodeReport(NodeId nodeId) {
-    N node = nodes.get(nodeId);
-    return node == null ? null : new SchedulerNodeReport(node);
+    return nodeTracker.getNodeReport(nodeId);
   }
 
   @Override
@@ -431,12 +396,13 @@ public abstract class AbstractYarnScheduler
           container));
 
       // recover scheduler node
-      SchedulerNode schedulerNode = nodes.get(nm.getNodeID());
+      SchedulerNode schedulerNode = nodeTracker.getNode(nm.getNodeID());
       schedulerNode.recoverContainer(rmContainer);
 
       // recover queue: update headroom etc.
       Queue queue = schedulerAttempt.getQueue();
-      queue.recoverContainer(clusterResource, schedulerAttempt, rmContainer);
+      queue.recoverContainer(
+          getClusterResource(), schedulerAttempt, rmContainer);
 
       // recover scheduler attempt
       schedulerAttempt.recoverContainer(schedulerNode, rmContainer);
@@ -621,7 +587,7 @@ public abstract class AbstractYarnScheduler
 
   @Override
   public SchedulerNode getSchedulerNode(NodeId nodeId) {
-    return nodes.get(nodeId);
+    return nodeTracker.getNode(nodeId);
   }
 
   @Override
@@ -690,18 +656,12 @@ public abstract class AbstractYarnScheduler
           + " from: " + oldResource + ", to: "
           + newResource);
 
-      nodes.remove(nm.getNodeID());
-      updateMaximumAllocation(node, false);
+      nodeTracker.removeNode(nm.getNodeID());
 
       // update resource to node
       node.setTotalResource(newResource);
 
-      nodes.put(nm.getNodeID(), (N)node);
-      updateMaximumAllocation(node, true);
-
-      // update resource to clusterResource
-      Resources.subtractFrom(clusterResource, oldResource);
-      Resources.addTo(clusterResource, newResource);
+      nodeTracker.addNode((N) node);
     } else {
       // Log resource change
       LOG.warn("Update resource on node: " + node.getNodeName()
@@ -721,80 +681,8 @@ public abstract class AbstractYarnScheduler
           + " does not support reservations");
     }
   }
 
-  protected void updateMaximumAllocation(SchedulerNode node, boolean add) {
-    Resource totalResource = node.getTotalResource();
-    maxAllocWriteLock.lock();
-    try {
-      if (add) { // added node
-        int nodeMemory = totalResource.getMemory();
-        if (nodeMemory > maxNodeMemory) {
-          maxNodeMemory = nodeMemory;
-          maximumAllocation.setMemory(Math.min(
-              configuredMaximumAllocation.getMemory(), maxNodeMemory));
-        }
-        int nodeVCores = totalResource.getVirtualCores();
-        if (nodeVCores > maxNodeVCores) {
-          maxNodeVCores = nodeVCores;
-          maximumAllocation.setVirtualCores(Math.min(
-              configuredMaximumAllocation.getVirtualCores(), maxNodeVCores));
-        }
-      } else {  // removed node
-        if (maxNodeMemory == totalResource.getMemory()) {
-          maxNodeMemory = -1;
-        }
-        if (maxNodeVCores == totalResource.getVirtualCores()) {
-          maxNodeVCores = -1;
-        }
-        // We only have to iterate through the nodes if the current max memory
-        // or vcores was equal to the removed node's
-        if (maxNodeMemory == -1 || maxNodeVCores == -1) {
-          for (Map.Entry<NodeId, N> nodeEntry : nodes.entrySet()) {
-            int nodeMemory =
-                nodeEntry.getValue().getTotalResource().getMemory();
-            if (nodeMemory > maxNodeMemory) {
-              maxNodeMemory = nodeMemory;
-            }
-            int nodeVCores =
-                nodeEntry.getValue().getTotalResource().getVirtualCores();
-            if (nodeVCores > maxNodeVCores) {
-              maxNodeVCores = nodeVCores;
-            }
-          }
-          if (maxNodeMemory == -1) {  // no nodes
-            maximumAllocation.setMemory(configuredMaximumAllocation.getMemory());
-          } else {
-            maximumAllocation.setMemory(
-                Math.min(configuredMaximumAllocation.getMemory(), maxNodeMemory));
-          }
-          if (maxNodeVCores == -1) {  // no nodes
-            maximumAllocation.setVirtualCores(configuredMaximumAllocation.getVirtualCores());
-          } else {
-            maximumAllocation.setVirtualCores(
-                Math.min(configuredMaximumAllocation.getVirtualCores(), maxNodeVCores));
-          }
-        }
-      }
-    } finally {
-      maxAllocWriteLock.unlock();
-    }
-  }
-
   protected void refreshMaximumAllocation(Resource newMaxAlloc) {
-    maxAllocWriteLock.lock();
-    try {
-      configuredMaximumAllocation = Resources.clone(newMaxAlloc);
-      int maxMemory = newMaxAlloc.getMemory();
-      if (maxNodeMemory != -1) {
-        maxMemory = Math.min(maxMemory, maxNodeMemory);
-      }
-      int maxVcores = newMaxAlloc.getVirtualCores();
-      if (maxNodeVCores != -1) {
-        maxVcores = Math.min(maxVcores, maxNodeVCores);
-      }
-      maximumAllocation = Resources.createResource(maxMemory, maxVcores);
-    } finally {
-      maxAllocWriteLock.unlock();
-    }
+    nodeTracker.setConfiguredMaxAllocation(newMaxAlloc);
   }
 
   @Override
ClusterNodeTracker.java (new file):
@@ -0,0 +1,300 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
+import org.apache.hadoop.yarn.util.resource.Resources;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+/**
+ * Helper library that:
+ * - tracks the state of all cluster {@link SchedulerNode}s
+ * - provides convenience methods to filter and sort nodes
+ */
+@InterfaceAudience.Private
+public class ClusterNodeTracker<N extends SchedulerNode> {
+  private static final Log LOG = LogFactory.getLog(ClusterNodeTracker.class);
+
+  private ReadWriteLock readWriteLock = new ReentrantReadWriteLock(true);
+  private Lock readLock = readWriteLock.readLock();
+  private Lock writeLock = readWriteLock.writeLock();
+
+  private HashMap<NodeId, N> nodes = new HashMap<>();
+  private Map<String, Integer> nodesPerRack = new HashMap<>();
+
+  private Resource clusterCapacity = Resources.clone(Resources.none());
+  private Resource staleClusterCapacity = null;
+
+  // Max allocation
+  private int maxNodeMemory = -1;
+  private int maxNodeVCores = -1;
+  private Resource configuredMaxAllocation;
+  private boolean forceConfiguredMaxAllocation = true;
+  private long configuredMaxAllocationWaitTime;
+
+  public void addNode(N node) {
+    writeLock.lock();
+    try {
+      nodes.put(node.getNodeID(), node);
+
+      // Update nodes per rack as well
+      String rackName = node.getRackName();
+      Integer numNodes = nodesPerRack.get(rackName);
+      if (numNodes == null) {
+        numNodes = 0;
+      }
+      nodesPerRack.put(rackName, ++numNodes);
+
+      // Update cluster capacity
+      Resources.addTo(clusterCapacity, node.getTotalResource());
+
+      // Update maximumAllocation
+      updateMaxResources(node, true);
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  public boolean exists(NodeId nodeId) {
+    readLock.lock();
+    try {
+      return nodes.containsKey(nodeId);
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  public N getNode(NodeId nodeId) {
+    readLock.lock();
+    try {
+      return nodes.get(nodeId);
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  public SchedulerNodeReport getNodeReport(NodeId nodeId) {
+    readLock.lock();
+    try {
+      N n = nodes.get(nodeId);
+      return n == null ? null : new SchedulerNodeReport(n);
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  public int nodeCount() {
+    readLock.lock();
+    try {
+      return nodes.size();
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  public int nodeCount(String rackName) {
+    readLock.lock();
+    String rName = rackName == null ? "NULL" : rackName;
+    try {
+      Integer nodeCount = nodesPerRack.get(rName);
+      return nodeCount == null ? 0 : nodeCount;
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  public Resource getClusterCapacity() {
+    readLock.lock();
+    try {
+      if (staleClusterCapacity == null ||
+          !Resources.equals(staleClusterCapacity, clusterCapacity)) {
+        staleClusterCapacity = Resources.clone(clusterCapacity);
+      }
+      return staleClusterCapacity;
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  public N removeNode(NodeId nodeId) {
+    writeLock.lock();
+    try {
+      N node = nodes.remove(nodeId);
+      if (node == null) {
+        LOG.warn("Attempting to remove a non-existent node " + nodeId);
+        return null;
+      }
+
+      // Update nodes per rack as well
+      String rackName = node.getRackName();
+      Integer numNodes = nodesPerRack.get(rackName);
+      if (numNodes > 0) {
+        nodesPerRack.put(rackName, --numNodes);
+      } else {
+        LOG.error("Attempting to remove node from an empty rack " + rackName);
+      }
+
+      // Update cluster capacity
+      Resources.subtractFrom(clusterCapacity, node.getTotalResource());
+
+      // Update maximumAllocation
+      updateMaxResources(node, false);
+
+      return node;
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  public void setConfiguredMaxAllocation(Resource resource) {
+    writeLock.lock();
+    try {
+      configuredMaxAllocation = Resources.clone(resource);
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  public void setConfiguredMaxAllocationWaitTime(
+      long configuredMaxAllocationWaitTime) {
+    writeLock.lock();
+    try {
+      this.configuredMaxAllocationWaitTime =
+          configuredMaxAllocationWaitTime;
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  public Resource getMaxAllowedAllocation() {
+    readLock.lock();
+    try {
+      if (forceConfiguredMaxAllocation &&
+          System.currentTimeMillis() - ResourceManager.getClusterTimeStamp()
+              > configuredMaxAllocationWaitTime) {
+        forceConfiguredMaxAllocation = false;
+      }
+
+      if (forceConfiguredMaxAllocation
+          || maxNodeMemory == -1 || maxNodeVCores == -1) {
+        return configuredMaxAllocation;
+      }
+
+      return Resources.createResource(
+          Math.min(configuredMaxAllocation.getMemory(), maxNodeMemory),
+          Math.min(configuredMaxAllocation.getVirtualCores(), maxNodeVCores)
+      );
+    } finally {
+      readLock.unlock();
+    }
+  }
+
+  private void updateMaxResources(SchedulerNode node, boolean add) {
+    Resource totalResource = node.getTotalResource();
+    writeLock.lock();
+    try {
+      if (add) { // added node
+        int nodeMemory = totalResource.getMemory();
+        if (nodeMemory > maxNodeMemory) {
+          maxNodeMemory = nodeMemory;
+        }
+        int nodeVCores = totalResource.getVirtualCores();
+        if (nodeVCores > maxNodeVCores) {
+          maxNodeVCores = nodeVCores;
+        }
+      } else {  // removed node
+        if (maxNodeMemory == totalResource.getMemory()) {
+          maxNodeMemory = -1;
+        }
+        if (maxNodeVCores == totalResource.getVirtualCores()) {
+          maxNodeVCores = -1;
+        }
+        // We only have to iterate through the nodes if the current max memory
+        // or vcores was equal to the removed node's
+        if (maxNodeMemory == -1 || maxNodeVCores == -1) {
+          // Treat it like an empty cluster and add nodes
+          for (N n : nodes.values()) {
+            updateMaxResources(n, true);
+          }
+        }
+      }
+    } finally {
+      writeLock.unlock();
+    }
+  }
+
+  public List<N> getAllNodes() {
+    return getNodes(null);
+  }
+
+  /**
+   * Convenience method to filter nodes based on a condition.
+   */
+  public List<N> getNodes(NodeFilter nodeFilter) {
+    List<N> nodeList = new ArrayList<>();
+    readLock.lock();
+    try {
+      if (nodeFilter == null) {
+        nodeList.addAll(nodes.values());
+      } else {
+        for (N node : nodes.values()) {
+          if (nodeFilter.accept(node)) {
+            nodeList.add(node);
+          }
+        }
+      }
+    } finally {
+      readLock.unlock();
+    }
+    return nodeList;
+  }
+
+  /**
+   * Convenience method to sort nodes.
+   *
+   * Note that the sort is performed without holding a lock. We are sorting
+   * here instead of on the caller to allow for future optimizations (e.g.
+   * sort once every x milliseconds).
+   */
+  public List<N> sortedNodeList(Comparator<N> comparator) {
+    List<N> sortedList = null;
+    readLock.lock();
+    try {
+      sortedList = new ArrayList(nodes.values());
+    } finally {
+      readLock.unlock();
+    }
+    Collections.sort(sortedList, comparator);
+    return sortedList;
+  }
+}
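Note (illustrative sketch, not part of this commit): the tracker replaces the per-scheduler nodes map, the nodesPerRack map, the clusterResource field and the max-allocation bookkeeping in one place. A minimal calling sketch, assuming a SchedulerNode subclass instance (here called schedulerNode) is available, as it would be inside a scheduler's addNode/removeNode handlers:

    ClusterNodeTracker<FSSchedulerNode> tracker = new ClusterNodeTracker<>();
    tracker.setConfiguredMaxAllocation(Resources.createResource(8192, 8));

    tracker.addNode(schedulerNode);                    // on node registration
    int total = tracker.nodeCount();                   // replaces numNodeManagers-style counters
    int onRack = tracker.nodeCount(schedulerNode.getRackName());
    Resource capacity = tracker.getClusterCapacity();  // replaces the clusterResource field
    tracker.removeNode(schedulerNode.getNodeID());     // on node removal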
NodeFilter.java (new file):
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+
+/**
+ * Convenience way to filter nodes based on a criteria. To be used in
+ * conjunction with {@link ClusterNodeTracker}
+ */
+@InterfaceAudience.Private
+public interface NodeFilter {
+
+  /**
+   * Criteria to accept node in the filtered list.
+   */
+  boolean accept(SchedulerNode node);
+}
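Note (illustrative sketch, not part of this commit): a filter can also be a small named class when the same criterion is reused across call sites. The rack-based example below is made up, but relies only on SchedulerNode.getRackName(), which the tracker itself uses for its per-rack counts.

    // Sketch only: accept nodes on one rack, e.g. tracker.getNodes(new OnRack("/rack1")).
    class OnRack implements NodeFilter {
      private final String rack;
      OnRack(String rack) {
        this.rack = rack;
      }
      @Override
      public boolean accept(SchedulerNode node) {
        return rack.equals(node.getRackName());
      }
    }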
CapacityScheduler.java:
@@ -34,7 +34,6 @@ import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
@@ -219,8 +218,6 @@ public class CapacityScheduler extends
 
   private Map<String, CSQueue> queues = new ConcurrentHashMap<String, CSQueue>();
 
-  private AtomicInteger numNodeManagers = new AtomicInteger(0);
-
   private ResourceCalculator calculator;
   private boolean usePortForNodeName;
 
@@ -280,7 +277,7 @@ public class CapacityScheduler extends
 
   @Override
   public int getNumClusterNodes() {
-    return numNodeManagers.get();
+    return nodeTracker.nodeCount();
   }
 
   @Override
@@ -387,7 +384,7 @@ public class CapacityScheduler extends
   static void schedule(CapacityScheduler cs) {
     // First randomize the start point
     int current = 0;
-    Collection<FiCaSchedulerNode> nodes = cs.getAllNodes().values();
+    Collection<FiCaSchedulerNode> nodes = cs.nodeTracker.getAllNodes();
     int start = random.nextInt(nodes.size());
     for (FiCaSchedulerNode node : nodes) {
       if (current++ >= start) {
@@ -524,10 +521,11 @@ public class CapacityScheduler extends
     addNewQueues(queues, newQueues);
 
     // Re-configure queues
-    root.reinitialize(newRoot, clusterResource);
+    root.reinitialize(newRoot, getClusterResource());
     updatePlacementRules();
 
     // Re-calculate headroom for active applications
+    Resource clusterResource = getClusterResource();
     root.updateClusterResource(clusterResource, new ResourceLimits(
         clusterResource));
@@ -995,7 +993,7 @@ public class CapacityScheduler extends
 
 
       allocation = application.getAllocation(getResourceCalculator(),
-          clusterResource, getMinimumResourceCapability());
+          getClusterResource(), getMinimumResourceCapability());
     }
 
     if (updateDemandForQueue != null && !application
@@ -1036,7 +1034,8 @@ public class CapacityScheduler extends
 
   private synchronized void nodeUpdate(RMNode nm) {
     if (LOG.isDebugEnabled()) {
-      LOG.debug("nodeUpdate: " + nm + " clusterResources: " + clusterResource);
+      LOG.debug("nodeUpdate: " + nm +
+          " clusterResources: " + getClusterResource());
     }
 
     Resource releaseResources = Resource.newInstance(0, 0);
@@ -1119,6 +1118,7 @@ public class CapacityScheduler extends
   private synchronized void updateNodeAndQueueResource(RMNode nm,
       ResourceOption resourceOption) {
     updateNodeResource(nm, resourceOption);
+    Resource clusterResource = getClusterResource();
     root.updateClusterResource(clusterResource, new ResourceLimits(
         clusterResource));
   }
@@ -1128,7 +1128,7 @@ public class CapacityScheduler extends
    */
   private synchronized void updateLabelsOnNode(NodeId nodeId,
       Set<String> newLabels) {
-    FiCaSchedulerNode node = nodes.get(nodeId);
+    FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
     if (null == node) {
       return;
     }
@@ -1230,12 +1230,12 @@ public class CapacityScheduler extends
       LeafQueue queue = ((LeafQueue) reservedApplication.getQueue());
       assignment =
           queue.assignContainers(
-              clusterResource,
+              getClusterResource(),
               node,
               // TODO, now we only consider limits for parent for non-labeled
               // resources, should consider labeled resources as well.
               new ResourceLimits(labelManager.getResourceByLabel(
-                  RMNodeLabelsManager.NO_LABEL, clusterResource)),
+                  RMNodeLabelsManager.NO_LABEL, getClusterResource())),
               SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
       if (assignment.isFulfilledReservation()) {
         CSAssignment tmp =
@@ -1261,14 +1261,14 @@ public class CapacityScheduler extends
       }
 
       assignment = root.assignContainers(
-          clusterResource,
+          getClusterResource(),
           node,
           // TODO, now we only consider limits for parent for non-labeled
           // resources, should consider labeled resources as well.
           new ResourceLimits(labelManager.getResourceByLabel(
-              RMNodeLabelsManager.NO_LABEL, clusterResource)),
+              RMNodeLabelsManager.NO_LABEL, getClusterResource())),
           SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
-      if (Resources.greaterThan(calculator, clusterResource,
+      if (Resources.greaterThan(calculator, getClusterResource(),
           assignment.getResource(), Resources.none())) {
         updateSchedulerHealth(lastNodeUpdateTime, node, assignment);
         return;
@@ -1294,12 +1294,12 @@ public class CapacityScheduler extends
 
     // Try to use NON_EXCLUSIVE
     assignment = root.assignContainers(
-        clusterResource,
+        getClusterResource(),
         node,
         // TODO, now we only consider limits for parent for non-labeled
         // resources, should consider labeled resources as well.
         new ResourceLimits(labelManager.getResourceByLabel(
-            RMNodeLabelsManager.NO_LABEL, clusterResource)),
+            RMNodeLabelsManager.NO_LABEL, getClusterResource())),
         SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY);
     updateSchedulerHealth(lastNodeUpdateTime, node, assignment);
   }
@@ -1451,24 +1451,22 @@ public class CapacityScheduler extends
   private synchronized void addNode(RMNode nodeManager) {
     FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager,
         usePortForNodeName, nodeManager.getNodeLabels());
-    this.nodes.put(nodeManager.getNodeID(), schedulerNode);
-    Resources.addTo(clusterResource, schedulerNode.getTotalResource());
+    nodeTracker.addNode(schedulerNode);
 
     // update this node to node label manager
     if (labelManager != null) {
       labelManager.activateNode(nodeManager.getNodeID(),
           schedulerNode.getTotalResource());
     }
 
+    Resource clusterResource = getClusterResource();
     root.updateClusterResource(clusterResource, new ResourceLimits(
         clusterResource));
-    int numNodes = numNodeManagers.incrementAndGet();
-    updateMaximumAllocation(schedulerNode, true);
 
     LOG.info("Added node " + nodeManager.getNodeAddress() +
         " clusterResource: " + clusterResource);
 
-    if (scheduleAsynchronously && numNodes == 1) {
+    if (scheduleAsynchronously && getNumClusterNodes() == 1) {
       asyncSchedulerThread.beginSchedule();
     }
   }
@@ -1478,20 +1476,14 @@ public class CapacityScheduler extends
     if (labelManager != null) {
       labelManager.deactivateNode(nodeInfo.getNodeID());
     }
 
-    FiCaSchedulerNode node = nodes.get(nodeInfo.getNodeID());
+    NodeId nodeId = nodeInfo.getNodeID();
+    FiCaSchedulerNode node = nodeTracker.getNode(nodeId);
     if (node == null) {
+      LOG.error("Attempting to remove non-existent node " + nodeId);
       return;
     }
-    Resources.subtractFrom(clusterResource, node.getTotalResource());
-    root.updateClusterResource(clusterResource, new ResourceLimits(
-        clusterResource));
-    int numNodes = numNodeManagers.decrementAndGet();
-
-    if (scheduleAsynchronously && numNodes == 0) {
-      asyncSchedulerThread.suspendSchedule();
-    }
 
     // Remove running containers
     List<RMContainer> runningContainers = node.getRunningContainers();
     for (RMContainer container : runningContainers) {
@@ -1512,11 +1504,18 @@ public class CapacityScheduler extends
           RMContainerEventType.KILL);
     }
 
-    this.nodes.remove(nodeInfo.getNodeID());
-    updateMaximumAllocation(node, false);
+    nodeTracker.removeNode(nodeId);
+    Resource clusterResource = getClusterResource();
+    root.updateClusterResource(clusterResource, new ResourceLimits(
+        clusterResource));
+    int numNodes = nodeTracker.nodeCount();
+
+    if (scheduleAsynchronously && numNodes == 0) {
+      asyncSchedulerThread.suspendSchedule();
+    }
 
     LOG.info("Removed node " + nodeInfo.getNodeAddress() +
-        " clusterResource: " + clusterResource);
+        " clusterResource: " + getClusterResource());
   }
 
   private void rollbackContainerResource(
@@ -1568,7 +1567,7 @@ public class CapacityScheduler extends
 
     // Inform the queue
     LeafQueue queue = (LeafQueue)application.getQueue();
-    queue.completedContainer(clusterResource, application, node,
+    queue.completedContainer(getClusterResource(), application, node,
         rmContainer, containerStatus, event, null, true);
 
     if (containerStatus.getExitStatus() == ContainerExitStatus.PREEMPTED) {
@@ -1594,7 +1593,7 @@ public class CapacityScheduler extends
     FiCaSchedulerApp app = (FiCaSchedulerApp)attempt;
     LeafQueue queue = (LeafQueue) attempt.getQueue();
     try {
-      queue.decreaseContainer(clusterResource, decreaseRequest, app);
+      queue.decreaseContainer(getClusterResource(), decreaseRequest, app);
       // Notify RMNode that the container can be pulled by NodeManager in the
       // next heartbeat
       this.rmContext.getDispatcher().getEventHandler()
@@ -1617,14 +1616,9 @@ public class CapacityScheduler extends
 
   @Lock(Lock.NoLock.class)
   public FiCaSchedulerNode getNode(NodeId nodeId) {
-    return nodes.get(nodeId);
+    return nodeTracker.getNode(nodeId);
   }
 
-  @Lock(Lock.NoLock.class)
-  Map<NodeId, FiCaSchedulerNode> getAllNodes() {
-    return nodes;
-  }
-
   @Override
   @Lock(Lock.NoLock.class)
   public void recover(RMState state) throws Exception {
@@ -1869,9 +1863,9 @@ public class CapacityScheduler extends
     }
     // Move all live containers
     for (RMContainer rmContainer : app.getLiveContainers()) {
-      source.detachContainer(clusterResource, app, rmContainer);
+      source.detachContainer(getClusterResource(), app, rmContainer);
       // attach the Container to another queue
-      dest.attachContainer(clusterResource, app, rmContainer);
+      dest.attachContainer(getClusterResource(), app, rmContainer);
     }
     // Detach the application..
     source.finishApplicationAttempt(app, sourceQueueName);
FSAppAttempt.java:
@@ -86,7 +86,7 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
   // Key = RackName, Value = Set of Nodes reserved by app on rack
   private Map<String, Set<String>> reservations = new HashMap<>();
 
-  private List<NodeId> blacklistNodeIds = new ArrayList<NodeId>();
+  private List<FSSchedulerNode> blacklistNodeIds = new ArrayList<>();
   /**
    * Delay scheduling: We often want to prioritize scheduling of node-local
    * containers over rack-local or off-switch containers. To achieve this
@@ -185,14 +185,11 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
       Resource availableResources) {
     if (appSchedulingInfo.getAndResetBlacklistChanged()) {
       blacklistNodeIds.clear();
-      scheduler.addBlacklistedNodeIdsToList(this, blacklistNodeIds);
+      blacklistNodeIds.addAll(scheduler.getBlacklistedNodes(this));
     }
-    for (NodeId nodeId: blacklistNodeIds) {
-      SchedulerNode node = scheduler.getSchedulerNode(nodeId);
-      if (node != null) {
-        Resources.subtractFrom(availableResources,
-            node.getUnallocatedResource());
-      }
+    for (FSSchedulerNode node: blacklistNodeIds) {
+      Resources.subtractFrom(availableResources,
+          node.getUnallocatedResource());
     }
     if (availableResources.getMemory() < 0) {
       availableResources.setMemory(0);
FairScheduler.java:
@@ -20,13 +20,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
 
@@ -186,14 +184,11 @@ public class FairScheduler extends
   private float reservableNodesRatio; // percentage of available nodes
                                       // an app can be reserved on
 
-  // Count of number of nodes per rack
-  private Map<String, Integer> nodesPerRack = new ConcurrentHashMap<>();
-
   protected boolean sizeBasedWeight; // Give larger weights to larger jobs
   protected WeightAdjuster weightAdjuster; // Can be null for no weight adjuster
   protected boolean continuousSchedulingEnabled; // Continuous Scheduling enabled or not
   protected int continuousSchedulingSleepMs; // Sleep time for each pass in continuous scheduling
-  private Comparator<NodeId> nodeAvailableResourceComparator =
+  private Comparator<FSSchedulerNode> nodeAvailableResourceComparator =
       new NodeAvailableResourceComparator(); // Node available resource comparator
   protected double nodeLocalityThreshold; // Cluster threshold for node locality
   protected double rackLocalityThreshold; // Cluster threshold for rack locality
@@ -225,8 +220,8 @@ public class FairScheduler extends
 
   public boolean isAtLeastReservationThreshold(
       ResourceCalculator resourceCalculator, Resource resource) {
-    return Resources.greaterThanOrEqual(
-        resourceCalculator, clusterResource, resource, reservationThreshold);
+    return Resources.greaterThanOrEqual(resourceCalculator,
+        getClusterResource(), resource, reservationThreshold);
   }
 
   private void validateConf(Configuration conf) {
@@ -272,11 +267,7 @@ public class FairScheduler extends
   }
 
   public int getNumNodesInRack(String rackName) {
-    String rName = rackName == null ? "NULL" : rackName;
-    if (nodesPerRack.containsKey(rName)) {
-      return nodesPerRack.get(rName);
-    }
-    return 0;
+    return nodeTracker.nodeCount(rackName);
   }
 
   public QueueManager getQueueManager() {
@@ -352,6 +343,7 @@ public class FairScheduler extends
     // Recursively update demands for all queues
     rootQueue.updateDemand();
 
+    Resource clusterResource = getClusterResource();
     rootQueue.setFairShare(clusterResource);
     // Recursively compute fair shares for all queues
     // and update metrics
@@ -526,6 +518,7 @@ public class FairScheduler extends
     Resource resDueToMinShare = Resources.none();
     Resource resDueToFairShare = Resources.none();
     ResourceCalculator calc = sched.getPolicy().getResourceCalculator();
+    Resource clusterResource = getClusterResource();
    if (curTime - sched.getLastTimeAtMinShare() > minShareTimeout) {
       Resource target = Resources.componentwiseMin(
           sched.getMinShare(), sched.getDemand());
@@ -577,7 +570,7 @@ public class FairScheduler extends
   }
 
   private FSSchedulerNode getFSSchedulerNode(NodeId nodeId) {
-    return nodes.get(nodeId);
+    return nodeTracker.getNode(nodeId);
   }
 
   public double getNodeLocalityThreshold() {
@@ -882,18 +875,11 @@ public class FairScheduler extends
   private synchronized void addNode(List<NMContainerStatus> containerReports,
       RMNode node) {
     FSSchedulerNode schedulerNode = new FSSchedulerNode(node, usePortForNodeName);
-    nodes.put(node.getNodeID(), schedulerNode);
-    String rackName = node.getRackName() == null ? "NULL" : node.getRackName();
-    if (nodesPerRack.containsKey(rackName)) {
-      nodesPerRack.put(rackName, nodesPerRack.get(rackName) + 1);
-    } else {
-      nodesPerRack.put(rackName, 1);
-    }
-    Resources.addTo(clusterResource, schedulerNode.getTotalResource());
-    updateMaximumAllocation(schedulerNode, true);
+    nodeTracker.addNode(schedulerNode);
 
     triggerUpdate();
 
+    Resource clusterResource = getClusterResource();
     queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
     queueMgr.getRootQueue().recomputeSteadyShares();
     LOG.info("Added node " + node.getNodeAddress() +
@@ -904,15 +890,12 @@ public class FairScheduler extends
   }
 
   private synchronized void removeNode(RMNode rmNode) {
-    FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID());
-    // This can occur when an UNHEALTHY node reconnects
+    NodeId nodeId = rmNode.getNodeID();
+    FSSchedulerNode node = nodeTracker.getNode(nodeId);
     if (node == null) {
+      LOG.error("Attempting to remove non-existent node " + nodeId);
       return;
     }
-    Resources.subtractFrom(clusterResource, node.getTotalResource());
-    updateRootQueueMetrics();
-
-    triggerUpdate();
 
     // Remove running containers
     List<RMContainer> runningContainers = node.getRunningContainers();
@@ -934,18 +917,13 @@ public class FairScheduler extends
           RMContainerEventType.KILL);
     }
 
-    nodes.remove(rmNode.getNodeID());
-    String rackName = node.getRackName() == null ? "NULL" : node.getRackName();
-    if (nodesPerRack.containsKey(rackName)
-        && (nodesPerRack.get(rackName) > 0)) {
-      nodesPerRack.put(rackName, nodesPerRack.get(rackName) - 1);
-    } else {
-      LOG.error("Node [" + rmNode.getNodeAddress() + "] being removed from" +
-          " unknown rack [" + rackName + "] !!");
-    }
+    nodeTracker.removeNode(nodeId);
+    Resource clusterResource = getClusterResource();
     queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
     queueMgr.getRootQueue().recomputeSteadyShares();
-    updateMaximumAllocation(node, false);
+    updateRootQueueMetrics();
+    triggerUpdate();
 
     LOG.info("Removed node " + rmNode.getNodeAddress() +
         " cluster capacity: " + clusterResource);
   }
@@ -967,7 +945,7 @@ public class FairScheduler extends
 
     // Sanity check
     SchedulerUtils.normalizeRequests(ask, DOMINANT_RESOURCE_CALCULATOR,
-        clusterResource, minimumAllocation, getMaximumResourceCapability(),
+        getClusterResource(), minimumAllocation, getMaximumResourceCapability(),
         incrAllocation);
 
     // Record container allocation start time
@@ -1034,7 +1012,8 @@ public class FairScheduler extends
   private synchronized void nodeUpdate(RMNode nm) {
     long start = getClock().getTime();
     if (LOG.isDebugEnabled()) {
-      LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterResource);
+      LOG.debug("nodeUpdate: " + nm +
+          " cluster capacity: " + getClusterResource());
     }
     eventLog.log("HEARTBEAT", nm.getHostName());
     FSSchedulerNode node = getFSSchedulerNode(nm.getNodeID());
@@ -1091,20 +1070,13 @@ public class FairScheduler extends
 
   void continuousSchedulingAttempt() throws InterruptedException {
     long start = getClock().getTime();
-    List<NodeId> nodeIdList = new ArrayList<NodeId>(nodes.keySet());
-    // Sort the nodes by space available on them, so that we offer
-    // containers on emptier nodes first, facilitating an even spread. This
-    // requires holding the scheduler lock, so that the space available on a
-    // node doesn't change during the sort.
-    synchronized (this) {
-      Collections.sort(nodeIdList, nodeAvailableResourceComparator);
-    }
+    List<FSSchedulerNode> nodeIdList =
+        nodeTracker.sortedNodeList(nodeAvailableResourceComparator);
 
     // iterate all nodes
-    for (NodeId nodeId : nodeIdList) {
-      FSSchedulerNode node = getFSSchedulerNode(nodeId);
+    for (FSSchedulerNode node : nodeIdList) {
       try {
-        if (node != null && Resources.fitsIn(minimumAllocation,
+        if (Resources.fitsIn(minimumAllocation,
             node.getUnallocatedResource())) {
           attemptScheduling(node);
         }
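Note (illustrative sketch, not part of this commit): sortedNodeList() hands back a snapshot copy, so the sort no longer needs the scheduler lock that the removed synchronized block held; the comparator passed in here is the NodeAvailableResourceComparator shown in the next hunk. A caller-side sketch with a simpler, made-up ordering (most unallocated memory first):

    List<FSSchedulerNode> emptiestFirst = nodeTracker.sortedNodeList(
        new Comparator<FSSchedulerNode>() {
          @Override
          public int compare(FSSchedulerNode n1, FSSchedulerNode n2) {
            return n2.getUnallocatedResource().getMemory()
                - n1.getUnallocatedResource().getMemory();
          }
        });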
@@ -1126,19 +1098,14 @@ public class FairScheduler extends
   }
 
   /** Sort nodes by available resource */
-  private class NodeAvailableResourceComparator implements Comparator<NodeId> {
+  private class NodeAvailableResourceComparator
+      implements Comparator<FSSchedulerNode> {
 
     @Override
-    public int compare(NodeId n1, NodeId n2) {
-      if (!nodes.containsKey(n1)) {
-        return 1;
-      }
-      if (!nodes.containsKey(n2)) {
-        return -1;
-      }
-      return RESOURCE_CALCULATOR.compare(clusterResource,
-          nodes.get(n2).getUnallocatedResource(),
-          nodes.get(n1).getUnallocatedResource());
+    public int compare(FSSchedulerNode n1, FSSchedulerNode n2) {
+      return RESOURCE_CALCULATOR.compare(getClusterResource(),
+          n2.getUnallocatedResource(),
+          n1.getUnallocatedResource());
     }
   }
 
@@ -1150,7 +1117,7 @@ public class FairScheduler extends
     }
 
     final NodeId nodeID = node.getNodeID();
-    if (!nodes.containsKey(nodeID)) {
+    if (!nodeTracker.exists(nodeID)) {
       // The node might have just been removed while this thread was waiting
       // on the synchronized lock before it entered this synchronized method
       LOG.info("Skipping scheduling as the node " + nodeID +
@@ -1203,7 +1170,7 @@ public class FairScheduler extends
   private void updateRootQueueMetrics() {
     rootMetrics.setAvailableResourcesToQueue(
         Resources.subtract(
-            clusterResource, rootMetrics.getAllocatedResources()));
+            getClusterResource(), rootMetrics.getAllocatedResources()));
   }
 
   /**
@@ -1214,6 +1181,7 @@ public class FairScheduler extends
    */
   private boolean shouldAttemptPreemption() {
     if (preemptionEnabled) {
+      Resource clusterResource = getClusterResource();
       return (preemptionUtilizationThreshold < Math.max(
           (float) rootMetrics.getAllocatedMB() / clusterResource.getMemory(),
           (float) rootMetrics.getAllocatedVirtualCores() /
@ -1547,7 +1515,7 @@ public class FairScheduler extends
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getNumClusterNodes() {
|
public int getNumClusterNodes() {
|
||||||
return nodes.size();
|
return nodeTracker.nodeCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1577,7 +1545,7 @@ public class FairScheduler extends
|
||||||
// if it does not already exist, so it can be displayed on the web UI.
|
// if it does not already exist, so it can be displayed on the web UI.
|
||||||
synchronized (FairScheduler.this) {
|
synchronized (FairScheduler.this) {
|
||||||
allocConf = queueInfo;
|
allocConf = queueInfo;
|
||||||
allocConf.getDefaultSchedulingPolicy().initialize(clusterResource);
|
allocConf.getDefaultSchedulingPolicy().initialize(getClusterResource());
|
||||||
queueMgr.updateAllocationConfiguration(allocConf);
|
queueMgr.updateAllocationConfiguration(allocConf);
|
||||||
maxRunningEnforcer.updateRunnabilityOnReload();
|
maxRunningEnforcer.updateRunnabilityOnReload();
|
||||||
}
|
}
|
||||||
|
@ -1721,7 +1689,7 @@ public class FairScheduler extends
|
||||||
ResourceOption resourceOption) {
|
ResourceOption resourceOption) {
|
||||||
super.updateNodeResource(nm, resourceOption);
|
super.updateNodeResource(nm, resourceOption);
|
||||||
updateRootQueueMetrics();
|
updateRootQueueMetrics();
|
||||||
queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
|
queueMgr.getRootQueue().setSteadyFairShare(getClusterResource());
|
||||||
queueMgr.getRootQueue().recomputeSteadyShares();
|
queueMgr.getRootQueue().recomputeSteadyShares();
|
||||||
}
|
}
|
||||||
|
|
||||||
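For readers skimming the FairScheduler hunks above: continuous scheduling no longer sorts NodeIds under the scheduler lock; it asks the tracker for an already-sorted snapshot. The sketch below is illustrative only (not part of the commit) and assumes nothing beyond the ClusterNodeTracker calls visible in this diff (sortedNodeList) and the existing comparator, minimumAllocation, and attemptScheduling members.

  // Illustrative sketch: continuous scheduling over a tracker snapshot.
  void continuousSchedulingSketch() throws InterruptedException {
    List<FSSchedulerNode> sortedNodes =
        nodeTracker.sortedNodeList(nodeAvailableResourceComparator);
    for (FSSchedulerNode node : sortedNodes) {
      // A node removed concurrently simply does not appear in the snapshot,
      // so the old null / containsKey guards are no longer needed here.
      if (Resources.fitsIn(minimumAllocation, node.getUnallocatedResource())) {
        attemptScheduling(node);
      }
    }
  }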

@@ -142,6 +142,7 @@ public class FifoScheduler extends
     QueueInfo queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
     queueInfo.setQueueName(DEFAULT_QUEUE.getQueueName());
     queueInfo.setCapacity(1.0f);
+    Resource clusterResource = getClusterResource();
     if (clusterResource.getMemory() == 0) {
       queueInfo.setCurrentCapacity(0.0f);
     } else {
@@ -297,7 +298,7 @@ public class FifoScheduler extends
 
   @Override
   public int getNumClusterNodes() {
-    return nodes.size();
+    return nodeTracker.nodeCount();
   }
 
   @Override
@@ -327,7 +328,8 @@ public class FifoScheduler extends
 
     // Sanity check
     SchedulerUtils.normalizeRequests(ask, resourceCalculator,
-        clusterResource, minimumAllocation, getMaximumResourceCapability());
+        getClusterResource(), minimumAllocation,
+        getMaximumResourceCapability());
 
     // Release containers
     releaseContainers(release, application);
@@ -377,7 +379,7 @@ public class FifoScheduler extends
   }
 
   private FiCaSchedulerNode getNode(NodeId nodeId) {
-    return nodes.get(nodeId);
+    return nodeTracker.getNode(nodeId);
   }
 
   @VisibleForTesting
@@ -526,7 +528,7 @@ public class FifoScheduler extends
         application.showRequests();
 
         // Done
-        if (Resources.lessThan(resourceCalculator, clusterResource,
+        if (Resources.lessThan(resourceCalculator, getClusterResource(),
             node.getUnallocatedResource(), minimumAllocation)) {
           break;
         }
@@ -764,7 +766,7 @@ public class FifoScheduler extends
       return;
     }
 
-    if (Resources.greaterThanOrEqual(resourceCalculator, clusterResource,
+    if (Resources.greaterThanOrEqual(resourceCalculator, getClusterResource(),
         node.getUnallocatedResource(), minimumAllocation)) {
       LOG.debug("Node heartbeat " + rmNode.getNodeID() +
           " available resource = " + node.getUnallocatedResource());
@@ -783,13 +785,13 @@ public class FifoScheduler extends
   }
 
   private void updateAppHeadRoom(SchedulerApplicationAttempt schedulerAttempt) {
-    schedulerAttempt.setHeadroom(Resources.subtract(clusterResource,
+    schedulerAttempt.setHeadroom(Resources.subtract(getClusterResource(),
         usedResource));
   }
 
   private void updateAvailableResourcesMetrics() {
-    metrics.setAvailableResourcesToQueue(Resources.subtract(clusterResource,
-        usedResource));
+    metrics.setAvailableResourcesToQueue(
+        Resources.subtract(getClusterResource(), usedResource));
   }
 
   @Override
@@ -925,7 +927,7 @@ public class FifoScheduler extends
   private Resource usedResource = recordFactory.newRecordInstance(Resource.class);
 
   private synchronized void removeNode(RMNode nodeInfo) {
-    FiCaSchedulerNode node = getNode(nodeInfo.getNodeID());
+    FiCaSchedulerNode node = nodeTracker.getNode(nodeInfo.getNodeID());
     if (node == null) {
       return;
     }
@@ -937,13 +939,7 @@ public class FifoScheduler extends
               SchedulerUtils.LOST_CONTAINER),
           RMContainerEventType.KILL);
     }
-
-    //Remove the node
-    this.nodes.remove(nodeInfo.getNodeID());
-    updateMaximumAllocation(node, false);
-
-    // Update cluster metrics
-    Resources.subtractFrom(clusterResource, node.getTotalResource());
+    nodeTracker.removeNode(nodeInfo.getNodeID());
   }
 
   @Override
@@ -965,9 +961,7 @@ public class FifoScheduler extends
   private synchronized void addNode(RMNode nodeManager) {
     FiCaSchedulerNode schedulerNode = new FiCaSchedulerNode(nodeManager,
        usePortForNodeName);
-    this.nodes.put(nodeManager.getNodeID(), schedulerNode);
-    Resources.addTo(clusterResource, schedulerNode.getTotalResource());
-    updateMaximumAllocation(schedulerNode, true);
+    nodeTracker.addNode(schedulerNode);
   }
 
   @Override
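The net effect of the FifoScheduler hunks is that per-node bookkeeping (the NodeId map, the running cluster-resource total, and maximum-allocation updates) is delegated to the shared tracker. A minimal sketch of that delegation pattern follows; it is illustrative, uses only tracker methods that appear in this diff (addNode, getNode, removeNode), and assumes the tracker maintains cluster-resource and max-allocation state internally, as implied by the removed Resources.addTo/subtractFrom and updateMaximumAllocation calls.

  // Illustrative sketch of the delegation pattern introduced above.
  private synchronized void addNodeSketch(RMNode rmNode) {
    FiCaSchedulerNode schedulerNode =
        new FiCaSchedulerNode(rmNode, usePortForNodeName);
    nodeTracker.addNode(schedulerNode);          // one call replaces three
  }

  private synchronized void removeNodeSketch(RMNode rmNode) {
    FiCaSchedulerNode node = nodeTracker.getNode(rmNode.getNodeID());
    if (node == null) {
      return;                                    // already removed
    }
    // ... kill still-running containers exactly as before ...
    nodeTracker.removeNode(rmNode.getNodeID());  // tracker updates the totals
  }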
@@ -300,22 +300,16 @@ public class TestAbstractYarnScheduler extends ParameterizedSchedulerTestBase {
 
       verifyMaximumResourceCapability(configuredMaximumResource, scheduler);
 
-      scheduler.nodes = new HashMap<NodeId, SchedulerNode>();
-
-      scheduler.nodes.put(mockNode1.getNodeID(), mockNode1);
-      scheduler.updateMaximumAllocation(mockNode1, true);
+      scheduler.nodeTracker.addNode(mockNode1);
       verifyMaximumResourceCapability(fullResource1, scheduler);
 
-      scheduler.nodes.put(mockNode2.getNodeID(), mockNode2);
-      scheduler.updateMaximumAllocation(mockNode2, true);
+      scheduler.nodeTracker.addNode(mockNode2);
       verifyMaximumResourceCapability(fullResource2, scheduler);
 
-      scheduler.nodes.remove(mockNode2.getNodeID());
-      scheduler.updateMaximumAllocation(mockNode2, false);
+      scheduler.nodeTracker.removeNode(mockNode2.getNodeID());
       verifyMaximumResourceCapability(fullResource1, scheduler);
 
-      scheduler.nodes.remove(mockNode1.getNodeID());
-      scheduler.updateMaximumAllocation(mockNode1, false);
+      scheduler.nodeTracker.removeNode(mockNode1.getNodeID());
       verifyMaximumResourceCapability(configuredMaximumResource, scheduler);
     } finally {
       rm.stop();

@@ -183,6 +183,7 @@ public class TestReservations {
   }
 
   @Test
+  @SuppressWarnings("unchecked")
   public void testReservation() throws Exception {
     // Test that we now unreserve and use a node that has space
 
@@ -231,9 +232,9 @@ public class TestReservations {
     when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
     when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
 
-    cs.getAllNodes().put(node_0.getNodeID(), node_0);
-    cs.getAllNodes().put(node_1.getNodeID(), node_1);
-    cs.getAllNodes().put(node_2.getNodeID(), node_2);
+    cs.getNodeTracker().addNode(node_0);
+    cs.getNodeTracker().addNode(node_1);
+    cs.getNodeTracker().addNode(node_2);
 
     final int numNodes = 3;
     Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
@@ -346,6 +347,7 @@ public class TestReservations {
   // Test that hitting a reservation limit and needing to unreserve
   // does not affect assigning containers for other users
   @Test
+  @SuppressWarnings("unchecked")
   public void testReservationLimitOtherUsers() throws Exception {
     CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration();
     setup(csConf, true);
@@ -395,9 +397,9 @@ public class TestReservations {
     when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);
     when(csContext.getNode(node_2.getNodeID())).thenReturn(node_2);
 
-    cs.getAllNodes().put(node_0.getNodeID(), node_0);
-    cs.getAllNodes().put(node_1.getNodeID(), node_1);
-    cs.getAllNodes().put(node_2.getNodeID(), node_2);
+    cs.getNodeTracker().addNode(node_0);
+    cs.getNodeTracker().addNode(node_1);
+    cs.getNodeTracker().addNode(node_2);
 
     final int numNodes = 3;
     Resource clusterResource = Resources.createResource(numNodes * (8 * GB));
@@ -641,6 +643,7 @@ public class TestReservations {
   }
 
   @Test
+  @SuppressWarnings("unchecked")
   public void testAssignContainersNeedToUnreserve() throws Exception {
     // Test that we now unreserve and use a node that has space
     Logger rootLogger = LogManager.getRootLogger();
@@ -684,8 +687,8 @@ public class TestReservations {
     FiCaSchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0,
         8 * GB);
 
-    cs.getAllNodes().put(node_0.getNodeID(), node_0);
-    cs.getAllNodes().put(node_1.getNodeID(), node_1);
+    cs.getNodeTracker().addNode(node_0);
+    cs.getNodeTracker().addNode(node_1);
 
     when(csContext.getNode(node_0.getNodeID())).thenReturn(node_0);
     when(csContext.getNode(node_1.getNodeID())).thenReturn(node_1);

@@ -75,12 +75,10 @@ import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
-import org.apache.hadoop.yarn.server.resourcemanager.Application;
 import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService;
 import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
 import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.Task;
 import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.MockRMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
@@ -108,7 +106,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateS
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueuePlacementRule.Default;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.DominantResourceFairnessPolicy;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.ControlledClock;
 import org.apache.hadoop.yarn.util.resource.Resources;
@@ -2751,8 +2748,8 @@ public class TestFairScheduler extends FairSchedulerTestBase {
 
     RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 1, "127.0.0.1");
     RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(1024), 2, "127.0.0.2");
-    NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node1);
-    scheduler.handle(nodeEvent2);
+    NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
+    scheduler.handle(nodeEvent1);
 
     ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1",
         "user1", 0);

@@ -306,12 +306,7 @@ public class TestFifoScheduler {
     nmTokenSecretManager.rollMasterKey();
     RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
 
-    FifoScheduler scheduler = new FifoScheduler(){
-      @SuppressWarnings("unused")
-      public Map<NodeId, FiCaSchedulerNode> getNodes(){
-        return nodes;
-      }
-    };
+    FifoScheduler scheduler = new FifoScheduler();
     RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
         null, containerTokenSecretManager, nmTokenSecretManager, null, scheduler);
     rmContext.setSystemMetricsPublisher(mock(SystemMetricsPublisher.class));
@@ -331,11 +326,7 @@ public class TestFifoScheduler {
     NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
     scheduler.handle(nodeEvent1);
 
-    Method method = scheduler.getClass().getDeclaredMethod("getNodes");
-    @SuppressWarnings("unchecked")
-    Map<NodeId, FiCaSchedulerNode> schedulerNodes =
-        (Map<NodeId, FiCaSchedulerNode>) method.invoke(scheduler);
-    assertEquals(schedulerNodes.values().size(), 1);
+    assertEquals(scheduler.getNumClusterNodes(), 1);
 
     Resource newResource = Resources.createResource(1024, 4);
 
@@ -345,9 +336,9 @@ public class TestFifoScheduler {
     scheduler.handle(node0ResourceUpdate);
 
     // SchedulerNode's total resource and available resource are changed.
-    assertEquals(schedulerNodes.get(node0.getNodeID()).getTotalResource()
-        .getMemory(), 1024);
-    assertEquals(schedulerNodes.get(node0.getNodeID()).
+    assertEquals(1024, scheduler.getNodeTracker().getNode(node0.getNodeID())
+        .getTotalResource().getMemory());
+    assertEquals(1024, scheduler.getNodeTracker().getNode(node0.getNodeID()).
         getUnallocatedResource().getMemory(), 1024);
     QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
     Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity(), 0.0f);