YARN-8373. RM Received RMFatalEvent of type CRITICAL_THREAD_CRASH. Contributed by Wilfred Spiegelenburg.

(cherry picked from commit ea68756c0c)
This commit is contained in:
Sunil G 2019-11-19 14:10:41 +05:30
parent 049279bb66
commit c1ec51696c
3 changed files with 18 additions and 17 deletions

View File

@@ -33,12 +33,12 @@ import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -386,21 +386,21 @@ public class ClusterNodeTracker<N extends SchedulerNode> {
/**
* Convenience method to sort nodes.
* Nodes can change while being sorted. Using a standard sort will fail
* without locking each node; the TreeSet handles this without locks.
*
* Note that the sort is performed without holding a lock. We are sorting
* here instead of in the caller to allow for future optimizations (e.g.
* sort once every x milliseconds).
* @param comparator the comparator to sort the nodes with
* @return sorted set of nodes in the form of a TreeSet
*/
public List<N> sortedNodeList(Comparator<N> comparator) {
List<N> sortedList = null;
public TreeSet<N> sortedNodeSet(Comparator<N> comparator) {
TreeSet<N> sortedSet = new TreeSet<>(comparator);
readLock.lock();
try {
sortedList = new ArrayList(nodes.values());
sortedSet.addAll(nodes.values());
} finally {
readLock.unlock();
}
Collections.sort(sortedList, comparator);
return sortedList;
return sortedSet;
}
/**

View File

@@ -111,6 +111,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
@@ -1036,15 +1037,17 @@ public class FairScheduler extends
@Deprecated
void continuousSchedulingAttempt() throws InterruptedException {
long start = getClock().getTime();
List<FSSchedulerNode> nodeIdList;
// Hold a lock to prevent comparator order changes due to changes of node
// unallocated resources
synchronized (this) {
nodeIdList = nodeTracker.sortedNodeList(nodeAvailableResourceComparator);
TreeSet<FSSchedulerNode> nodeIdSet;
// Hold a lock to prevent node changes as much as possible.
readLock.lock();
try {
nodeIdSet = nodeTracker.sortedNodeSet(nodeAvailableResourceComparator);
} finally {
readLock.unlock();
}
// iterate all nodes
for (FSSchedulerNode node : nodeIdList) {
for (FSSchedulerNode node : nodeIdSet) {
try {
if (Resources.fitsIn(minimumAllocation,
node.getUnallocatedResource())) {

View File

@@ -323,12 +323,10 @@ public class TestContinuousScheduling extends FairSchedulerTestBase {
for (int j = 0; j < 100; j++) {
for (FSSchedulerNode node : clusterNodeTracker.getAllNodes()) {
int i = ThreadLocalRandom.current().nextInt(-30, 30);
synchronized (scheduler) {
node.deductUnallocatedResource(Resource.newInstance(i * 1024, i));
}
}
}
}
}.start();
try {