YARN-9552. FairScheduler: NODE_UPDATE can cause NoSuchElementException. Contributed by Peter Bacsko.

This commit is contained in:
Szilard Nemeth 2019-10-09 14:19:56 +02:00
parent 73bc8ef9b8
commit 57e88a63cf
3 changed files with 51 additions and 6 deletions

View File

@ -477,14 +477,17 @@ public class AppSchedulingInfo {
}
/**
 * Looks up the pending ask for the first key in {@code schedulerKeys},
 * using {@link ResourceRequest#ANY} as the resource name.
 *
 * <p>The lookup is guarded by an emptiness check because asking an empty
 * key collection for its first element fails (this change addresses a
 * {@code NoSuchElementException}). Callers must be prepared for a
 * {@code null} result.
 *
 * @return the pending ask for the first scheduler key, or {@code null}
 *         when there are no scheduler keys
 */
public PendingAsk getNextPendingAsk() {
  // Acquire the lock exactly once, and outside the try block, so that the
  // unlock in finally is always paired with a successful lock.
  readLock.lock();
  try {
    if (schedulerKeys.isEmpty()) {
      return null;
    }
    SchedulerRequestKey firstRequestKey = schedulerKeys.first();
    return getPendingAsk(firstRequestKey, ResourceRequest.ANY);
  } finally {
    readLock.unlock();
  }
}
public PendingAsk getPendingAsk(SchedulerRequestKey schedulerKey) {

View File

@ -936,8 +936,8 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
if (!isAmRunning() && !getUnmanagedAM()) {
// Return true if we have no ask, or the queue is not able to run the app's AM
PendingAsk ask = appSchedulingInfo.getNextPendingAsk();
if (ask.getCount() == 0 || !getQueue().canRunAppAM(
ask.getPerAllocationResource())) {
if (ask != null && (ask.getCount() == 0 || !getQueue().canRunAppAM(
ask.getPerAllocationResource()))) {
return true;
}
}

View File

@ -19,7 +19,10 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.records.Resource;
@ -33,8 +36,12 @@ import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.spy;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
@ -341,6 +348,41 @@ public class TestFSAppAttempt extends FairSchedulerTestBase {
assertEquals(clusterResource, spyApp.getHeadroom());
}
/**
 * Verifies that assigning a container for an application attempt whose
 * appSchedulingInfo holds no pending ask completes without an error and
 * yields no resource.
 */
@Test
public void testNoNextPendingAsk() {
  ApplicationAttemptId attemptId = createAppAttemptId(1, 1);

  // Submission context reporting a managed AM and a log aggregation context.
  ApplicationSubmissionContext submissionContext =
      Mockito.mock(ApplicationSubmissionContext.class);
  LogAggregationContext logAggregation =
      Mockito.mock(LogAggregationContext.class);
  Mockito.when(submissionContext.getUnmanagedAM()).thenReturn(false);
  Mockito.when(submissionContext.getLogAggregationContext())
      .thenReturn(logAggregation);

  // Application exposing the submission context and empty scheduling envs.
  RMApp app = Mockito.mock(RMApp.class);
  Mockito.when(app.getApplicationSchedulingEnvs())
      .thenReturn(new HashMap<>());
  Mockito.when(app.getApplicationSubmissionContext())
      .thenReturn(submissionContext);

  // RM context that knows about exactly this one application.
  ConcurrentMap<ApplicationId, RMApp> appsById = new ConcurrentHashMap<>();
  appsById.put(attemptId.getApplicationId(), app);
  RMContext context = Mockito.mock(RMContext.class);
  Mockito.when(context.getRMApps()).thenReturn(appsById);

  FSLeafQueue leafQueue = Mockito.mock(FSLeafQueue.class);
  FSAppAttempt attempt =
      new FSAppAttempt(scheduler, attemptId, "user1", leafQueue,
          null, context);
  attempt.setAmRunning(false);

  // No resource request was ever added, so nothing should be assigned.
  FSSchedulerNode node = Mockito.mock(FSSchedulerNode.class);
  Resource assigned = attempt.assignContainer(node);
  assertEquals(Resources.none(), assigned);
}
/**
 * Returns the smallest of three {@code long} values.
 */
private static long min(long value1, long value2, long value3) {
  long smallerOfFirstTwo = value1 <= value2 ? value1 : value2;
  return smallerOfFirstTwo <= value3 ? smallerOfFirstTwo : value3;
}