From 8e135fc73c53e4b9a1de1da0cf652e0df2bc0db1 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 2 Jan 2014 20:21:03 +0000 Subject: [PATCH] YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize app-attempts separately from apps. Contributed by Jian He. svn merge --ignore-ancestry -c 1554896 ../../trunk/ git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1554898 13f79535-47bb-0310-9956-ffa450edef68 --- .../scheduler/ResourceSchedulerWrapper.java | 37 +- hadoop-yarn-project/CHANGES.txt | 3 + .../resourcemanager/rmapp/RMAppEventType.java | 4 +- .../resourcemanager/rmapp/RMAppImpl.java | 82 +++- .../rmapp/attempt/RMAppAttemptEventType.java | 3 +- .../rmapp/attempt/RMAppAttemptImpl.java | 66 +-- .../scheduler/ActiveUsersManager.java | 6 +- .../scheduler/SchedulerAppReport.java | 2 +- .../scheduler/SchedulerAppUtils.java | 2 +- .../scheduler/SchedulerApplication.java | 381 +--------------- .../SchedulerApplicationAttempt.java | 410 ++++++++++++++++++ .../scheduler/capacity/CSQueue.java | 31 +- .../scheduler/capacity/CapacityScheduler.java | 128 ++++-- .../scheduler/capacity/LeafQueue.java | 79 ++-- .../scheduler/capacity/ParentQueue.java | 41 +- .../common/fica/FiCaSchedulerApp.java | 4 +- .../common/fica/FiCaSchedulerNode.java | 6 +- .../event/AppAddedSchedulerEvent.java} | 34 +- .../event/AppAttemptAddedSchedulerEvent.java | 15 +- .../event/AppRemovedSchedulerEvent.java | 43 ++ .../scheduler/event/SchedulerEventType.java | 6 +- .../scheduler/fair/FSParentQueue.java | 2 +- .../scheduler/fair/FSSchedulerApp.java | 4 +- .../scheduler/fair/FairScheduler.java | 125 ++++-- .../scheduler/fifo/FifoScheduler.java | 96 ++-- .../server/resourcemanager/Application.java | 12 +- .../resourcemanager/TestClientRMService.java | 2 +- .../resourcemanager/TestFifoScheduler.java | 28 +- .../server/resourcemanager/TestRMRestart.java | 27 +- .../rmapp/TestRMAppTransitions.java | 12 +- .../attempt/TestRMAppAttemptTransitions.java | 
20 +- .../scheduler/TestSchedulerUtils.java | 25 ++ .../capacity/TestApplicationLimits.java | 38 +- .../capacity/TestCapacityScheduler.java | 35 +- .../scheduler/capacity/TestLeafQueue.java | 70 +-- .../scheduler/fair/TestFairScheduler.java | 210 +++++---- .../scheduler/fifo/TestFifoScheduler.java | 63 ++- .../webapp/TestRMWebServicesApps.java | 5 + 38 files changed, 1261 insertions(+), 896 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java rename hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/{rmapp/attempt/event/RMAppAttemptRejectedEvent.java => scheduler/event/AppAddedSchedulerEvent.java} (54%) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppRemovedSchedulerEvent.java diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index ad066e75d28..3034d822a3b 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -64,8 +64,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; @@ -105,8 +106,8 @@ public class ResourceSchedulerWrapper implements ResourceScheduler, private Configuration conf; private ResourceScheduler scheduler; - private Map appQueueMap = - new ConcurrentHashMap(); + private Map appQueueMap = + new ConcurrentHashMap(); private BufferedWriter jobRuntimeLogBW; // Priority of the ResourceSchedulerWrapper shutdown hook. @@ -241,7 +242,7 @@ public void handle(SchedulerEvent schedulerEvent) { (AppAttemptRemovedSchedulerEvent) schedulerEvent; ApplicationAttemptId appAttemptId = appRemoveEvent.getApplicationAttemptID(); - String queue = appQueueMap.get(appAttemptId); + String queue = appQueueMap.get(appAttemptId.getApplicationId()); SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId); if (! 
app.getLiveContainers().isEmpty()) { // have 0 or 1 // should have one container which is AM container @@ -263,20 +264,18 @@ public void handle(SchedulerEvent schedulerEvent) { schedulerHandleCounter.inc(); schedulerHandleCounterMap.get(schedulerEvent.getType()).inc(); - if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED - && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) { + if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED + && schedulerEvent instanceof AppRemovedSchedulerEvent) { SLSRunner.decreaseRemainingApps(); - AppAttemptRemovedSchedulerEvent appRemoveEvent = - (AppAttemptRemovedSchedulerEvent) schedulerEvent; - ApplicationAttemptId appAttemptId = - appRemoveEvent.getApplicationAttemptID(); - appQueueMap.remove(appRemoveEvent.getApplicationAttemptID()); - } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED - && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) { - AppAttemptAddedSchedulerEvent appAddEvent = - (AppAttemptAddedSchedulerEvent) schedulerEvent; + AppRemovedSchedulerEvent appRemoveEvent = + (AppRemovedSchedulerEvent) schedulerEvent; + appQueueMap.remove(appRemoveEvent.getApplicationID()); + } else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED + && schedulerEvent instanceof AppAddedSchedulerEvent) { + AppAddedSchedulerEvent appAddEvent = + (AppAddedSchedulerEvent) schedulerEvent; String queueName = appAddEvent.getQueue(); - appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName); + appQueueMap.put(appAddEvent.getApplicationId(), queueName); } } } @@ -298,7 +297,9 @@ private void updateQueueWithNodeUpdate( continue; } - String queue = appQueueMap.get(containerId.getApplicationAttemptId()); + String queue = + appQueueMap.get(containerId.getApplicationAttemptId() + .getApplicationId()); int releasedMemory = 0, releasedVCores = 0; if (status.getExitStatus() == ContainerExitStatus.SUCCESS) { for (RMContainer rmc : app.getLiveContainers()) { @@ -330,7 
+331,7 @@ private void updateQueueWithAllocateRequest(Allocation allocation, // update queue information Resource pendingResource = Resources.createResource(0, 0); Resource allocatedResource = Resources.createResource(0, 0); - String queueName = appQueueMap.get(attemptId); + String queueName = appQueueMap.get(attemptId.getApplicationId()); // container requested for (ResourceRequest request : resourceRequests) { if (request.getResourceName().equals(ResourceRequest.ANY)) { diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0dcd8959566..429c4aba90d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -176,6 +176,9 @@ Release 2.4.0 - UNRELEASED YARN-1541. Changed ResourceManager to invalidate ApplicationMaster host/port information once an AM crashes. (Jian He via vinodkv) + YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize + app-attempts separately from apps. (Jian He via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java index ad3f20d23d9..bddcb352bb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java @@ -24,9 +24,11 @@ public enum RMAppEventType { RECOVER, KILL, - // Source: RMAppAttempt + // Source: Scheduler APP_REJECTED, APP_ACCEPTED, + + // Source: RMAppAttempt ATTEMPT_REGISTERED, ATTEMPT_UNREGISTERED, ATTEMPT_FINISHED, // Will 
send the final state diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 0bf7c817454..1d451fbc85b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -66,6 +66,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.state.InvalidStateTransitonException; import org.apache.hadoop.yarn.state.MultipleArcTransition; @@ -136,7 +138,7 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition()) .addTransition(RMAppState.NEW, RMAppState.NEW_SAVING, RMAppEventType.START, new RMAppNewlySavingTransition()) .addTransition(RMAppState.NEW, EnumSet.of(RMAppState.SUBMITTED, - RMAppState.RUNNING, RMAppState.FINISHED, RMAppState.FAILED, + RMAppState.ACCEPTED, RMAppState.FINISHED, RMAppState.FAILED, RMAppState.KILLED, RMAppState.FINAL_SAVING), RMAppEventType.RECOVER, new RMAppRecoveredTransition()) .addTransition(RMAppState.NEW, RMAppState.FINAL_SAVING, RMAppEventType.KILL, @@ -151,7 +153,7 @@ RMAppEventType.RECOVER, new 
RMAppRecoveredTransition()) .addTransition(RMAppState.NEW_SAVING, RMAppState.NEW_SAVING, RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition()) .addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED, - RMAppEventType.APP_NEW_SAVED, new StartAppAttemptTransition()) + RMAppEventType.APP_NEW_SAVED, new AddApplicationToSchedulerTransition()) .addTransition(RMAppState.NEW_SAVING, RMAppState.FINAL_SAVING, RMAppEventType.KILL, new FinalSavingTransition( @@ -169,9 +171,12 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition()) new FinalSavingTransition( new AppRejectedTransition(), RMAppState.FAILED)) .addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED, - RMAppEventType.APP_ACCEPTED) - .addTransition(RMAppState.SUBMITTED, RMAppState.KILLING, - RMAppEventType.KILL,new KillAttemptTransition()) + RMAppEventType.APP_ACCEPTED, new StartAppAttemptTransition()) + .addTransition(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING, + RMAppEventType.KILL, + new FinalSavingTransition( + new AppKilledTransition(), RMAppState.KILLED)) + // Transitions from ACCEPTED state .addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED, @@ -179,11 +184,22 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition()) .addTransition(RMAppState.ACCEPTED, RMAppState.RUNNING, RMAppEventType.ATTEMPT_REGISTERED) .addTransition(RMAppState.ACCEPTED, - EnumSet.of(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING), + EnumSet.of(RMAppState.ACCEPTED, RMAppState.FINAL_SAVING), + // ACCEPTED state is possible to receive ATTEMPT_FAILED event because + // RMAppRecoveredTransition is returning ACCEPTED state directly and + // waiting for the previous AM to exit. 
RMAppEventType.ATTEMPT_FAILED, - new AttemptFailedTransition(RMAppState.SUBMITTED)) - .addTransition(RMAppState.ACCEPTED, RMAppState.KILLING, - RMAppEventType.KILL,new KillAttemptTransition()) + new AttemptFailedTransition(RMAppState.ACCEPTED)) + .addTransition(RMAppState.ACCEPTED, RMAppState.FINAL_SAVING, + RMAppEventType.KILL, + new FinalSavingTransition( + new AppKilledTransition(), RMAppState.KILLED)) + // ACCEPTED state can once again receive APP_ACCEPTED event, because on + // recovery the app returns ACCEPTED state and the app once again goes + // through the scheduler and triggers one more APP_ACCEPTED event at + // ACCEPTED state. + .addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED, + RMAppEventType.APP_ACCEPTED) // Transitions from RUNNING state .addTransition(RMAppState.RUNNING, RMAppState.RUNNING, @@ -197,9 +213,9 @@ RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition()) // UnManagedAM directly jumps to finished RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION) .addTransition(RMAppState.RUNNING, - EnumSet.of(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING), + EnumSet.of(RMAppState.ACCEPTED, RMAppState.FINAL_SAVING), RMAppEventType.ATTEMPT_FAILED, - new AttemptFailedTransition(RMAppState.SUBMITTED)) + new AttemptFailedTransition(RMAppState.ACCEPTED)) .addTransition(RMAppState.RUNNING, RMAppState.KILLING, RMAppEventType.KILL, new KillAttemptTransition()) @@ -641,7 +657,7 @@ private void createNewAttempt(boolean startAttempt) { ApplicationAttemptId.newInstance(applicationId, attempts.size() + 1); RMAppAttempt attempt = new RMAppAttemptImpl(appAttemptId, rmContext, scheduler, masterService, - submissionContext, conf, user); + submissionContext, conf); attempts.put(appAttemptId, attempt); currentAttempt = attempt; if(startAttempt) { @@ -695,29 +711,46 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) { return app.recoveredFinalState; } + // Notify scheduler about the app on recovery + new
AddApplicationToSchedulerTransition().transition(app, event); + // No existent attempts means the attempt associated with this app was not // started or started but not yet saved. if (app.attempts.isEmpty()) { - app.createNewAttempt(true); return RMAppState.SUBMITTED; } - return RMAppState.RUNNING; + // YARN-1507 is saving the application state after the application is + // accepted. So after YARN-1507, an app is saved meaning it is accepted. + // Thus we return ACCEPTED state on recovery. + return RMAppState.ACCEPTED; + } + } + + private static final class AddApplicationToSchedulerTransition extends + RMAppTransition { + @SuppressWarnings("unchecked") + @Override + public void transition(RMAppImpl app, RMAppEvent event) { + if (event instanceof RMAppNewSavedEvent) { + RMAppNewSavedEvent storeEvent = (RMAppNewSavedEvent) event; + // For HA this exception needs to be handled by giving up + // master status if we got fenced + if (((RMAppNewSavedEvent) event).getStoredException() != null) { + LOG.error( + "Failed to store application: " + storeEvent.getApplicationId(), + storeEvent.getStoredException()); + ExitUtil.terminate(1, storeEvent.getStoredException()); + } + } + app.handler.handle(new AppAddedSchedulerEvent(app.applicationId, + app.submissionContext.getQueue(), app.user)); } } private static final class StartAppAttemptTransition extends RMAppTransition { @Override public void transition(RMAppImpl app, RMAppEvent event) { - RMAppNewSavedEvent storeEvent = (RMAppNewSavedEvent) event; - if (storeEvent.getStoredException() != null) { - // For HA this exception needs to be handled by giving up - // master status if we got fenced - LOG.error( - "Failed to store application: " + storeEvent.getApplicationId(), - storeEvent.getStoredException()); - ExitUtil.terminate(1, storeEvent.getStoredException()); - } app.createNewAttempt(true); }; } @@ -965,6 +998,8 @@ public void transition(RMAppImpl app, RMAppEvent event) { if (app.finishTime == 0 ) { app.finishTime =
System.currentTimeMillis(); } + app.handler.handle(new AppRemovedSchedulerEvent(app.applicationId, app + .getState())); app.handler.handle( new RMAppManagerEvent(app.applicationId, RMAppManagerEventType.APP_COMPLETED)); @@ -993,7 +1028,6 @@ public RMAppState transition(RMAppImpl app, RMAppEvent event) { return RMAppState.FINAL_SAVING; } } - } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java index bac27139647..e1522f1bf73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptEventType.java @@ -45,8 +45,7 @@ public enum RMAppAttemptEventType { ATTEMPT_UPDATE_SAVED, // Source: Scheduler - APP_REJECTED, - APP_ACCEPTED, + ATTEMPT_ADDED, // Source: RMAttemptImpl.recover RECOVER diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 647bc59c9dd..f805f423ee2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -75,13 +75,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFinishedAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent; @@ -150,7 +148,6 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { private final StringBuilder diagnostics = new StringBuilder(); private Configuration conf; - private String user; private static final ExpiredTransition EXPIRED_TRANSITION = new ExpiredTransition(); @@ -186,14 +183,10 @@ RMAppAttemptEventType.START, new AttemptStartedTransition()) RMAppAttemptEventType.RECOVER, new AttemptRecoveredTransition()) // Transitions from SUBMITTED state - .addTransition(RMAppAttemptState.SUBMITTED, 
RMAppAttemptState.FINAL_SAVING, - RMAppAttemptEventType.APP_REJECTED, - new FinalSavingTransition(new AppRejectedTransition(), - RMAppAttemptState.FAILED)) .addTransition(RMAppAttemptState.SUBMITTED, EnumSet.of(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, RMAppAttemptState.SCHEDULED), - RMAppAttemptEventType.APP_ACCEPTED, + RMAppAttemptEventType.ATTEMPT_ADDED, new ScheduleTransition()) .addTransition(RMAppAttemptState.SUBMITTED, RMAppAttemptState.FINAL_SAVING, RMAppAttemptEventType.KILL, @@ -380,8 +373,7 @@ RMAppAttemptEventType.STATUS_UPDATE, new StatusUpdateTransition()) .addTransition( RMAppAttemptState.KILLED, RMAppAttemptState.KILLED, - EnumSet.of(RMAppAttemptEventType.APP_ACCEPTED, - RMAppAttemptEventType.APP_REJECTED, + EnumSet.of(RMAppAttemptEventType.ATTEMPT_ADDED, RMAppAttemptEventType.EXPIRE, RMAppAttemptEventType.LAUNCHED, RMAppAttemptEventType.LAUNCH_FAILED, @@ -398,7 +390,7 @@ public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, RMContext rmContext, YarnScheduler scheduler, ApplicationMasterService masterService, ApplicationSubmissionContext submissionContext, - Configuration conf, String user) { + Configuration conf) { this.conf = conf; this.applicationAttemptId = appAttemptId; this.rmContext = rmContext; @@ -414,7 +406,6 @@ public RMAppAttemptImpl(ApplicationAttemptId appAttemptId, this.proxiedTrackingUrl = generateProxyUriWithScheme(null); this.stateMachine = stateMachineFactory.make(this); - this.user = user; } @Override @@ -750,35 +741,8 @@ public void transition(RMAppAttemptImpl appAttempt, appAttempt.rmContext.getAMRMTokenSecretManager()); // Add the applicationAttempt to the scheduler - appAttempt.eventHandler.handle( - new AppAttemptAddedSchedulerEvent(appAttempt.applicationAttemptId, - appAttempt.submissionContext.getQueue(), appAttempt.user)); - } - } - - private static final class AppRejectedTransition extends BaseTransition { - @Override - public void transition(RMAppAttemptImpl appAttempt, - RMAppAttemptEvent event) { - - 
RMAppAttemptRejectedEvent rejectedEvent = (RMAppAttemptRejectedEvent) event; - - // Tell the AMS. Unregister from the ApplicationMasterService - appAttempt.masterService - .unregisterAttempt(appAttempt.applicationAttemptId); - - // Save the diagnostic message - String message = rejectedEvent.getMessage(); - appAttempt.diagnostics.append(message); - - // Send the rejection event to app - appAttempt.eventHandler.handle( - new RMAppRejectedEvent( - rejectedEvent.getApplicationAttemptId().getApplicationId(), - message) - ); - - appAttempt.removeCredentials(appAttempt); + appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent( + appAttempt.applicationAttemptId)); } } @@ -794,11 +758,6 @@ private static final class ScheduleTransition public RMAppAttemptState transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { if (!appAttempt.submissionContext.getUnmanagedAM()) { - // Send the acceptance to the app - appAttempt.eventHandler.handle(new RMAppEvent(event - .getApplicationAttemptId().getApplicationId(), - RMAppEventType.APP_ACCEPTED)); - // Request a container for the AM. ResourceRequest request = BuilderUtils.newResourceRequest( @@ -918,11 +877,6 @@ private void rememberTargetTransitionsAndStoreState(RMAppAttemptEvent event, FinalApplicationStatus finalStatus = null; switch (event.getType()) { - case APP_REJECTED: - RMAppAttemptRejectedEvent rejectedEvent = - (RMAppAttemptRejectedEvent) event; - diags = rejectedEvent.getMessage(); - break; case LAUNCH_FAILED: RMAppAttemptLaunchFailedEvent launchFaileEvent = (RMAppAttemptLaunchFailedEvent) event; @@ -1091,16 +1045,6 @@ private static final class UnmanagedAMAttemptSavedTransition public void transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) { appAttempt.checkAttemptStoreError(event); - // Send the acceptance to the app - // Ideally this should have been done when the scheduler accepted the app. 
- // But its here because until the attempt is saved the client should not - // launch the unmanaged AM. Client waits for the app status to be accepted - // before doing so. So we have to delay the accepted state until we have - // completed storing the attempt - appAttempt.eventHandler.handle(new RMAppEvent(event - .getApplicationAttemptId().getApplicationId(), - RMAppEventType.APP_ACCEPTED)); - super.transition(appAttempt, event); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java index e9c5c5ae53a..36e68583857 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java @@ -56,7 +56,7 @@ public ActiveUsersManager(QueueMetrics metrics) { * @param user application user * @param applicationId activated application */ - @Lock({Queue.class, SchedulerApplication.class}) + @Lock({Queue.class, SchedulerApplicationAttempt.class}) synchronized public void activateApplication( String user, ApplicationId applicationId) { Set userApps = usersApplications.get(user); @@ -79,7 +79,7 @@ synchronized public void activateApplication( * @param user application user * @param applicationId deactivated application */ - @Lock({Queue.class, SchedulerApplication.class}) + @Lock({Queue.class, SchedulerApplicationAttempt.class}) synchronized public void deactivateApplication( String user, ApplicationId applicationId) { Set userApps = 
usersApplications.get(user); @@ -102,7 +102,7 @@ synchronized public void deactivateApplication( * resource requests. * @return number of active users */ - @Lock({Queue.class, SchedulerApplication.class}) + @Lock({Queue.class, SchedulerApplicationAttempt.class}) synchronized public int getNumActiveUsers() { return activeUsers; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java index f1dc9d2ae37..669b97a841b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppReport.java @@ -36,7 +36,7 @@ public class SchedulerAppReport { private final Collection reserved; private final boolean pending; - public SchedulerAppReport(SchedulerApplication app) { + public SchedulerAppReport(SchedulerApplicationAttempt app) { this.live = app.getLiveContainers(); this.reserved = app.getReservedContainers(); this.pending = app.isPending(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java index be68fe2e28f..36a124421d0 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerAppUtils.java @@ -22,7 +22,7 @@ public class SchedulerAppUtils { - public static boolean isBlacklisted(SchedulerApplication application, + public static boolean isBlacklisted(SchedulerApplicationAttempt application, SchedulerNode node, Log LOG) { if (application.isBlacklisted(node.getNodeName())) { if (LOG.isDebugEnabled()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java index 0fb8acbfbc1..48e3ee85f76 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java @@ -17,393 +17,26 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import 
org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.Priority; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.ResourceRequest; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerReservedEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; -import org.apache.hadoop.yarn.util.resource.Resources; -import com.google.common.collect.HashMultiset; -import com.google.common.collect.Multiset; - -/** - * Represents an application attempt from the viewpoint of the scheduler. - * Each running app attempt in the RM corresponds to one instance - * of this class. 
- */ @Private @Unstable -public abstract class SchedulerApplication { - - private static final Log LOG = LogFactory.getLog(SchedulerApplication.class); +public class SchedulerApplication { - protected final AppSchedulingInfo appSchedulingInfo; - - protected final Map liveContainers = - new HashMap(); - protected final Map> reservedContainers = - new HashMap>(); + private final Queue queue; + private final String user; - private final Multiset reReservations = HashMultiset.create(); - - protected final Resource currentReservation = Resource.newInstance(0, 0); - private Resource resourceLimit = Resource.newInstance(0, 0); - protected final Resource currentConsumption = Resource.newInstance(0, 0); - - protected List newlyAllocatedContainers = - new ArrayList(); - - /** - * Count how many times the application has been given an opportunity - * to schedule a task at each priority. Each time the scheduler - * asks the application for a task at this priority, it is incremented, - * and each time the application successfully schedules a task, it - * is reset to 0. - */ - Multiset schedulingOpportunities = HashMultiset.create(); - - // Time of the last container scheduled at the current allowed level - protected Map lastScheduledContainer = - new HashMap(); - - protected final Queue queue; - protected boolean isStopped = false; - - protected final RMContext rmContext; - - public SchedulerApplication(ApplicationAttemptId applicationAttemptId, - String user, Queue queue, ActiveUsersManager activeUsersManager, - RMContext rmContext) { - this.rmContext = rmContext; - this.appSchedulingInfo = - new AppSchedulingInfo(applicationAttemptId, user, queue, - activeUsersManager); + public SchedulerApplication(Queue queue, String user) { this.queue = queue; - } - - /** - * Get the live containers of the application. 
- * @return live containers of the application - */ - public synchronized Collection getLiveContainers() { - return new ArrayList(liveContainers.values()); - } - - /** - * Is this application pending? - * @return true if it is else false. - */ - public boolean isPending() { - return appSchedulingInfo.isPending(); - } - - /** - * Get {@link ApplicationAttemptId} of the application master. - * @return ApplicationAttemptId of the application master - */ - public ApplicationAttemptId getApplicationAttemptId() { - return appSchedulingInfo.getApplicationAttemptId(); - } - - public ApplicationId getApplicationId() { - return appSchedulingInfo.getApplicationId(); - } - - public String getUser() { - return appSchedulingInfo.getUser(); + this.user = user; } - public Map getResourceRequests(Priority priority) { - return appSchedulingInfo.getResourceRequests(priority); - } - - public int getNewContainerId() { - return appSchedulingInfo.getNewContainerId(); - } - - public Collection getPriorities() { - return appSchedulingInfo.getPriorities(); - } - - public ResourceRequest getResourceRequest(Priority priority, String resourceName) { - return this.appSchedulingInfo.getResourceRequest(priority, resourceName); - } - - public synchronized int getTotalRequiredResources(Priority priority) { - return getResourceRequest(priority, ResourceRequest.ANY).getNumContainers(); - } - - public Resource getResource(Priority priority) { - return appSchedulingInfo.getResource(priority); - } - - public String getQueueName() { - return appSchedulingInfo.getQueueName(); - } - - public synchronized RMContainer getRMContainer(ContainerId id) { - return liveContainers.get(id); - } - - protected synchronized void resetReReservations(Priority priority) { - reReservations.setCount(priority, 0); - } - - protected synchronized void addReReservation(Priority priority) { - reReservations.add(priority); - } - - public synchronized int getReReservations(Priority priority) { - return 
reReservations.count(priority); - } - - /** - * Get total current reservations. - * Used only by unit tests - * @return total current reservations - */ - @Stable - @Private - public synchronized Resource getCurrentReservation() { - return currentReservation; - } - public Queue getQueue() { return queue; } - - public synchronized void updateResourceRequests( - List requests) { - if (!isStopped) { - appSchedulingInfo.updateResourceRequests(requests); - } - } - - public synchronized void stop(RMAppAttemptState rmAppAttemptFinalState) { - // Cleanup all scheduling information - isStopped = true; - appSchedulingInfo.stop(rmAppAttemptFinalState); - } - public synchronized boolean isStopped() { - return isStopped; + public String getUser() { + return user; } - - /** - * Get the list of reserved containers - * @return All of the reserved containers. - */ - public synchronized List getReservedContainers() { - List reservedContainers = new ArrayList(); - for (Map.Entry> e : - this.reservedContainers.entrySet()) { - reservedContainers.addAll(e.getValue().values()); - } - return reservedContainers; - } - - public synchronized RMContainer reserve(SchedulerNode node, Priority priority, - RMContainer rmContainer, Container container) { - // Create RMContainer if necessary - if (rmContainer == null) { - rmContainer = - new RMContainerImpl(container, getApplicationAttemptId(), - node.getNodeID(), rmContext.getDispatcher().getEventHandler(), - rmContext.getContainerAllocationExpirer()); - - Resources.addTo(currentReservation, container.getResource()); - - // Reset the re-reservation count - resetReReservations(priority); - } else { - // Note down the re-reservation - addReReservation(priority); - } - rmContainer.handle(new RMContainerReservedEvent(container.getId(), - container.getResource(), node.getNodeID(), priority)); - - Map reservedContainers = - this.reservedContainers.get(priority); - if (reservedContainers == null) { - reservedContainers = new HashMap(); - 
this.reservedContainers.put(priority, reservedContainers); - } - reservedContainers.put(node.getNodeID(), rmContainer); - - LOG.info("Application " + getApplicationId() - + " reserved container " + rmContainer - + " on node " + node + ", currently has " + reservedContainers.size() - + " at priority " + priority - + "; currentReservation " + currentReservation.getMemory()); - - return rmContainer; - } - - /** - * Has the application reserved the given node at the - * given priority? - * @param node node to be checked - * @param priority priority of reserved container - * @return true is reserved, false if not - */ - public synchronized boolean isReserved(SchedulerNode node, Priority priority) { - Map reservedContainers = - this.reservedContainers.get(priority); - if (reservedContainers != null) { - return reservedContainers.containsKey(node.getNodeID()); - } - return false; - } - - public synchronized void setHeadroom(Resource globalLimit) { - this.resourceLimit = globalLimit; - } - - /** - * Get available headroom in terms of resources for the application's user. - * @return available resource headroom - */ - public synchronized Resource getHeadroom() { - // Corner case to deal with applications being slightly over-limit - if (resourceLimit.getMemory() < 0) { - resourceLimit.setMemory(0); - } - - return resourceLimit; - } - - public synchronized int getNumReservedContainers(Priority priority) { - Map reservedContainers = - this.reservedContainers.get(priority); - return (reservedContainers == null) ? 0 : reservedContainers.size(); - } - - @SuppressWarnings("unchecked") - public synchronized void containerLaunchedOnNode(ContainerId containerId, - NodeId nodeId) { - // Inform the container - RMContainer rmContainer = getRMContainer(containerId); - if (rmContainer == null) { - // Some unknown container sneaked into the system. Kill it. 
- rmContext.getDispatcher().getEventHandler() - .handle(new RMNodeCleanContainerEvent(nodeId, containerId)); - return; - } - - rmContainer.handle(new RMContainerEvent(containerId, - RMContainerEventType.LAUNCHED)); - } - - public synchronized void showRequests() { - if (LOG.isDebugEnabled()) { - for (Priority priority : getPriorities()) { - Map requests = getResourceRequests(priority); - if (requests != null) { - LOG.debug("showRequests:" + " application=" + getApplicationId() + - " headRoom=" + getHeadroom() + - " currentConsumption=" + currentConsumption.getMemory()); - for (ResourceRequest request : requests.values()) { - LOG.debug("showRequests:" + " application=" + getApplicationId() - + " request=" + request); - } - } - } - } - } - - public Resource getCurrentConsumption() { - return currentConsumption; - } - - public synchronized List pullNewlyAllocatedContainers() { - List returnContainerList = new ArrayList( - newlyAllocatedContainers.size()); - for (RMContainer rmContainer : newlyAllocatedContainers) { - rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), - RMContainerEventType.ACQUIRED)); - returnContainerList.add(rmContainer.getContainer()); - } - newlyAllocatedContainers.clear(); - return returnContainerList; - } - - public synchronized void updateBlacklist( - List blacklistAdditions, List blacklistRemovals) { - if (!isStopped) { - this.appSchedulingInfo.updateBlacklist( - blacklistAdditions, blacklistRemovals); - } - } - - public boolean isBlacklisted(String resourceName) { - return this.appSchedulingInfo.isBlacklisted(resourceName); - } - - public synchronized void addSchedulingOpportunity(Priority priority) { - schedulingOpportunities.setCount(priority, - schedulingOpportunities.count(priority) + 1); - } - - public synchronized void subtractSchedulingOpportunity(Priority priority) { - int count = schedulingOpportunities.count(priority) - 1; - this.schedulingOpportunities.setCount(priority, Math.max(count, 0)); - } - - /** - * 
Return the number of times the application has been given an opportunity - * to schedule a task at the given priority since the last time it - * successfully did so. - */ - public synchronized int getSchedulingOpportunities(Priority priority) { - return schedulingOpportunities.count(priority); - } - - /** - * Should be called when an application has successfully scheduled a container, - * or when the scheduling locality threshold is relaxed. - * Reset various internal counters which affect delay scheduling - * - * @param priority The priority of the container scheduled. - */ - public synchronized void resetSchedulingOpportunities(Priority priority) { - resetSchedulingOpportunities(priority, System.currentTimeMillis()); - } - // used for continuous scheduling - public synchronized void resetSchedulingOpportunities(Priority priority, - long currentTimeMs) { - lastScheduledContainer.put(priority, currentTimeMs); - schedulingOpportunities.setCount(priority, 0); - } - - public synchronized ApplicationResourceUsageReport getResourceUsageReport() { - return ApplicationResourceUsageReport.newInstance(liveContainers.size(), - reservedContainers.size(), Resources.clone(currentConsumption), - Resources.clone(currentReservation), - Resources.add(currentConsumption, currentReservation)); - } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java new file mode 100644 index 00000000000..c601ceef03c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -0,0 +1,410 @@ +/** +* Licensed to the 
Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.hadoop.yarn.server.resourcemanager.scheduler; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Stable; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import 
org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerReservedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; +import org.apache.hadoop.yarn.util.resource.Resources; + +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; + +/** + * Represents an application attempt from the viewpoint of the scheduler. + * Each running app attempt in the RM corresponds to one instance + * of this class. + */ +@Private +@Unstable +public abstract class SchedulerApplicationAttempt { + + private static final Log LOG = LogFactory + .getLog(SchedulerApplicationAttempt.class); + + protected final AppSchedulingInfo appSchedulingInfo; + + protected final Map liveContainers = + new HashMap(); + protected final Map> reservedContainers = + new HashMap>(); + + private final Multiset reReservations = HashMultiset.create(); + + protected final Resource currentReservation = Resource.newInstance(0, 0); + private Resource resourceLimit = Resource.newInstance(0, 0); + protected final Resource currentConsumption = Resource.newInstance(0, 0); + + protected List newlyAllocatedContainers = + new ArrayList(); + + /** + * Count how many times the application has been given an opportunity + * to schedule a task at each priority. Each time the scheduler + * asks the application for a task at this priority, it is incremented, + * and each time the application successfully schedules a task, it + * is reset to 0. 
+ */ + Multiset schedulingOpportunities = HashMultiset.create(); + + // Time of the last container scheduled at the current allowed level + protected Map lastScheduledContainer = + new HashMap(); + + protected final Queue queue; + protected boolean isStopped = false; + + protected final RMContext rmContext; + + public SchedulerApplicationAttempt(ApplicationAttemptId applicationAttemptId, + String user, Queue queue, ActiveUsersManager activeUsersManager, + RMContext rmContext) { + this.rmContext = rmContext; + this.appSchedulingInfo = + new AppSchedulingInfo(applicationAttemptId, user, queue, + activeUsersManager); + this.queue = queue; + } + + /** + * Get the live containers of the application. + * @return live containers of the application + */ + public synchronized Collection getLiveContainers() { + return new ArrayList(liveContainers.values()); + } + + /** + * Is this application pending? + * @return true if it is else false. + */ + public boolean isPending() { + return appSchedulingInfo.isPending(); + } + + /** + * Get {@link ApplicationAttemptId} of the application master. 
+ * @return ApplicationAttemptId of the application master + */ + public ApplicationAttemptId getApplicationAttemptId() { + return appSchedulingInfo.getApplicationAttemptId(); + } + + public ApplicationId getApplicationId() { + return appSchedulingInfo.getApplicationId(); + } + + public String getUser() { + return appSchedulingInfo.getUser(); + } + + public Map getResourceRequests(Priority priority) { + return appSchedulingInfo.getResourceRequests(priority); + } + + public int getNewContainerId() { + return appSchedulingInfo.getNewContainerId(); + } + + public Collection getPriorities() { + return appSchedulingInfo.getPriorities(); + } + + public ResourceRequest getResourceRequest(Priority priority, String resourceName) { + return this.appSchedulingInfo.getResourceRequest(priority, resourceName); + } + + public synchronized int getTotalRequiredResources(Priority priority) { + return getResourceRequest(priority, ResourceRequest.ANY).getNumContainers(); + } + + public Resource getResource(Priority priority) { + return appSchedulingInfo.getResource(priority); + } + + public String getQueueName() { + return appSchedulingInfo.getQueueName(); + } + + public synchronized RMContainer getRMContainer(ContainerId id) { + return liveContainers.get(id); + } + + protected synchronized void resetReReservations(Priority priority) { + reReservations.setCount(priority, 0); + } + + protected synchronized void addReReservation(Priority priority) { + reReservations.add(priority); + } + + public synchronized int getReReservations(Priority priority) { + return reReservations.count(priority); + } + + /** + * Get total current reservations. 
+ * Used only by unit tests + * @return total current reservations + */ + @Stable + @Private + public synchronized Resource getCurrentReservation() { + return currentReservation; + } + + public Queue getQueue() { + return queue; + } + + public synchronized void updateResourceRequests( + List requests) { + if (!isStopped) { + appSchedulingInfo.updateResourceRequests(requests); + } + } + + public synchronized void stop(RMAppAttemptState rmAppAttemptFinalState) { + // Cleanup all scheduling information + isStopped = true; + appSchedulingInfo.stop(rmAppAttemptFinalState); + } + + public synchronized boolean isStopped() { + return isStopped; + } + + /** + * Get the list of reserved containers + * @return All of the reserved containers. + */ + public synchronized List getReservedContainers() { + List reservedContainers = new ArrayList(); + for (Map.Entry> e : + this.reservedContainers.entrySet()) { + reservedContainers.addAll(e.getValue().values()); + } + return reservedContainers; + } + + public synchronized RMContainer reserve(SchedulerNode node, Priority priority, + RMContainer rmContainer, Container container) { + // Create RMContainer if necessary + if (rmContainer == null) { + rmContainer = + new RMContainerImpl(container, getApplicationAttemptId(), + node.getNodeID(), rmContext.getDispatcher().getEventHandler(), + rmContext.getContainerAllocationExpirer()); + + Resources.addTo(currentReservation, container.getResource()); + + // Reset the re-reservation count + resetReReservations(priority); + } else { + // Note down the re-reservation + addReReservation(priority); + } + rmContainer.handle(new RMContainerReservedEvent(container.getId(), + container.getResource(), node.getNodeID(), priority)); + + Map reservedContainers = + this.reservedContainers.get(priority); + if (reservedContainers == null) { + reservedContainers = new HashMap(); + this.reservedContainers.put(priority, reservedContainers); + } + reservedContainers.put(node.getNodeID(), rmContainer); + + 
LOG.info("Application " + getApplicationId() + + " reserved container " + rmContainer + + " on node " + node + ", currently has " + reservedContainers.size() + + " at priority " + priority + + "; currentReservation " + currentReservation.getMemory()); + + return rmContainer; + } + + /** + * Has the application reserved the given node at the + * given priority? + * @param node node to be checked + * @param priority priority of reserved container + * @return true if reserved, false if not + */ + public synchronized boolean isReserved(SchedulerNode node, Priority priority) { + Map reservedContainers = + this.reservedContainers.get(priority); + if (reservedContainers != null) { + return reservedContainers.containsKey(node.getNodeID()); + } + return false; + } + + public synchronized void setHeadroom(Resource globalLimit) { + this.resourceLimit = globalLimit; + } + + /** + * Get available headroom in terms of resources for the application's user. + * @return available resource headroom + */ + public synchronized Resource getHeadroom() { + // Corner case to deal with applications being slightly over-limit + if (resourceLimit.getMemory() < 0) { + resourceLimit.setMemory(0); + } + + return resourceLimit; + } + + public synchronized int getNumReservedContainers(Priority priority) { + Map reservedContainers = + this.reservedContainers.get(priority); + return (reservedContainers == null) ? 0 : reservedContainers.size(); + } + + @SuppressWarnings("unchecked") + public synchronized void containerLaunchedOnNode(ContainerId containerId, + NodeId nodeId) { + // Inform the container + RMContainer rmContainer = getRMContainer(containerId); + if (rmContainer == null) { + // Some unknown container sneaked into the system. Kill it.
+ rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeCleanContainerEvent(nodeId, containerId)); + return; + } + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.LAUNCHED)); + } + + public synchronized void showRequests() { + if (LOG.isDebugEnabled()) { + for (Priority priority : getPriorities()) { + Map requests = getResourceRequests(priority); + if (requests != null) { + LOG.debug("showRequests:" + " application=" + getApplicationId() + + " headRoom=" + getHeadroom() + + " currentConsumption=" + currentConsumption.getMemory()); + for (ResourceRequest request : requests.values()) { + LOG.debug("showRequests:" + " application=" + getApplicationId() + + " request=" + request); + } + } + } + } + } + + public Resource getCurrentConsumption() { + return currentConsumption; + } + + public synchronized List pullNewlyAllocatedContainers() { + List returnContainerList = new ArrayList( + newlyAllocatedContainers.size()); + for (RMContainer rmContainer : newlyAllocatedContainers) { + rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), + RMContainerEventType.ACQUIRED)); + returnContainerList.add(rmContainer.getContainer()); + } + newlyAllocatedContainers.clear(); + return returnContainerList; + } + + public synchronized void updateBlacklist( + List blacklistAdditions, List blacklistRemovals) { + if (!isStopped) { + this.appSchedulingInfo.updateBlacklist( + blacklistAdditions, blacklistRemovals); + } + } + + public boolean isBlacklisted(String resourceName) { + return this.appSchedulingInfo.isBlacklisted(resourceName); + } + + public synchronized void addSchedulingOpportunity(Priority priority) { + schedulingOpportunities.setCount(priority, + schedulingOpportunities.count(priority) + 1); + } + + public synchronized void subtractSchedulingOpportunity(Priority priority) { + int count = schedulingOpportunities.count(priority) - 1; + this.schedulingOpportunities.setCount(priority, Math.max(count, 0)); + } + + /** + * 
Return the number of times the application has been given an opportunity + * to schedule a task at the given priority since the last time it + * successfully did so. + */ + public synchronized int getSchedulingOpportunities(Priority priority) { + return schedulingOpportunities.count(priority); + } + + /** + * Should be called when an application has successfully scheduled a container, + * or when the scheduling locality threshold is relaxed. + * Reset various internal counters which affect delay scheduling + * + * @param priority The priority of the container scheduled. + */ + public synchronized void resetSchedulingOpportunities(Priority priority) { + resetSchedulingOpportunities(priority, System.currentTimeMillis()); + } + // used for continuous scheduling + public synchronized void resetSchedulingOpportunities(Priority priority, + long currentTimeMs) { + lastScheduledContainer.put(priority, currentTimeMs); + schedulingOpportunities.setCount(priority, 0); + } + + public synchronized ApplicationResourceUsageReport getResourceUsageReport() { + return ApplicationResourceUsageReport.newInstance(liveContainers.size(), + reservedContainers.size(), Resources.clone(currentConsumption), + Resources.clone(currentReservation), + Resources.add(currentConsumption, currentReservation)); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java index c317df51a66..f5090ba699e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java @@ -27,6 +27,7 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -35,7 +36,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -155,21 +155,32 @@ public interface CSQueue /** * Submit a new application to the queue. - * @param application application being submitted + * @param applicationId the applicationId of the application being submitted * @param user user who submitted the application * @param queue queue to which the application is submitted */ - public void submitApplication(FiCaSchedulerApp application, String user, - String queue) - throws AccessControlException; - + public void submitApplication(ApplicationId applicationId, String user, + String queue) throws AccessControlException; + + /** + * Submit an application attempt to the queue. + */ + public void submitApplicationAttempt(FiCaSchedulerApp application, + String userName); + /** * An application submitted to this queue has finished. 
- * @param application - * @param queue application queue + * @param applicationId + * @param user user who submitted the application */ - public void finishApplication(FiCaSchedulerApp application, String queue); - + public void finishApplication(ApplicationId applicationId, String user); + + /** + * An application attempt submitted to this queue has finished. + */ + public void finishApplicationAttempt(FiCaSchedulerApp application, + String queue); + /** * Assign containers to applications in the queue or it's children (if any). * @param clusterResource the resource of the cluster. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 60256398461..5f341089ba8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -53,10 +54,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import 
org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; @@ -65,14 +69,16 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; 
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; @@ -185,7 +191,11 @@ public Configuration getConf() { private Resource maximumAllocation; @VisibleForTesting - protected Map applications = + protected Map applications = + new ConcurrentHashMap(); + + @VisibleForTesting + protected Map appAttempts = new ConcurrentHashMap(); private boolean initialized = false; @@ -415,61 +425,84 @@ static CSQueue parseQueue( synchronized CSQueue getQueue(String queueName) { return queues.get(queueName); } - - private synchronized void - addApplicationAttempt(ApplicationAttemptId applicationAttemptId, - String queueName, String user) { - // Sanity checks + private synchronized void addApplication(ApplicationId applicationId, + String queueName, String user) { + // sanity checks. 
CSQueue queue = getQueue(queueName); if (queue == null) { - String message = "Application " + applicationAttemptId + + String message = "Application " + applicationId + " submitted by user " + user + " to unknown queue: " + queueName; - this.rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptRejectedEvent(applicationAttemptId, message)); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMAppRejectedEvent(applicationId, message)); return; } if (!(queue instanceof LeafQueue)) { - String message = "Application " + applicationAttemptId + + String message = "Application " + applicationId + " submitted by user " + user + " to non-leaf queue: " + queueName; - this.rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptRejectedEvent(applicationAttemptId, message)); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMAppRejectedEvent(applicationId, message)); return; } - - // TODO: Fix store - FiCaSchedulerApp SchedulerApp = - new FiCaSchedulerApp(applicationAttemptId, user, queue, - queue.getActiveUsersManager(), rmContext); - // Submit to the queue try { - queue.submitApplication(SchedulerApp, user, queueName); + queue.submitApplication(applicationId, user, queueName); } catch (AccessControlException ace) { - LOG.info("Failed to submit application " + applicationAttemptId + - " to queue " + queueName + " from user " + user, ace); - this.rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptRejectedEvent(applicationAttemptId, - ace.toString())); + LOG.info("Failed to submit application " + applicationId + " to queue " + + queueName + " from user " + user, ace); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMAppRejectedEvent(applicationId, ace.toString())); return; } + SchedulerApplication application = + new SchedulerApplication(queue, user); + applications.put(applicationId, application); + LOG.info("Accepted application " + applicationId + " from user: " + user + + ", in 
queue: " + queueName); + rmContext.getDispatcher().getEventHandler() + .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); + } - applications.put(applicationAttemptId, SchedulerApp); - - LOG.info("Application Submission: " + applicationAttemptId + - ", user: " + user + - " queue: " + queue + - ", currently active: " + applications.size()); + private synchronized void addApplicationAttempt( + ApplicationAttemptId applicationAttemptId) { + SchedulerApplication application = + applications.get(applicationAttemptId.getApplicationId()); + CSQueue queue = (CSQueue) application.getQueue(); + FiCaSchedulerApp SchedulerApp = + new FiCaSchedulerApp(applicationAttemptId, application.getUser(), + queue, queue.getActiveUsersManager(), rmContext); + appAttempts.put(applicationAttemptId, SchedulerApp); + queue.submitApplicationAttempt(SchedulerApp, application.getUser()); + LOG.info("Added Application Attempt " + applicationAttemptId + + " to scheduler from user " + application.getUser() + " in queue " + + queue.getQueueName()); rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptEvent(applicationAttemptId, - RMAppAttemptEventType.APP_ACCEPTED)); + new RMAppAttemptEvent(applicationAttemptId, + RMAppAttemptEventType.ATTEMPT_ADDED)); + } + + private synchronized void doneApplication(ApplicationId applicationId, + RMAppState finalState) { + SchedulerApplication application = applications.get(applicationId); + if (application == null){ + // The AppRemovedSchedulerEvent may be sent on recovery for completed apps. 
+ return; + } + CSQueue queue = (CSQueue) application.getQueue(); + if (!(queue instanceof LeafQueue)) { + LOG.error("Cannot finish application " + "from non-leaf queue: " + + queue.getQueueName()); + } else { + queue.finishApplication(applicationId, application.getUser()); + } + applications.remove(applicationId); } private synchronized void doneApplicationAttempt( ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState) { - LOG.info("Application " + applicationAttemptId + " is done." + + LOG.info("Application Attempt " + applicationAttemptId + " is done." + " finalState=" + rmAppAttemptFinalState); FiCaSchedulerApp application = getApplication(applicationAttemptId); @@ -509,11 +542,11 @@ private synchronized void doneApplicationAttempt( LOG.error("Cannot finish application " + "from non-leaf queue: " + queueName); } else { - queue.finishApplication(application, queue.getQueueName()); + queue.finishApplicationAttempt(application, queue.getQueueName()); } // Remove from our data-structure - applications.remove(applicationAttemptId); + appAttempts.remove(applicationAttemptId); } private static final Allocation EMPTY_ALLOCATION = @@ -740,12 +773,25 @@ public void handle(SchedulerEvent event) { nodeUpdate(nodeUpdatedEvent.getRMNode()); } break; + case APP_ADDED: + { + AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; + addApplication(appAddedEvent.getApplicationId(), + appAddedEvent.getQueue(), appAddedEvent.getUser()); + } + break; + case APP_REMOVED: + { + AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event; + doneApplication(appRemovedEvent.getApplicationID(), + appRemovedEvent.getFinalState()); + } + break; case APP_ATTEMPT_ADDED: { AppAttemptAddedSchedulerEvent appAttemptAddedEvent = (AppAttemptAddedSchedulerEvent) event; - addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), - appAttemptAddedEvent.getQueue(), appAttemptAddedEvent.getUser()); + 
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId()); } break; case APP_ATTEMPT_REMOVED: @@ -854,7 +900,7 @@ private synchronized void completedContainer(RMContainer rmContainer, @Lock(Lock.NoLock.class) FiCaSchedulerApp getApplication(ApplicationAttemptId applicationAttemptId) { - return applications.get(applicationAttemptId); + return appAttempts.get(applicationAttemptId); } @Override @@ -912,7 +958,7 @@ public void preemptContainer(ApplicationAttemptId aid, RMContainer cont) { LOG.debug("PREEMPT_CONTAINER: application:" + aid.toString() + " container: " + cont.toString()); } - FiCaSchedulerApp app = applications.get(aid); + FiCaSchedulerApp app = appAttempts.get(aid); if (app != null) { app.addPreemptContainer(cont.getContainerId()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index db7db607ba9..a8581a0a8d3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -38,6 +38,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; @@ -59,7 +60,6 @@ 
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; @@ -99,7 +99,7 @@ public class LeafQueue implements CSQueue { private volatile int numContainers; Set activeApplications; - Map applicationsMap = + Map applicationAttemptMap = new HashMap(); Set pendingApplications; @@ -635,7 +635,22 @@ public boolean hasAccess(QueueACL acl, UserGroupInformation user) { } @Override - public void submitApplication(FiCaSchedulerApp application, String userName, + public void submitApplicationAttempt(FiCaSchedulerApp application, + String userName) { + // Careful! Locking order is important! + synchronized (this) { + User user = getUser(userName); + // Add the attempt to our data-structures + addApplicationAttempt(application, user); + } + + int attemptId = application.getApplicationAttemptId().getAttemptId(); + metrics.submitApp(userName, attemptId); + getParent().submitApplicationAttempt(application, userName); + } + + @Override + public void submitApplication(ApplicationId applicationId, String userName, String queue) throws AccessControlException { // Careful! Locking order is important! @@ -653,8 +668,7 @@ public void submitApplication(FiCaSchedulerApp application, String userName, // Check if the queue is accepting jobs if (getState() != QueueState.RUNNING) { String msg = "Queue " + getQueuePath() + - " is STOPPED. Cannot accept submission of application: " + - application.getApplicationId(); + " is STOPPED. 
Cannot accept submission of application: " + applicationId; LOG.info(msg); throw new AccessControlException(msg); } @@ -663,8 +677,7 @@ public void submitApplication(FiCaSchedulerApp application, String userName, if (getNumApplications() >= getMaxApplications()) { String msg = "Queue " + getQueuePath() + " already has " + getNumApplications() + " applications," + - " cannot accept submission of application: " + - application.getApplicationId(); + " cannot accept submission of application: " + applicationId; LOG.info(msg); throw new AccessControlException(msg); } @@ -675,26 +688,18 @@ public void submitApplication(FiCaSchedulerApp application, String userName, String msg = "Queue " + getQueuePath() + " already has " + user.getTotalApplications() + " applications from user " + userName + - " cannot accept submission of application: " + - application.getApplicationId(); + " cannot accept submission of application: " + applicationId; LOG.info(msg); throw new AccessControlException(msg); } - - // Add the application to our data-structures - addApplication(application, user); } - int attemptId = application.getApplicationAttemptId().getAttemptId(); - metrics.submitApp(userName, attemptId); - // Inform the parent queue try { - getParent().submitApplication(application, userName, queue); + getParent().submitApplication(applicationId, userName, queue); } catch (AccessControlException ace) { LOG.info("Failed to submit application to parent-queue: " + getParent().getQueuePath(), ace); - removeApplication(application, user); throw ace; } } @@ -722,11 +727,11 @@ private synchronized void activateApplications() { } } - private synchronized void addApplication(FiCaSchedulerApp application, User user) { + private synchronized void addApplicationAttempt(FiCaSchedulerApp application, User user) { // Accept user.submitApplication(); pendingApplications.add(application); - applicationsMap.put(application.getApplicationAttemptId(), application); + 
applicationAttemptMap.put(application.getApplicationAttemptId(), application); // Activate applications activateApplications(); @@ -742,22 +747,28 @@ private synchronized void addApplication(FiCaSchedulerApp application, User user } @Override - public void finishApplication(FiCaSchedulerApp application, String queue) { - // Careful! Locking order is important! - synchronized (this) { - removeApplication(application, getUser(application.getUser())); - } - + public void finishApplication(ApplicationId application, String user) { + // Inform the activeUsersManager + activeUsersManager.deactivateApplication(user, application); // Inform the parent queue - getParent().finishApplication(application, queue); + getParent().finishApplication(application, user); } - public synchronized void removeApplication(FiCaSchedulerApp application, User user) { + @Override + public void finishApplicationAttempt(FiCaSchedulerApp application, String queue) { + // Careful! Locking order is important! + synchronized (this) { + removeApplicationAttempt(application, getUser(application.getUser())); + } + getParent().finishApplicationAttempt(application, queue); + } + + public synchronized void removeApplicationAttempt(FiCaSchedulerApp application, User user) { boolean wasActive = activeApplications.remove(application); if (!wasActive) { pendingApplications.remove(application); } - applicationsMap.remove(application.getApplicationAttemptId()); + applicationAttemptMap.remove(application.getApplicationAttemptId()); user.finishApplication(wasActive); if (user.getTotalApplications() == 0) { @@ -766,13 +777,7 @@ public synchronized void removeApplication(FiCaSchedulerApp application, User us // Check if we can activate more applications activateApplications(); - - // Inform the activeUsersManager - synchronized (application) { - activeUsersManager.deactivateApplication( - application.getUser(), application.getApplicationId()); - } - + LOG.info("Application removed -" + " appId: " + 
application.getApplicationId() + " user: " + application.getUser() + @@ -783,10 +788,10 @@ public synchronized void removeApplication(FiCaSchedulerApp application, User us " #queue-active-applications: " + getNumActiveApplications() ); } - + private synchronized FiCaSchedulerApp getApplication( ApplicationAttemptId applicationAttemptId) { - return applicationsMap.get(applicationAttemptId); + return applicationAttemptMap.get(applicationAttemptId); } private static final CSAssignment NULL_ASSIGNMENT = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index b22b24ed4ea..1f094759a4b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -37,6 +37,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -51,7 +52,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; -import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; @@ -442,7 +442,7 @@ public boolean hasAccess(QueueACL acl, UserGroupInformation user) { } @Override - public void submitApplication(FiCaSchedulerApp application, String user, + public void submitApplication(ApplicationId applicationId, String user, String queue) throws AccessControlException { synchronized (this) { @@ -455,57 +455,70 @@ public void submitApplication(FiCaSchedulerApp application, String user, if (state != QueueState.RUNNING) { throw new AccessControlException("Queue " + getQueuePath() + " is STOPPED. Cannot accept submission of application: " + - application.getApplicationId()); + applicationId); } - addApplication(application, user); + addApplication(applicationId, user); } // Inform the parent queue if (parent != null) { try { - parent.submitApplication(application, user, queue); + parent.submitApplication(applicationId, user, queue); } catch (AccessControlException ace) { LOG.info("Failed to submit application to parent-queue: " + parent.getQueuePath(), ace); - removeApplication(application, user); + removeApplication(applicationId, user); throw ace; } } } - private synchronized void addApplication(FiCaSchedulerApp application, + + @Override + public void submitApplicationAttempt(FiCaSchedulerApp application, + String userName) { + // submit attempt logic. + } + + @Override + public void finishApplicationAttempt(FiCaSchedulerApp application, + String queue) { + // finish attempt logic. 
+ } + + private synchronized void addApplication(ApplicationId applicationId, String user) { - + ++numApplications; LOG.info("Application added -" + - " appId: " + application.getApplicationId() + + " appId: " + applicationId + " user: " + user + " leaf-queue of parent: " + getQueueName() + " #applications: " + getNumApplications()); } @Override - public void finishApplication(FiCaSchedulerApp application, String queue) { + public void finishApplication(ApplicationId application, String user) { synchronized (this) { - removeApplication(application, application.getUser()); + removeApplication(application, user); } // Inform the parent queue if (parent != null) { - parent.finishApplication(application, queue); + parent.finishApplication(application, user); } } - public synchronized void removeApplication(FiCaSchedulerApp application, + public synchronized void removeApplication(ApplicationId applicationId, String user) { --numApplications; LOG.info("Application removed -" + - " appId: " + application.getApplicationId() + + " appId: " + applicationId + " user: " + user + " leaf-queue of parent: " + getQueueName() + " #applications: " + getNumApplications()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 7f51126fec8..dcbc5ad7a46 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java 
@@ -47,7 +47,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; @@ -57,7 +57,7 @@ */ @Private @Unstable -public class FiCaSchedulerApp extends SchedulerApplication { +public class FiCaSchedulerApp extends SchedulerApplicationAttempt { private static final Log LOG = LogFactory.getLog(FiCaSchedulerApp.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java index 23068fefde3..9c5a6062094 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java @@ -36,7 +36,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.util.resource.Resources; @@ -206,7 +206,7 @@ public synchronized List getRunningContainers() { } public synchronized void reserveResource( - SchedulerApplication application, Priority priority, + SchedulerApplicationAttempt application, Priority priority, RMContainer reservedContainer) { // Check if it's already reserved if (this.reservedContainer != null) { @@ -241,7 +241,7 @@ public synchronized void reserveResource( } public synchronized void unreserveResource( - SchedulerApplication application) { + SchedulerApplicationAttempt application) { // adding NP checks as this can now be called for preemption if (reservedContainer != null diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRejectedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java similarity index 54% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRejectedEvent.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java index 8f795a2fa9e..d6fb36df78b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/event/RMAppAttemptRejectedEvent.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAddedSchedulerEvent.java @@ -16,22 +16,34 @@ * limitations under the License. */ -package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event; +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.event; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; +import org.apache.hadoop.yarn.api.records.ApplicationId; -public class RMAppAttemptRejectedEvent extends RMAppAttemptEvent { +public class AppAddedSchedulerEvent extends SchedulerEvent { - private final String message; + private final ApplicationId applicationId; + private final String queue; + private final String user; - public RMAppAttemptRejectedEvent(ApplicationAttemptId appAttemptId, String message) { - super(appAttemptId, RMAppAttemptEventType.APP_REJECTED); - this.message = message; + public AppAddedSchedulerEvent( + ApplicationId applicationId, String queue, String user) { + super(SchedulerEventType.APP_ADDED); + this.applicationId = applicationId; + this.queue = queue; + this.user = user; } - public String getMessage() { - return this.message; + public ApplicationId getApplicationId() { + return applicationId; } + + public String getQueue() { + return queue; + } + + public String getUser() { + return user; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java index 
7b9ffff4b53..d50c1570e06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppAttemptAddedSchedulerEvent.java @@ -23,27 +23,14 @@ public class AppAttemptAddedSchedulerEvent extends SchedulerEvent { private final ApplicationAttemptId applicationAttemptId; - private final String queue; - private final String user; public AppAttemptAddedSchedulerEvent( - ApplicationAttemptId applicationAttemptId, String queue, String user) { + ApplicationAttemptId applicationAttemptId) { super(SchedulerEventType.APP_ATTEMPT_ADDED); this.applicationAttemptId = applicationAttemptId; - this.queue = queue; - this.user = user; } public ApplicationAttemptId getApplicationAttemptId() { return applicationAttemptId; } - - public String getQueue() { - return queue; - } - - public String getUser() { - return user; - } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppRemovedSchedulerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppRemovedSchedulerEvent.java new file mode 100644 index 00000000000..9842bed00b2 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/AppRemovedSchedulerEvent.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.event; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; + +public class AppRemovedSchedulerEvent extends SchedulerEvent { + + private final ApplicationId applicationId; + private final RMAppState finalState; + + public AppRemovedSchedulerEvent(ApplicationId applicationId, + RMAppState finalState) { + super(SchedulerEventType.APP_REMOVED); + this.applicationId = applicationId; + this.finalState = finalState; + } + + public ApplicationId getApplicationID() { + return this.applicationId; + } + + public RMAppState getFinalState() { + return this.finalState; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java index dd1aec71bff..243c72ba676 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/SchedulerEventType.java @@ -24,7 +24,11 @@ public enum SchedulerEventType { NODE_ADDED, NODE_REMOVED, NODE_UPDATE, - + + // Source: RMApp + APP_ADDED, + APP_REMOVED, + // Source: RMAppAttempt APP_ATTEMPT_ADDED, APP_ATTEMPT_REMOVED, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java index 90a87416180..7f7d26487ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java @@ -33,7 +33,7 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.util.resource.Resources; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; @Private @Unstable diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java index 10913b17ea9..0bdac8c3652 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSSchedulerApp.java @@ -44,7 +44,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.util.resource.Resources; /** @@ -52,7 +52,7 @@ */ @Private @Unstable -public class FSSchedulerApp extends SchedulerApplication { +public class FSSchedulerApp extends SchedulerApplicationAttempt { private static final Log LOG = LogFactory.getLog(FSSchedulerApp.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index bc716c1401f..bdfbcabe312 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -38,6 +38,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -58,10 +59,13 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; @@ -75,8 +79,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; @@ -151,10 +157,15 @@ public class FairScheduler implements ResourceScheduler { // Time we last ran preemptTasksIfNecessary private long lastPreemptCheckTime; - // This stores per-application scheduling information, indexed by + // This stores per-application scheduling information, indexed by application ID. + @VisibleForTesting + protected Map applications = + new ConcurrentHashMap(); + + // This stores per-application-attempt scheduling information, indexed by // attempt ID's for fast lookup. @VisibleForTesting - protected Map applications = + protected Map appAttempts = new ConcurrentHashMap(); // Nodes in the cluster, indexed by NodeId @@ -253,7 +264,7 @@ public QueueManager getQueueManager() { private RMContainer getRMContainer(ContainerId containerId) { FSSchedulerApp application = - applications.get(containerId.getApplicationAttemptId()); + appAttempts.get(containerId.getApplicationAttemptId()); return (application == null) ? null : application.getRMContainer(containerId); } @@ -591,44 +602,63 @@ public FairSchedulerEventLog getEventLog() { * user. This will accept a new app even if the user or queue is above * configured limits, but the app will not be marked as runnable. 
*/ - protected synchronized void addApplicationAttempt( - ApplicationAttemptId applicationAttemptId, String queueName, String user) { + protected synchronized void addApplication(ApplicationId applicationId, + String queueName, String user) { if (queueName == null || queueName.isEmpty()) { - String message = "Reject application " + applicationAttemptId + + String message = "Reject application " + applicationId + " submitted by user " + user + " with an empty queue name."; LOG.info(message); - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptRejectedEvent(applicationAttemptId, message)); + rmContext.getDispatcher().getEventHandler() + .handle(new RMAppRejectedEvent(applicationId, message)); return; } - RMApp rmApp = rmContext.getRMApps().get( - applicationAttemptId.getApplicationId()); + RMApp rmApp = rmContext.getRMApps().get(applicationId); FSLeafQueue queue = assignToQueue(rmApp, queueName, user); if (queue == null) { rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptRejectedEvent(applicationAttemptId, + new RMAppRejectedEvent(applicationId, "Application rejected by queue placement policy")); return; } - FSSchedulerApp schedulerApp = - new FSSchedulerApp(applicationAttemptId, user, - queue, new ActiveUsersManager(getRootQueueMetrics()), - rmContext); - // Enforce ACLs UserGroupInformation userUgi = UserGroupInformation.createRemoteUser(user); if (!queue.hasAccess(QueueACL.SUBMIT_APPLICATIONS, userUgi) && !queue.hasAccess(QueueACL.ADMINISTER_QUEUE, userUgi)) { String msg = "User " + userUgi.getUserName() + - " cannot submit applications to queue " + queue.getName(); + " cannot submit applications to queue " + queue.getName(); LOG.info(msg); - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptRejectedEvent(applicationAttemptId, msg)); + rmContext.getDispatcher().getEventHandler() + .handle(new RMAppRejectedEvent(applicationId, msg)); return; } + + SchedulerApplication application = + new 
SchedulerApplication(queue, user); + applications.put(applicationId, application); + + LOG.info("Accepted application " + applicationId + " from user: " + user + + ", in queue: " + queueName); + rmContext.getDispatcher().getEventHandler() + .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); + } + + /** + * Add a new application attempt to the scheduler. + */ + protected synchronized void addApplicationAttempt( + ApplicationAttemptId applicationAttemptId) { + SchedulerApplication application = + applications.get(applicationAttemptId.getApplicationId()); + String user = application.getUser(); + FSLeafQueue queue = (FSLeafQueue) application.getQueue(); + + FSSchedulerApp schedulerApp = + new FSSchedulerApp(applicationAttemptId, user, + queue, new ActiveUsersManager(getRootQueueMetrics()), + rmContext); boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user); queue.addApp(schedulerApp, runnable); @@ -639,16 +669,14 @@ queue, new ActiveUsersManager(getRootQueueMetrics()), } queue.getMetrics().submitApp(user, applicationAttemptId.getAttemptId()); + appAttempts.put(applicationAttemptId, schedulerApp); - applications.put(applicationAttemptId, schedulerApp); - - LOG.info("Application Submission: " + applicationAttemptId + - ", user: "+ user + - ", currently active: " + applications.size()); - + LOG.info("Added Application Attempt " + applicationAttemptId + + " to scheduler from user: " + user + ", currently active: " + + appAttempts.size()); rmContext.getDispatcher().getEventHandler().handle( new RMAppAttemptEvent(applicationAttemptId, - RMAppAttemptEventType.APP_ACCEPTED)); + RMAppAttemptEventType.ATTEMPT_ADDED)); } @VisibleForTesting @@ -674,13 +702,18 @@ FSLeafQueue assignToQueue(RMApp rmApp, String queueName, String user) { return queue; } + private synchronized void removeApplication(ApplicationId applicationId, + RMAppState finalState) { + applications.remove(applicationId); + } + private synchronized void removeApplicationAttempt( 
ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState) { LOG.info("Application " + applicationAttemptId + " is done." + " finalState=" + rmAppAttemptFinalState); - FSSchedulerApp application = applications.get(applicationAttemptId); + FSSchedulerApp application = appAttempts.get(applicationAttemptId); if (application == null) { LOG.info("Unknown application " + applicationAttemptId + " has completed!"); @@ -720,7 +753,7 @@ private synchronized void removeApplicationAttempt( } // Remove from our data-structure - applications.remove(applicationAttemptId); + appAttempts.remove(applicationAttemptId); } /** @@ -737,7 +770,7 @@ private synchronized void completedContainer(RMContainer rmContainer, // Get the application for the finished container ApplicationAttemptId applicationAttemptId = container.getId().getApplicationAttemptId(); - FSSchedulerApp application = applications.get(applicationAttemptId); + FSSchedulerApp application = appAttempts.get(applicationAttemptId); if (application == null) { LOG.info("Container " + container + " of" + " unknown application " + applicationAttemptId + @@ -811,7 +844,7 @@ public Allocation allocate(ApplicationAttemptId appAttemptId, List ask, List release, List blacklistAdditions, List blacklistRemovals) { // Make sure this application exists - FSSchedulerApp application = applications.get(appAttemptId); + FSSchedulerApp application = appAttempts.get(appAttemptId); if (application == null) { LOG.info("Calling allocate on removed " + "or non existant application " + appAttemptId); @@ -882,7 +915,7 @@ public Allocation allocate(ApplicationAttemptId appAttemptId, private void containerLaunchedOnNode(ContainerId containerId, FSSchedulerNode node) { // Get the application for the finished container ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId(); - FSSchedulerApp application = applications.get(applicationAttemptId); + FSSchedulerApp application = 
appAttempts.get(applicationAttemptId); if (application == null) { LOG.info("Unknown application: " + applicationAttemptId + " launched container " + containerId + @@ -1025,23 +1058,23 @@ public SchedulerNodeReport getNodeReport(NodeId nodeId) { } public FSSchedulerApp getSchedulerApp(ApplicationAttemptId appAttemptId) { - return applications.get(appAttemptId); + return appAttempts.get(appAttemptId); } @Override public SchedulerAppReport getSchedulerAppInfo( ApplicationAttemptId appAttemptId) { - if (!applications.containsKey(appAttemptId)) { + if (!appAttempts.containsKey(appAttemptId)) { LOG.error("Request for appInfo of unknown attempt" + appAttemptId); return null; } - return new SchedulerAppReport(applications.get(appAttemptId)); + return new SchedulerAppReport(appAttempts.get(appAttemptId)); } @Override public ApplicationResourceUsageReport getAppResourceUsageReport( ApplicationAttemptId appAttemptId) { - FSSchedulerApp app = applications.get(appAttemptId); + FSSchedulerApp app = appAttempts.get(appAttemptId); if (app == null) { LOG.error("Request for appInfo of unknown attempt" + appAttemptId); return null; @@ -1090,15 +1123,29 @@ public void handle(SchedulerEvent event) { NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event; nodeUpdate(nodeUpdatedEvent.getRMNode()); break; + case APP_ADDED: + if (!(event instanceof AppAddedSchedulerEvent)) { + throw new RuntimeException("Unexpected event type: " + event); + } + AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; + addApplication(appAddedEvent.getApplicationId(), + appAddedEvent.getQueue(), appAddedEvent.getUser()); + break; + case APP_REMOVED: + if (!(event instanceof AppRemovedSchedulerEvent)) { + throw new RuntimeException("Unexpected event type: " + event); + } + AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event; + removeApplication(appRemovedEvent.getApplicationID(), + appRemovedEvent.getFinalState()); + break; case APP_ATTEMPT_ADDED: if 
(!(event instanceof AppAttemptAddedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } AppAttemptAddedSchedulerEvent appAttemptAddedEvent = (AppAttemptAddedSchedulerEvent) event; - String queue = appAttemptAddedEvent.getQueue(); - addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), - queue, appAttemptAddedEvent.getUser()); + addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId()); break; case APP_ATTEMPT_REMOVED: if (!(event instanceof AppAttemptRemovedSchedulerEvent)) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index c5f0bd67d92..9d429136db9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -37,6 +37,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -58,6 +59,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -74,12 +78,15 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.ContainerExpiredSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; @@ -116,11 +123,15 @@ public class FifoScheduler 
implements ResourceScheduler, Configurable { private Resource maximumAllocation; private boolean usePortForNodeName; + @VisibleForTesting + protected Map applications = + new ConcurrentSkipListMap(); + // Use ConcurrentSkipListMap because applications need to be ordered @VisibleForTesting - protected Map applications + protected Map appAttempts = new ConcurrentSkipListMap(); - + private ActiveUsersManager activeUsersManager; private static final String DEFAULT_QUEUE_NAME = "default"; @@ -327,7 +338,7 @@ public Allocation allocate( @VisibleForTesting FiCaSchedulerApp getApplication( ApplicationAttemptId applicationAttemptId) { - return applications.get(applicationAttemptId); + return appAttempts.get(applicationAttemptId); } @Override @@ -347,20 +358,44 @@ public ApplicationResourceUsageReport getAppResourceUsageReport( private FiCaSchedulerNode getNode(NodeId nodeId) { return nodes.get(nodeId); } - - private synchronized void addApplicationAttempt(ApplicationAttemptId appAttemptId, - String user) { + + private synchronized void addApplication(ApplicationId applicationId, + String queue, String user) { + SchedulerApplication application = + new SchedulerApplication(null, user); + applications.put(applicationId, application); + LOG.info("Accepted application " + applicationId + " from user: " + user); + rmContext.getDispatcher().getEventHandler() + .handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED)); + } + + private synchronized void addApplicationAttempt( + ApplicationAttemptId appAttemptId) { + SchedulerApplication application = + applications.get(appAttemptId.getApplicationId()); + String user = application.getUser(); // TODO: Fix store - FiCaSchedulerApp schedulerApp = - new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager, - this.rmContext); - applications.put(appAttemptId, schedulerApp); + FiCaSchedulerApp schedulerApp = + new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE, + activeUsersManager, this.rmContext); + 
appAttempts.put(appAttemptId, schedulerApp); metrics.submitApp(user, appAttemptId.getAttemptId()); - LOG.info("Application Submission: " + appAttemptId.getApplicationId() + - " from " + user + ", currently active: " + applications.size()); + LOG.info("Added Application Attempt " + appAttemptId + + " to scheduler from user " + application.getUser() + + ", currently active: " + appAttempts.size()); rmContext.getDispatcher().getEventHandler().handle( new RMAppAttemptEvent(appAttemptId, - RMAppAttemptEventType.APP_ACCEPTED)); + RMAppAttemptEventType.ATTEMPT_ADDED)); + } + + private synchronized void doneApplication(ApplicationId applicationId, + RMAppState finalState) { + SchedulerApplication application = applications.get(applicationId); + + // Inform the activeUsersManager + activeUsersManager.deactivateApplication(application.getUser(), + applicationId); + applications.remove(applicationId); } private synchronized void doneApplicationAttempt( @@ -382,17 +417,11 @@ private synchronized void doneApplicationAttempt( RMContainerEventType.KILL); } - // Inform the activeUsersManager - synchronized (application) { - activeUsersManager.deactivateApplication( - application.getUser(), application.getApplicationId()); - } - // Clean up pending requests, metrics etc. 
application.stop(rmAppAttemptFinalState); // Remove the application - applications.remove(applicationAttemptId); + appAttempts.remove(applicationAttemptId); } /** @@ -403,10 +432,10 @@ private synchronized void doneApplicationAttempt( private void assignContainers(FiCaSchedulerNode node) { LOG.debug("assignContainers:" + " node=" + node.getRMNode().getNodeAddress() + - " #applications=" + applications.size()); + " #applications=" + appAttempts.size()); // Try to assign containers to applications in fifo order - for (Map.Entry e : applications + for (Map.Entry e : appAttempts .entrySet()) { FiCaSchedulerApp application = e.getValue(); LOG.debug("pre-assignContainers"); @@ -445,7 +474,7 @@ private void assignContainers(FiCaSchedulerNode node) { // Update the applications' headroom to correctly take into // account the containers assigned in this update. - for (FiCaSchedulerApp application : applications.values()) { + for (FiCaSchedulerApp application : appAttempts.values()) { application.setHeadroom(Resources.subtract(clusterResource, usedResource)); } } @@ -697,12 +726,25 @@ public void handle(SchedulerEvent event) { nodeUpdate(nodeUpdatedEvent.getRMNode()); } break; + case APP_ADDED: + { + AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; + addApplication(appAddedEvent.getApplicationId(), + appAddedEvent.getQueue(), appAddedEvent.getUser()); + } + break; + case APP_REMOVED: + { + AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event; + doneApplication(appRemovedEvent.getApplicationID(), + appRemovedEvent.getFinalState()); + } + break; case APP_ATTEMPT_ADDED: { AppAttemptAddedSchedulerEvent appAttemptAddedEvent = (AppAttemptAddedSchedulerEvent) event; - addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), - appAttemptAddedEvent.getUser()); + addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId()); } break; case APP_ATTEMPT_REMOVED: @@ -867,8 +909,8 @@ public synchronized boolean 
checkAccess(UserGroupInformation callerUGI, public synchronized List getAppsInQueue(String queueName) { if (queueName.equals(DEFAULT_QUEUE.getQueueName())) { List apps = new ArrayList( - applications.size()); - for (FiCaSchedulerApp app : applications.values()) { + appAttempts.size()); + for (FiCaSchedulerApp app : appAttempts.values()) { apps.add(app.getApplicationAttemptId()); } return apps; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index 6767180b62a..1192c30774e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.resource.Resources; @@ -164,11 +165,14 @@ public synchronized void submit() throws IOException, YarnException { final ResourceScheduler scheduler = resourceManager.getResourceScheduler(); resourceManager.getClientRMService().submitApplication(request); - + // Notify scheduler - AppAttemptAddedSchedulerEvent appAddedEvent1 = new AppAttemptAddedSchedulerEvent( - 
this.applicationAttemptId, this.queue, this.user); - scheduler.handle(appAddedEvent1); + AppAddedSchedulerEvent addAppEvent = + new AppAddedSchedulerEvent(this.applicationId, this.queue, this.user); + scheduler.handle(addAppEvent); + AppAttemptAddedSchedulerEvent addAttemptEvent = + new AppAttemptAddedSchedulerEvent(this.applicationAttemptId); + scheduler.handle(addAttemptEvent); } public synchronized void addResourceRequestSpec( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index d425dda2aba..cbb70d57a4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -649,7 +649,7 @@ private RMAppImpl getRMApp(RMContext rmContext, YarnScheduler yarnScheduler, .currentTimeMillis(), "YARN")); ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId3, 1); RMAppAttemptImpl rmAppAttemptImpl = new RMAppAttemptImpl(attemptId, - rmContext, yarnScheduler, null, asContext, config, null); + rmContext, yarnScheduler, null, asContext, config); when(app.getCurrentAppAttempt()).thenReturn(rmAppAttemptImpl); return app; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index 2d370fcfb46..4bf0c449598 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; @@ -297,9 +298,12 @@ public void testBlackListNodes() throws Exception { ApplicationId appId1 = BuilderUtils.newApplicationId(100, 1); ApplicationAttemptId appAttemptId1 = BuilderUtils.newApplicationAttemptId( appId1, 1); - SchedulerEvent event1 = - new AppAttemptAddedSchedulerEvent(appAttemptId1, "queue", "user"); - fs.handle(event1); + SchedulerEvent appEvent = + new AppAddedSchedulerEvent(appId1, "queue", "user"); + fs.handle(appEvent); + SchedulerEvent attemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId1); + fs.handle(attemptEvent); List emptyId = new ArrayList(); List emptyAsk = new ArrayList(); @@ -388,16 +392,22 @@ public void testHeadroom() throws Exception { ApplicationId appId1 = BuilderUtils.newApplicationId(100, 1); 
ApplicationAttemptId appAttemptId1 = BuilderUtils.newApplicationAttemptId( appId1, 1); - SchedulerEvent event1 = - new AppAttemptAddedSchedulerEvent(appAttemptId1, "queue", "user"); - fs.handle(event1); + SchedulerEvent appEvent = + new AppAddedSchedulerEvent(appId1, "queue", "user"); + fs.handle(appEvent); + SchedulerEvent attemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId1); + fs.handle(attemptEvent); ApplicationId appId2 = BuilderUtils.newApplicationId(200, 2); ApplicationAttemptId appAttemptId2 = BuilderUtils.newApplicationAttemptId( appId2, 1); - SchedulerEvent event2 = - new AppAttemptAddedSchedulerEvent(appAttemptId2, "queue", "user"); - fs.handle(event2); + SchedulerEvent appEvent2 = + new AppAddedSchedulerEvent(appId2, "queue", "user"); + fs.handle(appEvent2); + SchedulerEvent attemptEvent2 = + new AppAttemptAddedSchedulerEvent(appAttemptId2); + fs.handle(attemptEvent2); List emptyId = new ArrayList(); List emptyAsk = new ArrayList(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index c7ef857cc6b..440bddc510c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -248,7 +248,7 @@ public void testRMRestart() throws Exception { // verify correct number of attempts and other data RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId()); Assert.assertNotNull(loadedApp1); - //Assert.assertEquals(1, loadedApp1.getAppAttempts().size()); + 
Assert.assertEquals(1, loadedApp1.getAppAttempts().size()); Assert.assertEquals(app1.getApplicationSubmissionContext() .getApplicationId(), loadedApp1.getApplicationSubmissionContext() .getApplicationId()); @@ -261,7 +261,7 @@ public void testRMRestart() throws Exception { .getApplicationId()); // verify state machine kicked into expected states - rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.RUNNING); + rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED); rm2.waitForState(loadedApp2.getApplicationId(), RMAppState.ACCEPTED); // verify attempts for apps @@ -299,7 +299,11 @@ public void testRMRestart() throws Exception { nm2.registerNode(); rm2.waitForState(loadedApp1.getApplicationId(), RMAppState.ACCEPTED); - Assert.assertEquals(2, loadedApp1.getAppAttempts().size()); + // wait (at most 40 x 200ms) for the 2nd attempt to be started. + int timeoutSecs = 0; + while (loadedApp1.getAppAttempts().size() != 2 && timeoutSecs++ < 40) { + Thread.sleep(200); + } // verify no more reboot response sent hbResponse = nm1.nodeHeartbeat(true); @@ -476,10 +480,10 @@ public void testRMRestartWaitForPreviousAMToFinish() throws Exception { Assert.assertEquals(NodeAction.RESYNC, res.getNodeAction()); RMApp rmApp = rm2.getRMContext().getRMApps().get(app1.getApplicationId()); - // application should be in running state - rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING); + // application should be in ACCEPTED state + rm2.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED); - Assert.assertEquals(RMAppState.RUNNING, rmApp.getState()); + Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState()); // new attempt should not be started Assert.assertEquals(2, rmApp.getAppAttempts().size()); // am1 attempt should be in FAILED state where as am2 attempt should be in @@ -516,9 +520,9 @@ public void testRMRestartWaitForPreviousAMToFinish() throws Exception { nm1.setResourceTrackerService(rm3.getResourceTrackerService()); rmApp = 
rm3.getRMContext().getRMApps().get(app1.getApplicationId()); - // application should be in running state - rm3.waitForState(app1.getApplicationId(), RMAppState.RUNNING); - Assert.assertEquals(rmApp.getState(), RMAppState.RUNNING); + // application should be in ACCEPTED state + rm3.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED); + Assert.assertEquals(rmApp.getState(), RMAppState.ACCEPTED); // new attempt should not be started Assert.assertEquals(3, rmApp.getAppAttempts().size()); // am1 and am2 attempts should be in FAILED state where as am3 should be @@ -562,6 +566,11 @@ public void testRMRestartWaitForPreviousAMToFinish() throws Exception { rmApp = rm4.getRMContext().getRMApps().get(app1.getApplicationId()); rm4.waitForState(rmApp.getApplicationId(), RMAppState.ACCEPTED); + // wait for the attempt to be created. + int timeoutSecs = 0; + while (rmApp.getAppAttempts().size() != 4 && timeoutSecs++ < 40) { + Thread.sleep(200); + } Assert.assertEquals(4, rmApp.getAppAttempts().size()); Assert.assertEquals(RMAppState.ACCEPTED, rmApp.getState()); rm4.waitForState(latestAppAttemptId, RMAppAttemptState.SCHEDULED); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index ba255d339ef..e6e19eaec3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -567,7 +567,9 @@ public void testAppSubmittedKill() throws IOException, 
InterruptedException { RMAppEventType.KILL); application.handle(event); rmDispatcher.await(); - assertAppAndAttemptKilled(application); + sendAppUpdateSavedEvent(application); + assertKilled(application); + assertAppFinalStateSaved(application); } @Test @@ -582,7 +584,7 @@ public void testAppAcceptedFailed() throws IOException { new RMAppFailedAttemptEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, ""); application.handle(event); - assertAppState(RMAppState.SUBMITTED, application); + assertAppState(RMAppState.ACCEPTED, application); event = new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_ACCEPTED); @@ -612,7 +614,9 @@ public void testAppAcceptedKill() throws IOException, InterruptedException { RMAppEventType.KILL); application.handle(event); rmDispatcher.await(); - assertAppAndAttemptKilled(application); + sendAppUpdateSavedEvent(application); + assertKilled(application); + assertAppFinalStateSaved(application); } @Test @@ -654,7 +658,7 @@ public void testAppRunningFailed() throws IOException { RMAppEventType.ATTEMPT_FAILED, ""); application.handle(event); rmDispatcher.await(); - assertAppState(RMAppState.SUBMITTED, application); + assertAppState(RMAppState.ACCEPTED, application); appAttempt = application.getCurrentAppAttempt(); Assert.assertEquals(++expectedAttemptId, appAttempt.getAppAttemptId().getAttemptId()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 0ad2f2a0370..5bea03b6218 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -79,7 +79,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptNewSavedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUpdateSavedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; @@ -258,7 +257,7 @@ public void setUp() throws Exception { application = mock(RMApp.class); applicationAttempt = new RMAppAttemptImpl(applicationAttemptId, rmContext, scheduler, - masterService, submissionContext, new Configuration(), user); + masterService, submissionContext, new Configuration()); when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt); when(application.getApplicationId()).thenReturn(applicationId); @@ -408,9 +407,6 @@ private void testAppAttemptScheduledState() { assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); assertEquals(0, applicationAttempt.getRanNodes().size()); assertNull(applicationAttempt.getFinalApplicationStatus()); - - // Check events - verify(application).handle(any(RMAppEvent.class)); } /** @@ -446,7 +442,7 @@ private void testAppAttemptFailedState(Container container, assertEquals(0, 
applicationAttempt.getRanNodes().size()); // Check events - verify(application, times(2)).handle(any(RMAppFailedAttemptEvent.class)); + verify(application, times(1)).handle(any(RMAppFailedAttemptEvent.class)); verifyTokenCount(applicationAttempt.getAppAttemptId(), 1); verifyAttemptFinalStateSaved(); } @@ -544,7 +540,7 @@ private void scheduleApplicationAttempt() { applicationAttempt.handle( new RMAppAttemptEvent( applicationAttempt.getAppAttemptId(), - RMAppAttemptEventType.APP_ACCEPTED)); + RMAppAttemptEventType.ATTEMPT_ADDED)); if(unmanagedAM){ assertEquals(RMAppAttemptState.LAUNCHED_UNMANAGED_SAVING, @@ -703,16 +699,6 @@ public void testNewToRecovered() { RMAppAttemptEventType.RECOVER)); testAppAttemptRecoveredState(); } - - @Test - public void testSubmittedToFailed() { - submitApplicationAttempt(); - String message = "Rejected"; - applicationAttempt.handle( - new RMAppAttemptRejectedEvent( - applicationAttempt.getAppAttemptId(), message)); - testAppAttemptSubmittedToFailedState(message); - } @Test public void testSubmittedToKilled() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java index 9969db5a5e3..8fcbf54b6cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java @@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl; import 
org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException; import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -58,8 +59,12 @@ import org.apache.hadoop.yarn.server.resourcemanager.TestAMAuthorization.MockRMWithAMS; import org.apache.hadoop.yarn.server.resourcemanager.TestAMAuthorization.MyContainerManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; @@ -378,4 +383,24 @@ public void testCreatePreemptedContainerStatus() { ApplicationId.newInstance(System.currentTimeMillis(), 1), 1), 1), "x"); Assert.assertEquals(ContainerExitStatus.PREEMPTED, cd.getExitStatus()); } + + public static SchedulerApplication verifyAppAddedAndRemovedFromScheduler( + final Map<ApplicationId, SchedulerApplication> applications, + EventHandler<SchedulerEvent> handler, String queueName) throws Exception { + ApplicationId appId = + ApplicationId.newInstance(System.currentTimeMillis(), 1); + AppAddedSchedulerEvent appAddedEvent = + new AppAddedSchedulerEvent(appId, queueName, "user"); + handler.handle(appAddedEvent); + SchedulerApplication app = applications.get(appId); + // verify application is added. 
+ Assert.assertNotNull(app); + Assert.assertEquals("user", app.getUser()); + + AppRemovedSchedulerEvent appRemoveEvent = + new AppRemovedSchedulerEvent(appId, RMAppState.FINISHED); + handler.handle(appRemoveEvent); + Assert.assertNull(applications.get(appId)); + return app; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index f343bd546c6..2b548ef4607 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -304,7 +304,7 @@ public void testActiveApplicationLimits() throws Exception { int APPLICATION_ID = 0; // Submit first application FiCaSchedulerApp app_0 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_0, user_0, A); + queue.submitApplicationAttempt(app_0, user_0); assertEquals(1, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(1, queue.getNumActiveApplications(user_0)); @@ -312,7 +312,7 @@ public void testActiveApplicationLimits() throws Exception { // Submit second application FiCaSchedulerApp app_1 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_1, user_0, A); + queue.submitApplicationAttempt(app_1, user_0); assertEquals(2, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(2, 
queue.getNumActiveApplications(user_0)); @@ -320,14 +320,14 @@ public void testActiveApplicationLimits() throws Exception { // Submit third application, should remain pending FiCaSchedulerApp app_2 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_2, user_0, A); + queue.submitApplicationAttempt(app_2, user_0); assertEquals(2, queue.getNumActiveApplications()); assertEquals(1, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); assertEquals(1, queue.getNumPendingApplications(user_0)); // Finish one application, app_2 should be activated - queue.finishApplication(app_0, A); + queue.finishApplicationAttempt(app_0, A); assertEquals(2, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -335,7 +335,7 @@ public void testActiveApplicationLimits() throws Exception { // Submit another one for user_0 FiCaSchedulerApp app_3 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_3, user_0, A); + queue.submitApplicationAttempt(app_3, user_0); assertEquals(2, queue.getNumActiveApplications()); assertEquals(1, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -346,7 +346,7 @@ public void testActiveApplicationLimits() throws Exception { // Submit first app for user_1 FiCaSchedulerApp app_4 = getMockApplication(APPLICATION_ID++, user_1); - queue.submitApplication(app_4, user_1, A); + queue.submitApplicationAttempt(app_4, user_1); assertEquals(3, queue.getNumActiveApplications()); assertEquals(1, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -356,7 +356,7 @@ public void testActiveApplicationLimits() throws Exception { // Submit second app for user_1, should block due to queue-limit FiCaSchedulerApp app_5 = getMockApplication(APPLICATION_ID++, user_1); - queue.submitApplication(app_5, user_1, A); + 
queue.submitApplicationAttempt(app_5, user_1); assertEquals(3, queue.getNumActiveApplications()); assertEquals(2, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -365,7 +365,7 @@ public void testActiveApplicationLimits() throws Exception { assertEquals(1, queue.getNumPendingApplications(user_1)); // Now finish one app of user_1 so app_5 should be activated - queue.finishApplication(app_4, A); + queue.finishApplicationAttempt(app_4, A); assertEquals(3, queue.getNumActiveApplications()); assertEquals(1, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -385,7 +385,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { // Submit first application FiCaSchedulerApp app_0 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_0, user_0, A); + queue.submitApplicationAttempt(app_0, user_0); assertEquals(1, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(1, queue.getNumActiveApplications(user_0)); @@ -394,7 +394,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { // Submit second application FiCaSchedulerApp app_1 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_1, user_0, A); + queue.submitApplicationAttempt(app_1, user_0); assertEquals(2, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -403,7 +403,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { // Submit third application, should remain pending FiCaSchedulerApp app_2 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_2, user_0, A); + queue.submitApplicationAttempt(app_2, user_0); assertEquals(2, queue.getNumActiveApplications()); assertEquals(1, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -412,7 +412,7 
@@ public void testActiveLimitsWithKilledApps() throws Exception { // Submit fourth application, should remain pending FiCaSchedulerApp app_3 = getMockApplication(APPLICATION_ID++, user_0); - queue.submitApplication(app_3, user_0, A); + queue.submitApplicationAttempt(app_3, user_0); assertEquals(2, queue.getNumActiveApplications()); assertEquals(2, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -420,7 +420,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { assertTrue(queue.pendingApplications.contains(app_3)); // Kill 3rd pending application - queue.finishApplication(app_2, A); + queue.finishApplicationAttempt(app_2, A); assertEquals(2, queue.getNumActiveApplications()); assertEquals(1, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -429,7 +429,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { assertFalse(queue.activeApplications.contains(app_2)); // Finish 1st application, app_3 should become active - queue.finishApplication(app_0, A); + queue.finishApplicationAttempt(app_0, A); assertEquals(2, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(2, queue.getNumActiveApplications(user_0)); @@ -439,7 +439,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { assertFalse(queue.activeApplications.contains(app_0)); // Finish 2nd application - queue.finishApplication(app_1, A); + queue.finishApplicationAttempt(app_1, A); assertEquals(1, queue.getNumActiveApplications()); assertEquals(0, queue.getNumPendingApplications()); assertEquals(1, queue.getNumActiveApplications(user_0)); @@ -447,7 +447,7 @@ public void testActiveLimitsWithKilledApps() throws Exception { assertFalse(queue.activeApplications.contains(app_1)); // Finish 4th application - queue.finishApplication(app_3, A); + queue.finishApplicationAttempt(app_3, A); assertEquals(0, queue.getNumActiveApplications()); 
assertEquals(0, queue.getNumPendingApplications()); assertEquals(0, queue.getNumActiveApplications(user_0)); @@ -507,7 +507,7 @@ public void testHeadroom() throws Exception { FiCaSchedulerApp app_0_0 = spy(new FiCaSchedulerApp(appAttemptId_0_0, user_0, queue, queue.getActiveUsersManager(), rmContext)); - queue.submitApplication(app_0_0, user_0, A); + queue.submitApplicationAttempt(app_0_0, user_0); List app_0_0_requests = new ArrayList(); app_0_0_requests.add( @@ -526,7 +526,7 @@ public void testHeadroom() throws Exception { FiCaSchedulerApp app_0_1 = spy(new FiCaSchedulerApp(appAttemptId_0_1, user_0, queue, queue.getActiveUsersManager(), rmContext)); - queue.submitApplication(app_0_1, user_0, A); + queue.submitApplicationAttempt(app_0_1, user_0); List app_0_1_requests = new ArrayList(); app_0_1_requests.add( @@ -545,7 +545,7 @@ public void testHeadroom() throws Exception { FiCaSchedulerApp app_1_0 = spy(new FiCaSchedulerApp(appAttemptId_1_0, user_1, queue, queue.getActiveUsersManager(), rmContext)); - queue.submitApplication(app_1_0, user_1, A); + queue.submitApplicationAttempt(app_1_0, user_1); List app_1_0_requests = new ArrayList(); app_1_0_requests.add( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index c2f2f88dc46..3d49d86a37d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -64,7 +64,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; @@ -555,9 +558,12 @@ public void testBlackListNodes() throws Exception { ApplicationId appId = BuilderUtils.newApplicationId(100, 1); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( appId, 1); - SchedulerEvent event = - new AppAttemptAddedSchedulerEvent(appAttemptId, "default", "user"); - cs.handle(event); + SchedulerEvent addAppEvent = + new AppAddedSchedulerEvent(appId, "default", "user"); + cs.handle(addAppEvent); + SchedulerEvent addAttemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + cs.handle(addAttemptEvent); // Verify the blacklist can be updated independent of requesting containers cs.allocate(appAttemptId, Collections.emptyList(), @@ -596,10 +602,10 @@ public void testApplicationComparator() public void testConcurrentAccessOnApplications() throws Exception { CapacityScheduler cs = new CapacityScheduler(); 
verifyConcurrentAccessOnApplications( - cs.applications, FiCaSchedulerApp.class, Queue.class); + cs.appAttempts, FiCaSchedulerApp.class, Queue.class); } - public static <T extends SchedulerApplication, Q extends Queue> + public static <T extends SchedulerApplicationAttempt, Q extends Queue> void verifyConcurrentAccessOnApplications( final Map<ApplicationAttemptId, T> applications, Class<T> appClazz, final Class<Q> queueClazz) @@ -682,4 +688,21 @@ public void testGetAppsInQueue() throws Exception { Assert.assertNull(scheduler.getAppsInQueue("nonexistentqueue")); } -} + @Test + public void testAddAndRemoveAppFromCapacityScheduler() throws Exception { + + AsyncDispatcher rmDispatcher = new AsyncDispatcher(); + CapacityScheduler cs = new CapacityScheduler(); + CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration(); + setupQueueConfiguration(conf); + cs.reinitialize(conf, new RMContextImpl(rmDispatcher, null, null, null, + null, null, new RMContainerTokenSecretManager(conf), + new NMTokenSecretManagerInRM(conf), + new ClientToAMTokenSecretManagerInRM())); + + SchedulerApplication app = + TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler( + cs.applications, cs, "a1"); + Assert.assertEquals("a1", app.getQueue().getQueueName()); + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 73eb697e0ec..5e272debf7c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -271,14 +271,14 @@ public void 
testSingleQueueOneUserMetrics() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_0, user_0, B); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_1, user_0, B); // same user + a.submitApplicationAttempt(app_1, user_0); // same user // Setup some nodes @@ -320,14 +320,14 @@ public void testUserQueueAcl() throws Exception { .getMockApplicationAttemptId(0, 1); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_d, d, null, rmContext); - d.submitApplication(app_0, user_d, D); + d.submitApplicationAttempt(app_0, user_d); // Attempt the same application again final ApplicationAttemptId appAttemptId_1 = TestUtils .getMockApplicationAttemptId(0, 2); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_d, d, null, rmContext); - d.submitApplication(app_1, user_d, D); // same user + d.submitApplicationAttempt(app_1, user_d); // same user } @@ -345,7 +345,7 @@ public void testAppAttemptMetrics() throws Exception { .getMockApplicationAttemptId(0, 1); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, null, rmContext); - a.submitApplication(app_0, user_0, B); + a.submitApplicationAttempt(app_0, user_0); when(cs.getApplication(appAttemptId_0)).thenReturn(app_0); AppAttemptRemovedSchedulerEvent event = new AppAttemptRemovedSchedulerEvent( @@ -360,7 +360,7 @@ public void testAppAttemptMetrics() throws Exception { .getMockApplicationAttemptId(0, 2); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, null, rmContext); - a.submitApplication(app_1, user_0, B); // same user + a.submitApplicationAttempt(app_1, user_0); // same user assertEquals(1, a.getMetrics().getAppsSubmitted()); 
assertEquals(1, a.getMetrics().getAppsPending()); @@ -396,14 +396,14 @@ public void testSingleQueueWithOneUser() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_1, user_0, A); // same user + a.submitApplicationAttempt(app_1, user_0); // same user // Setup some nodes @@ -524,21 +524,21 @@ public void testUserLimits() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_1, user_0, A); // same user + a.submitApplicationAttempt(app_1, user_0); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_1, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_2, user_1, A); + a.submitApplicationAttempt(app_2, user_1); // Setup some nodes String host_0 = "127.0.0.1"; @@ -618,21 +618,21 @@ public void testHeadroomWithMaxCap() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new 
FiCaSchedulerApp(appAttemptId_1, user_0, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_1, user_0, A); // same user + a.submitApplicationAttempt(app_1, user_0); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_1, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_2, user_1, A); + a.submitApplicationAttempt(app_2, user_1); // Setup some nodes String host_0 = "127.0.0.1"; @@ -729,28 +729,28 @@ public void testSingleQueueWithMultipleUsers() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_1, user_0, A); // same user + a.submitApplicationAttempt(app_1, user_0); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_1, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_2, user_1, A); + a.submitApplicationAttempt(app_2, user_1); final ApplicationAttemptId appAttemptId_3 = TestUtils.getMockApplicationAttemptId(3, 0); FiCaSchedulerApp app_3 = new FiCaSchedulerApp(appAttemptId_3, user_2, a, a.getActiveUsersManager(), rmContext); - a.submitApplication(app_3, user_2, A); + a.submitApplicationAttempt(app_3, user_2); // Setup some nodes String host_0 = "127.0.0.1"; @@ -905,14 +905,14 @@ public void testReservation() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_0, user_0, A); + 
a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_1, user_1, A); + a.submitApplicationAttempt(app_1, user_1); // Setup some nodes String host_0 = "127.0.0.1"; @@ -1007,14 +1007,14 @@ public void testStolenReservedContainer() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_1, user_1, A); + a.submitApplicationAttempt(app_1, user_1); // Setup some nodes String host_0 = "127.0.0.1"; @@ -1111,14 +1111,14 @@ public void testReservationExchange() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_1, a, mock(ActiveUsersManager.class), rmContext); - a.submitApplication(app_1, user_1, A); + a.submitApplicationAttempt(app_1, user_1); // Setup some nodes String host_0 = "127.0.0.1"; @@ -1232,7 +1232,7 @@ public void testLocalityScheduling() throws Exception { FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext)); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); // Setup some nodes and racks String host_0 = "127.0.0.1"; @@ -1373,7 
+1373,7 @@ public void testApplicationPriorityScheduling() throws Exception { FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext)); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); // Setup some nodes and racks String host_0 = "127.0.0.1"; @@ -1504,7 +1504,7 @@ public void testSchedulingConstraints() throws Exception { FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext)); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); // Setup some nodes and racks String host_0_0 = "127.0.0.1"; @@ -1607,21 +1607,21 @@ public void testActivateApplicationAfterQueueRefresh() throws Exception { FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_e, e, mock(ActiveUsersManager.class), rmContext); - e.submitApplication(app_0, user_e, E); + e.submitApplicationAttempt(app_0, user_e); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_e, e, mock(ActiveUsersManager.class), rmContext); - e.submitApplication(app_1, user_e, E); // same user + e.submitApplicationAttempt(app_1, user_e); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_e, e, mock(ActiveUsersManager.class), rmContext); - e.submitApplication(app_2, user_e, E); // same user + e.submitApplicationAttempt(app_2, user_e); // same user // before reinitialization assertEquals(2, e.activeApplications.size()); @@ -1685,21 +1685,21 @@ public void testActivateApplicationByUpdatingClusterResource() FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_e, e, mock(ActiveUsersManager.class), rmContext); - e.submitApplication(app_0, user_e, E); + e.submitApplicationAttempt(app_0, user_e); 
final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_e, e, mock(ActiveUsersManager.class), rmContext); - e.submitApplication(app_1, user_e, E); // same user + e.submitApplicationAttempt(app_1, user_e); // same user final ApplicationAttemptId appAttemptId_2 = TestUtils.getMockApplicationAttemptId(2, 0); FiCaSchedulerApp app_2 = new FiCaSchedulerApp(appAttemptId_2, user_e, e, mock(ActiveUsersManager.class), rmContext); - e.submitApplication(app_2, user_e, E); // same user + e.submitApplicationAttempt(app_2, user_e); // same user // before updating cluster resource assertEquals(2, e.activeApplications.size()); @@ -1762,14 +1762,14 @@ public void testLocalityConstraints() throws Exception { FiCaSchedulerApp app_0 = spy(new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), rmContext)); - a.submitApplication(app_0, user_0, A); + a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); FiCaSchedulerApp app_1 = spy(new FiCaSchedulerApp(appAttemptId_1, user_0, a, mock(ActiveUsersManager.class), rmContext)); - a.submitApplication(app_1, user_0, A); + a.submitApplicationAttempt(app_1, user_0); // Setup some nodes and racks String host_0_0 = "127.0.0.1"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index bd867b6d2b9..452eca0b05a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -78,7 +78,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; @@ -255,7 +257,12 @@ private ApplicationAttemptId createSchedulingRequest(int memory, String queueId, private ApplicationAttemptId createSchedulingRequest(int memory, int vcores, String queueId, String userId, int numContainers, int priority) { ApplicationAttemptId id = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplicationAttempt(id, queueId, userId); + scheduler.addApplication(id.getApplicationId(), queueId, userId); + // This conditional is for testAclSubmitApplication where app is rejected + // and no app is added. 
+ if (scheduler.applications.containsKey(id.getApplicationId())) { + scheduler.addApplicationAttempt(id); + } List ask = new ArrayList(); ResourceRequest request = createResourceRequest(memory, vcores, ResourceRequest.ANY, priority, numContainers, true); @@ -583,7 +590,7 @@ public void testSimpleContainerReservation() throws Exception { // Make sure queue 2 is waiting with a reservation assertEquals(0, scheduler.getQueueManager().getQueue("queue2"). getResourceUsage().getMemory()); - assertEquals(1024, scheduler.applications.get(attId).getCurrentReservation().getMemory()); + assertEquals(1024, scheduler.appAttempts.get(attId).getCurrentReservation().getMemory()); // Now another node checks in with capacity RMNode node2 = @@ -599,10 +606,10 @@ public void testSimpleContainerReservation() throws Exception { getResourceUsage().getMemory()); // The old reservation should still be there... - assertEquals(1024, scheduler.applications.get(attId).getCurrentReservation().getMemory()); + assertEquals(1024, scheduler.appAttempts.get(attId).getCurrentReservation().getMemory()); // ... but it should disappear when we update the first node. 
scheduler.handle(updateEvent); - assertEquals(0, scheduler.applications.get(attId).getCurrentReservation().getMemory()); + assertEquals(0, scheduler.appAttempts.get(attId).getCurrentReservation().getMemory()); } @@ -618,9 +625,13 @@ public void testUserAsDefaultQueue() throws Exception { null, null, null, false, false, 0, null, null), null, null, 0, null); appsMap.put(appAttemptId.getApplicationId(), rmApp); - AppAttemptAddedSchedulerEvent appAddedEvent = - new AppAttemptAddedSchedulerEvent(appAttemptId, "default", "user1"); + AppAddedSchedulerEvent appAddedEvent = + new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "default", + "user1"); scheduler.handle(appAddedEvent); + AppAttemptAddedSchedulerEvent attempAddedEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + scheduler.handle(attempAddedEvent); assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getRunnableAppSchedulables().size()); assertEquals(0, scheduler.getQueueManager().getLeafQueue("default", true) @@ -639,10 +650,14 @@ public void testNotUserAsDefaultQueue() throws Exception { null, null, null, ApplicationSubmissionContext.newInstance(null, null, null, null, null, false, false, 0, null, null), null, null, 0, null); appsMap.put(appAttemptId.getApplicationId(), rmApp); - - AppAttemptAddedSchedulerEvent appAddedEvent2 = - new AppAttemptAddedSchedulerEvent(appAttemptId, "default", "user2"); - scheduler.handle(appAddedEvent2); + + AppAddedSchedulerEvent appAddedEvent = + new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "default", + "user2"); + scheduler.handle(appAddedEvent); + AppAttemptAddedSchedulerEvent attempAddedEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + scheduler.handle(attempAddedEvent); assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1", true) .getRunnableAppSchedulables().size()); assertEquals(1, scheduler.getQueueManager().getLeafQueue("default", true) @@ -660,8 +675,8 @@ public void testEmptyQueueName() throws 
Exception { // submit app with empty queue ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1); - AppAttemptAddedSchedulerEvent appAddedEvent = - new AppAttemptAddedSchedulerEvent(appAttemptId, "", "user1"); + AppAddedSchedulerEvent appAddedEvent = + new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "", "user1"); scheduler.handle(appAddedEvent); // submission rejected @@ -695,7 +710,7 @@ public void testQueuePlacementWithPolicy() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId appId; - Map apps = scheduler.applications; + Map apps = scheduler.appAttempts; List rules = new ArrayList(); rules.add(new QueuePlacementRule.Specified().initialize(true, null)); @@ -786,11 +801,14 @@ public void testQueueDemandCalculation() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId id11 = createAppAttemptId(1, 1); - scheduler.addApplicationAttempt(id11, "root.queue1", "user1"); + scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1"); + scheduler.addApplicationAttempt(id11); ApplicationAttemptId id21 = createAppAttemptId(2, 1); - scheduler.addApplicationAttempt(id21, "root.queue2", "user1"); + scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1"); + scheduler.addApplicationAttempt(id21); ApplicationAttemptId id22 = createAppAttemptId(2, 2); - scheduler.addApplicationAttempt(id22, "root.queue2", "user1"); + scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1"); + scheduler.addApplicationAttempt(id22); int minReqSize = FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB; @@ -831,11 +849,13 @@ public void testQueueDemandCalculation() throws Exception { @Test public void testAppAdditionAndRemoval() throws Exception { scheduler.reinitialize(conf, resourceManager.getRMContext()); - - AppAttemptAddedSchedulerEvent appAddedEvent1 = - new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 
1), "default", - "user1"); - scheduler.handle(appAddedEvent1); + ApplicationAttemptId attemptId =createAppAttemptId(1, 1); + AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(attemptId.getApplicationId(), "default", + "user1"); + scheduler.handle(appAddedEvent); + AppAttemptAddedSchedulerEvent attemptAddedEvent = + new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1)); + scheduler.handle(attemptAddedEvent); // Scheduler should have two queues (the default and the one created for user1) assertEquals(2, scheduler.getQueueManager().getLeafQueues().size()); @@ -1118,12 +1138,12 @@ public void testChoiceOfPreemptedContainers() throws Exception { scheduler.handle(nodeUpdate3); } - assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app2).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app3).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app4).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app5).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app6).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app2).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app3).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app5).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app6).getLiveContainers().size()); // Now new requests arrive from queues C and D ApplicationAttemptId app7 = @@ -1146,16 +1166,16 @@ public void testChoiceOfPreemptedContainers() throws Exception { // Make sure it is lowest priority container. 
scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(), Resources.createResource(2 * 1024)); - assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app2).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app4).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app5).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app2).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app5).getLiveContainers().size()); // First verify we are adding containers to preemption list for the application - assertTrue(!Collections.disjoint(scheduler.applications.get(app3).getLiveContainers(), - scheduler.applications.get(app3).getPreemptionContainers())); - assertTrue(!Collections.disjoint(scheduler.applications.get(app6).getLiveContainers(), - scheduler.applications.get(app6).getPreemptionContainers())); + assertTrue(!Collections.disjoint(scheduler.appAttempts.get(app3).getLiveContainers(), + scheduler.appAttempts.get(app3).getPreemptionContainers())); + assertTrue(!Collections.disjoint(scheduler.appAttempts.get(app6).getLiveContainers(), + scheduler.appAttempts.get(app6).getPreemptionContainers())); // Pretend 15 seconds have passed clock.tick(15); @@ -1165,8 +1185,8 @@ public void testChoiceOfPreemptedContainers() throws Exception { Resources.createResource(2 * 1024)); // At this point the containers should have been killed (since we are not simulating AM) - assertEquals(0, scheduler.applications.get(app6).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app3).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app6).getLiveContainers().size()); + assertEquals(0, 
scheduler.appAttempts.get(app3).getLiveContainers().size()); // Trigger a kill by insisting we want containers back scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(), @@ -1180,22 +1200,22 @@ public void testChoiceOfPreemptedContainers() throws Exception { scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(), Resources.createResource(2 * 1024)); - assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app2).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app3).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app4).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app5).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app6).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app2).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app3).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app5).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app6).getLiveContainers().size()); // Now A and B are below fair share, so preemption shouldn't do anything scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(), Resources.createResource(2 * 1024)); - assertEquals(1, scheduler.applications.get(app1).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app2).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app3).getLiveContainers().size()); - assertEquals(1, scheduler.applications.get(app4).getLiveContainers().size()); - assertEquals(0, scheduler.applications.get(app5).getLiveContainers().size()); - assertEquals(0, 
scheduler.applications.get(app6).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app2).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app3).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app5).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(app6).getLiveContainers().size()); } @Test (timeout = 5000) @@ -1354,9 +1374,9 @@ public void testMultipleContainersWaitingForReservation() throws IOException { // One container should get reservation and the other should get nothing assertEquals(1024, - scheduler.applications.get(attId1).getCurrentReservation().getMemory()); + scheduler.appAttempts.get(attId1).getCurrentReservation().getMemory()); assertEquals(0, - scheduler.applications.get(attId2).getCurrentReservation().getMemory()); + scheduler.appAttempts.get(attId2).getCurrentReservation().getMemory()); } @Test (timeout = 5000) @@ -1391,7 +1411,7 @@ public void testUserMaxRunningApps() throws Exception { scheduler.handle(updateEvent); // App 1 should be running - assertEquals(1, scheduler.applications.get(attId1).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(attId1).getLiveContainers().size()); ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1", "user1", 1); @@ -1400,7 +1420,7 @@ public void testUserMaxRunningApps() throws Exception { scheduler.handle(updateEvent); // App 2 should not be running - assertEquals(0, scheduler.applications.get(attId2).getLiveContainers().size()); + assertEquals(0, scheduler.appAttempts.get(attId2).getLiveContainers().size()); // Request another container for app 1 createSchedulingRequestExistingApplication(1024, 1, attId1); @@ -1409,7 +1429,7 @@ public void testUserMaxRunningApps() throws Exception { scheduler.handle(updateEvent); // Request 
should be fulfilled - assertEquals(2, scheduler.applications.get(attId1).getLiveContainers().size()); + assertEquals(2, scheduler.appAttempts.get(attId1).getLiveContainers().size()); } @Test (timeout = 5000) @@ -1429,10 +1449,10 @@ public void testReservationWhileMultiplePriorities() throws IOException { NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1); scheduler.handle(updateEvent); - FSSchedulerApp app = scheduler.applications.get(attId); + FSSchedulerApp app = scheduler.appAttempts.get(attId); assertEquals(1, app.getLiveContainers().size()); - ContainerId containerId = scheduler.applications.get(attId) + ContainerId containerId = scheduler.appAttempts.get(attId) .getLiveContainers().iterator().next().getContainerId(); // Cause reservation to be created @@ -1501,9 +1521,9 @@ public void testAclSubmitApplication() throws Exception { ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1", "norealuserhasthisname2", 1); - FSSchedulerApp app1 = scheduler.applications.get(attId1); + FSSchedulerApp app1 = scheduler.appAttempts.get(attId1); assertNotNull("The application was not allowed", app1); - FSSchedulerApp app2 = scheduler.applications.get(attId2); + FSSchedulerApp app2 = scheduler.appAttempts.get(attId2); assertNull("The application was allowed", app2); } @@ -1526,7 +1546,8 @@ public void testMultipleNodesSingleRackRequest() throws Exception { scheduler.handle(nodeEvent2); ApplicationAttemptId appId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - scheduler.addApplicationAttempt(appId, "queue1", "user1"); + scheduler.addApplication(appId.getApplicationId(), "queue1", "user1"); + scheduler.addApplicationAttempt(appId); // 1 request with 2 nodes on the same rack. 
another request with 1 node on // a different rack @@ -1545,14 +1566,14 @@ public void testMultipleNodesSingleRackRequest() throws Exception { NodeUpdateSchedulerEvent updateEvent1 = new NodeUpdateSchedulerEvent(node1); scheduler.handle(updateEvent1); // should assign node local - assertEquals(1, scheduler.applications.get(appId).getLiveContainers().size()); + assertEquals(1, scheduler.appAttempts.get(appId).getLiveContainers().size()); // node 2 checks in scheduler.update(); NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2); scheduler.handle(updateEvent2); // should assign rack local - assertEquals(2, scheduler.applications.get(appId).getLiveContainers().size()); + assertEquals(2, scheduler.appAttempts.get(appId).getLiveContainers().size()); } @Test (timeout = 5000) @@ -1571,8 +1592,8 @@ public void testFifoWithinQueue() throws Exception { "user1", 2); ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1", "user1", 2); - FSSchedulerApp app1 = scheduler.applications.get(attId1); - FSSchedulerApp app2 = scheduler.applications.get(attId2); + FSSchedulerApp app1 = scheduler.appAttempts.get(attId1); + FSSchedulerApp app2 = scheduler.appAttempts.get(attId2); FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true); queue1.setPolicy(new FifoPolicy()); @@ -1612,7 +1633,7 @@ public void testMaxAssign() throws Exception { ApplicationAttemptId attId = createSchedulingRequest(1024, "root.default", "user", 8); - FSSchedulerApp app = scheduler.applications.get(attId); + FSSchedulerApp app = scheduler.appAttempts.get(attId); // set maxAssign to 2: only 2 containers should be allocated scheduler.maxAssign = 2; @@ -1674,10 +1695,10 @@ public void testAssignContainer() throws Exception { ApplicationAttemptId attId4 = createSchedulingRequest(1024, fifoQueue, user, 4); - FSSchedulerApp app1 = scheduler.applications.get(attId1); - FSSchedulerApp app2 = scheduler.applications.get(attId2); - FSSchedulerApp app3 = 
scheduler.applications.get(attId3); - FSSchedulerApp app4 = scheduler.applications.get(attId4); + FSSchedulerApp app1 = scheduler.appAttempts.get(attId1); + FSSchedulerApp app2 = scheduler.appAttempts.get(attId2); + FSSchedulerApp app3 = scheduler.appAttempts.get(attId3); + FSSchedulerApp app4 = scheduler.appAttempts.get(attId4); scheduler.getQueueManager().getLeafQueue(fifoQueue, true) .setPolicy(SchedulingPolicy.parse("fifo")); @@ -1764,7 +1785,7 @@ public void testNotAllowSubmitApplication() throws Exception { ApplicationAttemptId attId = ApplicationAttemptId.newInstance(applicationId, this.ATTEMPT_ID++); - scheduler.addApplicationAttempt(attId, queue, user); + scheduler.addApplication(attId.getApplicationId(), queue, user); numTries = 0; while (application.getFinishTime() == 0 && numTries < MAX_TRIES) { @@ -1792,7 +1813,7 @@ public void testReservationThatDoesntFit() throws IOException { NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1); scheduler.handle(updateEvent); - FSSchedulerApp app = scheduler.applications.get(attId); + FSSchedulerApp app = scheduler.appAttempts.get(attId); assertEquals(0, app.getLiveContainers().size()); assertEquals(0, app.getReservedContainers().size()); @@ -1861,7 +1882,7 @@ public void testStrictLocality() throws IOException { NodeUpdateSchedulerEvent node2UpdateEvent = new NodeUpdateSchedulerEvent(node2); // no matter how many heartbeats, node2 should never get a container - FSSchedulerApp app = scheduler.applications.get(attId1); + FSSchedulerApp app = scheduler.appAttempts.get(attId1); for (int i = 0; i < 10; i++) { scheduler.handle(node2UpdateEvent); assertEquals(0, app.getLiveContainers().size()); @@ -1900,7 +1921,7 @@ public void testCancelStrictLocality() throws IOException { NodeUpdateSchedulerEvent node2UpdateEvent = new NodeUpdateSchedulerEvent(node2); // no matter how many heartbeats, node2 should never get a container - FSSchedulerApp app = scheduler.applications.get(attId1); + FSSchedulerApp app 
= scheduler.appAttempts.get(attId1); for (int i = 0; i < 10; i++) { scheduler.handle(node2UpdateEvent); assertEquals(0, app.getLiveContainers().size()); @@ -1933,7 +1954,7 @@ public void testReservationsStrictLocality() throws IOException { ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1", "user1", 0); - FSSchedulerApp app = scheduler.applications.get(attId); + FSSchedulerApp app = scheduler.appAttempts.get(attId); ResourceRequest nodeRequest = createResourceRequest(1024, node2.getHostName(), 1, 2, true); ResourceRequest rackRequest = createResourceRequest(1024, "rack1", 1, 2, true); @@ -1973,7 +1994,7 @@ public void testNoMoreCpuOnNode() throws IOException { ApplicationAttemptId attId = createSchedulingRequest(1024, 1, "default", "user1", 2); - FSSchedulerApp app = scheduler.applications.get(attId); + FSSchedulerApp app = scheduler.appAttempts.get(attId); scheduler.update(); NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1); @@ -1993,10 +2014,10 @@ public void testBasicDRFAssignment() throws Exception { ApplicationAttemptId appAttId1 = createSchedulingRequest(2048, 1, "queue1", "user1", 2); - FSSchedulerApp app1 = scheduler.applications.get(appAttId1); + FSSchedulerApp app1 = scheduler.appAttempts.get(appAttId1); ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 2, "queue1", "user1", 2); - FSSchedulerApp app2 = scheduler.applications.get(appAttId2); + FSSchedulerApp app2 = scheduler.appAttempts.get(appAttId2); DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy(); drfPolicy.initialize(scheduler.getClusterCapacity()); @@ -2034,13 +2055,13 @@ public void testBasicDRFWithQueues() throws Exception { ApplicationAttemptId appAttId1 = createSchedulingRequest(3072, 1, "queue1", "user1", 2); - FSSchedulerApp app1 = scheduler.applications.get(appAttId1); + FSSchedulerApp app1 = scheduler.appAttempts.get(appAttId1); ApplicationAttemptId appAttId2 = createSchedulingRequest(2048, 2, "queue1", 
"user1", 2); - FSSchedulerApp app2 = scheduler.applications.get(appAttId2); + FSSchedulerApp app2 = scheduler.appAttempts.get(appAttId2); ApplicationAttemptId appAttId3 = createSchedulingRequest(1024, 2, "queue2", "user1", 2); - FSSchedulerApp app3 = scheduler.applications.get(appAttId3); + FSSchedulerApp app3 = scheduler.appAttempts.get(appAttId3); DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy(); drfPolicy.initialize(scheduler.getClusterCapacity()); @@ -2071,19 +2092,19 @@ public void testDRFHierarchicalQueues() throws Exception { ApplicationAttemptId appAttId1 = createSchedulingRequest(3074, 1, "queue1.subqueue1", "user1", 2); Thread.sleep(3); // so that start times will be different - FSSchedulerApp app1 = scheduler.applications.get(appAttId1); + FSSchedulerApp app1 = scheduler.appAttempts.get(appAttId1); ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 3, "queue1.subqueue1", "user1", 2); Thread.sleep(3); // so that start times will be different - FSSchedulerApp app2 = scheduler.applications.get(appAttId2); + FSSchedulerApp app2 = scheduler.appAttempts.get(appAttId2); ApplicationAttemptId appAttId3 = createSchedulingRequest(2048, 2, "queue1.subqueue2", "user1", 2); Thread.sleep(3); // so that start times will be different - FSSchedulerApp app3 = scheduler.applications.get(appAttId3); + FSSchedulerApp app3 = scheduler.appAttempts.get(appAttId3); ApplicationAttemptId appAttId4 = createSchedulingRequest(1024, 2, "queue2", "user1", 2); Thread.sleep(3); // so that start times will be different - FSSchedulerApp app4 = scheduler.applications.get(appAttId4); + FSSchedulerApp app4 = scheduler.appAttempts.get(appAttId4); DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy(); drfPolicy.initialize(scheduler.getClusterCapacity()); @@ -2163,7 +2184,7 @@ public void testHostPortNodeName() throws Exception { NodeUpdateSchedulerEvent(node2); // no matter how many heartbeats, node2 should never get a 
container - FSSchedulerApp app = scheduler.applications.get(attId1); + FSSchedulerApp app = scheduler.appAttempts.get(attId1); for (int i = 0; i < 10; i++) { scheduler.handle(node2UpdateEvent); assertEquals(0, app.getLiveContainers().size()); @@ -2178,12 +2199,12 @@ public void testHostPortNodeName() throws Exception { public void testConcurrentAccessOnApplications() throws Exception { FairScheduler fs = new FairScheduler(); TestCapacityScheduler.verifyConcurrentAccessOnApplications( - fs.applications, FSSchedulerApp.class, FSLeafQueue.class); + fs.appAttempts, FSSchedulerApp.class, FSLeafQueue.class); } private void verifyAppRunnable(ApplicationAttemptId attId, boolean runnable) { - FSSchedulerApp app = scheduler.applications.get(attId); + FSSchedulerApp app = scheduler.appAttempts.get(attId); FSLeafQueue queue = app.getQueue(); Collection runnableApps = queue.getRunnableAppSchedulables(); @@ -2356,7 +2377,8 @@ public void testContinuousScheduling() throws Exception { // send application request ApplicationAttemptId appAttemptId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++); - fs.addApplicationAttempt(appAttemptId, "queue11", "user11"); + fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11"); + fs.addApplicationAttempt(appAttemptId); List ask = new ArrayList(); ResourceRequest request = createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); @@ -2367,7 +2389,7 @@ public void testContinuousScheduling() throws Exception { // at least one pass Thread.sleep(fs.getConf().getContinuousSchedulingSleepMs() + 500); - FSSchedulerApp app = fs.applications.get(appAttemptId); + FSSchedulerApp app = fs.appAttempts.get(appAttemptId); // Wait until app gets resources. 
while (app.getCurrentConsumption().equals(Resources.none())) { } @@ -2455,7 +2477,7 @@ public void testBlacklistNodes() throws Exception { ApplicationAttemptId appAttemptId = createSchedulingRequest(GB, "root.default", "user", 1); - FSSchedulerApp app = scheduler.applications.get(appAttemptId); + FSSchedulerApp app = scheduler.appAttempts.get(appAttemptId); // Verify the blacklist can be updated independent of requesting containers scheduler.allocate(appAttemptId, Collections.emptyList(), @@ -2465,7 +2487,7 @@ public void testBlacklistNodes() throws Exception { scheduler.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), null, Collections.singletonList(host)); - assertFalse(scheduler.applications.get(appAttemptId).isBlacklisted(host)); + assertFalse(scheduler.appAttempts.get(appAttemptId).isBlacklisted(host)); List update = Arrays.asList( createResourceRequest(GB, node.getHostName(), 1, 0, true)); @@ -2527,4 +2549,12 @@ public void testGetAppsInQueue() throws Exception { assertTrue(appAttIds.contains(appAttId1)); assertTrue(appAttIds.contains(appAttId2)); } + + @Test + public void testAddAndRemoveAppFromFairScheduler() throws Exception { + FairScheduler scheduler = + (FairScheduler) resourceManager.getResourceScheduler(); + TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler( + scheduler.applications, scheduler, "default"); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 7ce7e42bc67..30578265fe2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -61,13 +61,16 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; @@ -150,14 +153,21 @@ public void testAppAttemptMetrics() throws Exception { ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( appId, 1); - SchedulerEvent event = - new AppAttemptAddedSchedulerEvent(appAttemptId, "queue", "user"); - schedular.handle(event); + SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId, "queue", "user"); + 
schedular.handle(appEvent); + SchedulerEvent attemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + schedular.handle(attemptEvent); appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2); - event = new AppAttemptAddedSchedulerEvent(appAttemptId, "queue", "user"); - schedular.handle(event); + SchedulerEvent appEvent2 = + new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue", + "user"); + schedular.handle(appEvent2); + SchedulerEvent attemptEvent2 = + new AppAttemptAddedSchedulerEvent(appAttemptId); + schedular.handle(attemptEvent2); int afterAppsSubmitted = metrics.getAppsSubmitted(); Assert.assertEquals(1, afterAppsSubmitted - beforeAppsSubmitted); @@ -188,9 +198,13 @@ public void testNodeLocalAssignment() throws Exception { int _appAttemptId = 1; ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, _appAttemptId); - AppAttemptAddedSchedulerEvent appEvent1 = - new AppAttemptAddedSchedulerEvent(appAttemptId, "queue1", "user1"); - scheduler.handle(appEvent1); + AppAddedSchedulerEvent appEvent = + new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1", + "user1"); + scheduler.handle(appEvent); + AppAttemptAddedSchedulerEvent attemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + scheduler.handle(attemptEvent); int memory = 64; int nConts = 3; @@ -274,9 +288,13 @@ public Map getNodes(){ int _appAttemptId = 1; ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, _appAttemptId); - AppAttemptAddedSchedulerEvent appEvent1 = - new AppAttemptAddedSchedulerEvent(appAttemptId, "queue1", "user1"); - scheduler.handle(appEvent1); + AppAddedSchedulerEvent appEvent = + new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1", + "user1"); + scheduler.handle(appEvent); + AppAttemptAddedSchedulerEvent attemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + scheduler.handle(attemptEvent); int memory = 1024; int priority = 1; @@ -520,7 +538,7 @@ public void testFifoScheduler() throws 
Exception { public void testConcurrentAccessOnApplications() throws Exception { FifoScheduler fs = new FifoScheduler(); TestCapacityScheduler.verifyConcurrentAccessOnApplications( - fs.applications, FiCaSchedulerApp.class, Queue.class); + fs.appAttempts, FiCaSchedulerApp.class, Queue.class); } @SuppressWarnings("resource") @@ -541,9 +559,13 @@ public void testBlackListNodes() throws Exception { ApplicationId appId = BuilderUtils.newApplicationId(100, 1); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( appId, 1); - SchedulerEvent event = - new AppAttemptAddedSchedulerEvent(appAttemptId, "default", "user"); - fs.handle(event); + SchedulerEvent appEvent = + new AppAddedSchedulerEvent(appId, "default", + "user"); + fs.handle(appEvent); + SchedulerEvent attemptEvent = + new AppAttemptAddedSchedulerEvent(appAttemptId); + fs.handle(attemptEvent); // Verify the blacklist can be updated independent of requesting containers fs.allocate(appAttemptId, Collections.emptyList(), @@ -575,6 +597,17 @@ public void testGetAppsInQueue() throws Exception { Assert.assertNull(scheduler.getAppsInQueue("someotherqueue")); } + @Test + public void testAddAndRemoveAppFromFiFoScheduler() throws Exception { + Configuration conf = new Configuration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class, + ResourceScheduler.class); + MockRM rm = new MockRM(conf); + FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler(); + TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications, + fs, "queue"); + } + private void checkApplicationResourceUsage(int expected, Application application) { Assert.assertEquals(expected, application.getUsedResources().getMemory()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index 6d1d30d84a3..58170efaff2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; import org.apache.hadoop.yarn.server.resourcemanager.security.QueueACLsManager; @@ -1392,6 +1393,8 @@ public void testMultipleAppAttempts() throws JSONException, Exception { MockNM amNodeManager = rm.registerNode("127.0.0.1:1234", 2048); RMApp app1 = rm.submitApp(CONTAINER_MB, "testwordcount", "user1"); amNodeManager.nodeHeartbeat(true); + rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(), + RMAppAttemptState.ALLOCATED); int maxAppAttempts = rm.getConfig().getInt( YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); @@ -1405,6 +1408,8 @@ public void testMultipleAppAttempts() throws JSONException, Exception { rm.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED); amNodeManager.nodeHeartbeat(true); } + rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(), + RMAppAttemptState.ALLOCATED); assertEquals("incorrect number of attempts", 
maxAppAttempts, app1.getAppAttempts().values().size()); testAppAttemptsHelper(app1.getApplicationId().toString(), app1,