From ca853445e9a31e05278e9dceea9dbed734103f49 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Fri, 9 Sep 2011 02:19:24 +0000 Subject: [PATCH] MAPREDUCE-2953. Fix a race condition on submission which caused client to incorrectly assume application was gone by making submission synchronous for RMAppManager. Contributed by Thomas Graves. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1166968 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 ++++ .../yarn/server/resourcemanager/ClientRMService.java | 11 ++++++++--- .../yarn/server/resourcemanager/RMAppManager.java | 2 +- .../yarn/server/resourcemanager/ResourceManager.java | 10 +++++----- .../hadoop/yarn/server/resourcemanager/MockRM.java | 11 ++++------- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index cc97ff43875..30f23b81134 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -1247,6 +1247,10 @@ Release 0.23.0 - Unreleased MAPREDUCE-2937. Ensure reason for application failure is displayed to the user. (mahadev via acmurthy) + MAPREDUCE-2953. Fix a race condition on submission which caused client to + incorrectly assume application was gone by making submission synchronous + for RMAppManager. (Thomas Graves via acmurthy) + Release 0.22.0 - Unreleased INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 628372f9502..593d6525a68 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -91,6 +91,7 @@ public class ClientRMService extends AbstractService implements final private YarnScheduler scheduler; final private RMContext rmContext; private final AMLivelinessMonitor amLivelinessMonitor; + private final RMAppManager rmAppManager; private String clientServiceBindAddress; private Server server; @@ -100,11 +101,13 @@ public class ClientRMService extends AbstractService implements private ApplicationACLsManager aclsManager; private Map applicationACLs; - public ClientRMService(RMContext rmContext, YarnScheduler scheduler) { + public ClientRMService(RMContext rmContext, YarnScheduler scheduler, + RMAppManager rmAppManager) { super(ClientRMService.class.getName()); this.scheduler = scheduler; this.rmContext = rmContext; this.amLivelinessMonitor = rmContext.getAMLivelinessMonitor(); + this.rmAppManager = rmAppManager; } @Override @@ -201,8 +204,10 @@ public class ClientRMService extends AbstractService implements throw new IOException("Application with id " + applicationId + " is already present! Cannot add a duplicate!"); } - this.rmContext.getDispatcher().getEventHandler().handle( - new RMAppManagerSubmitEvent(submissionContext)); + // This needs to be synchronous as the client can query + // immediately following the submission to get the application status. + // So call handle directly and do not send an event. + rmAppManager.handle(new RMAppManagerSubmitEvent(submissionContext)); LOG.info("Application with id " + applicationId.getId() + " submitted by user " + user + " with " + submissionContext); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 80e73802739..9a86dfd4579 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -210,7 +210,7 @@ public class RMAppManager implements EventHandler { } } - protected void submitApplication(ApplicationSubmissionContext submissionContext) { + protected synchronized void submitApplication(ApplicationSubmissionContext submissionContext) { ApplicationId applicationId = submissionContext.getApplicationId(); RMApp application = null; try { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 4edb502f649..b578fee818e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -99,7 +99,7 @@ public class ResourceManager extends CompositeService implements Recoverable { protected NMLivelinessMonitor nmLivelinessMonitor; protected NodesListManager nodesListManager; private SchedulerEventDispatcher schedulerDispatcher; - private RMAppManager rmAppManager; + protected RMAppManager rmAppManager; private final AtomicBoolean shutdown = new AtomicBoolean(false); private WebApp webApp; @@ -176,13 +176,13 @@ public class ResourceManager extends CompositeService implements Recoverable { masterService = createApplicationMasterService(); addService(masterService) ; - clientRM = createClientRMService(); - addService(clientRM); - this.rmAppManager = createRMAppManager(); // Register event handler for RMAppManagerEvents this.rmDispatcher.register(RMAppManagerEventType.class, this.rmAppManager); + + clientRM = createClientRMService(); + addService(clientRM); adminService = createAdminService(); addService(adminService); @@ -441,7 +441,7 @@ public class ResourceManager extends CompositeService implements Recoverable { } protected ClientRMService createClientRMService() { - return new ClientRMService(this.rmContext, scheduler); + return new ClientRMService(this.rmContext, scheduler, this.rmAppManager); } protected ApplicationMasterService createApplicationMasterService() { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 59da09652a2..901948fab70 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -60,13 +60,9 @@ public class MockRM extends ResourceManager { public void waitForState(ApplicationId appId, RMAppState finalState) throws Exception { + RMApp app = getRMContext().getRMApps().get(appId); + Assert.assertNotNull("app shouldn't be null", app); int timeoutSecs = 0; - RMApp app = null; - while ((app == null) && timeoutSecs++ < 20) { - app = getRMContext().getRMApps().get(appId); - Thread.sleep(500); - } - timeoutSecs = 0; while (!finalState.equals(app.getState()) && timeoutSecs++ < 20) { System.out.println("App State is : " + app.getState() + @@ -95,6 +91,7 @@ public class MockRM extends ResourceManager { req.setApplicationSubmissionContext(sub); client.submitApplication(req); + // make sure app is immediately available after submit waitForState(appId, RMAppState.ACCEPTED); return getRMContext().getRMApps().get(appId); } @@ -131,7 +128,7 @@ public class MockRM extends ResourceManager { @Override protected ClientRMService createClientRMService() { - return new ClientRMService(getRMContext(), getResourceScheduler()) { + return new ClientRMService(getRMContext(), getResourceScheduler(), rmAppManager) { @Override public void start() { //override to not start rpc handler