From 978012b9b6b18985fd60ec5b26c38693a6e86f9a Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 4 Jun 2013 05:53:47 +0000 Subject: [PATCH] YARN-755. Renamed AllocateResponse.reboot to AllocateResponse.resync. Contributed by Bikas Saha. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1489295 13f79535-47bb-0310-9956-ffa450edef68 --- .../v2/app/local/LocalContainerAllocator.java | 2 +- .../v2/app/rm/RMContainerAllocator.java | 2 +- hadoop-yarn-project/CHANGES.txt | 7 +++++-- .../api/protocolrecords/AllocateResponse.java | 20 ++++++++++++------- .../impl/pb/AllocateResponsePBImpl.java | 8 ++++---- .../src/main/proto/yarn_service_protos.proto | 2 +- .../hadoop/yarn/client/AMRMClientAsync.java | 2 +- .../yarn/client/TestAMRMClientAsync.java | 2 +- .../yarn/server/utils/BuilderUtils.java | 2 +- .../ApplicationMasterService.java | 10 +++++----- .../server/resourcemanager/TestRMRestart.java | 2 +- .../TestAMRMRPCResponseId.java | 4 ++-- .../security/TestApplicationTokens.java | 4 ++-- 13 files changed, 38 insertions(+), 29 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java index d1d36dd1e8f..5a3f84fe452 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java @@ -116,7 +116,7 @@ public class LocalContainerAllocator extends RMCommunicator // continue to attempt to contact the RM. 
throw e; } - if (allocateResponse.getReboot()) { + if (allocateResponse.getResync()) { LOG.info("Event from RM: shutting down Application Master"); // This can happen if the RM has been restarted. If it is in that state, // this application must clean itself up. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index d21c6fa52b0..f951b900f2d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -570,7 +570,7 @@ public class RMContainerAllocator extends RMContainerRequestor // continue to attempt to contact the RM. throw e; } - if (response.getReboot()) { + if (response.getResync()) { // This can happen if the RM has been restarted. If it is in that state, // this application must clean itself up. eventHandler.handle(new JobEvent(this.getJob().getID(), diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5e0568a53fb..b0d0fa7f41c 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -93,8 +93,8 @@ Release 2.1.0-beta - UNRELEASED YARN-635. Renamed YarnRemoteException to YarnException. (Siddharth Seth via vinodkv) - YARN-756. Move Preemption* records to yarn.api where they really belong. - (Jian He via vinodkv) + YARN-755. Renamed AllocateResponse.reboot to AllocateResponse.resync. (Bikas + Saha via vinodkv) NEW FEATURES @@ -265,6 +265,9 @@ Release 2.1.0-beta - UNRELEASED YARN-717. 
Put object creation factories for Token in the class itself and remove useless derivations for specific tokens. (Jian He via vinodkv) + YARN-756. Move Preemption* records to yarn.api where they really belong. + (Jian He via vinodkv) + OPTIMIZATIONS YARN-512. Log aggregation root directory check is more expensive than it diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java index 5de34556c62..4a46dc9629b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java @@ -65,7 +65,7 @@ public abstract class AllocateResponse { public static AllocateResponse newInstance(int responseId, List completedContainers, List allocatedContainers, List updatedNodes, - Resource availResources, boolean reboot, int numClusterNodes, + Resource availResources, boolean resync, int numClusterNodes, PreemptionMessage preempt) { AllocateResponse response = Records.newRecord(AllocateResponse.class); response.setNumClusterNodes(numClusterNodes); @@ -74,26 +74,32 @@ public abstract class AllocateResponse { response.setAllocatedContainers(allocatedContainers); response.setUpdatedNodes(updatedNodes); response.setAvailableResources(availResources); - response.setReboot(reboot); + response.setResync(resync); response.setPreemptionMessage(preempt); return response; } /** - * Should the ApplicationMaster reboot for being horribly + * Should the ApplicationMaster take action because of being * out-of-sync with the ResourceManager as deigned by - * {@link #getResponseId()}? 
+ * {@link #getResponseId()}. + * This can be due to application errors or because the ResourceManager + * has restarted. The action to be taken by the ApplicationMaster + * is to shut down without unregistering with the ResourceManager. + * The ResourceManager will start a new attempt. If the application is already + * done when it gets the resync command, then it may choose to shut down after + * unregistering in which case the ResourceManager will not start a new attempt. * * @return true if the ApplicationMaster should - * reboot, false otherwise + * take action, false otherwise */ @Public @Stable - public abstract boolean getReboot(); + public abstract boolean getResync(); @Private @Unstable - public abstract void setReboot(boolean reboot); + public abstract void setResync(boolean value); /** * Get the last response id. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java index af54139c1c5..d3b7d873a69 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java @@ -145,15 +145,15 @@ public class AllocateResponsePBImpl extends AllocateResponse { } @Override - public synchronized boolean getReboot() { + public synchronized boolean getResync() { AllocateResponseProtoOrBuilder p = viaProto ?
proto : builder; - return (p.getReboot()); + return (p.getResync()); } @Override - public synchronized void setReboot(boolean reboot) { + public synchronized void setResync(boolean resync) { maybeInitBuilder(); - builder.setReboot((reboot)); + builder.setResync((resync)); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index ed3f871b751..b00e5136c1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -59,7 +59,7 @@ message AllocateRequestProto { } message AllocateResponseProto { - optional bool reboot = 1; + optional bool resync = 1; optional int32 response_id = 2; repeated ContainerProto allocated_containers = 3; repeated ContainerStatusProto completed_container_statuses = 4; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java index c24e3ba81a5..ad8ee2cb03e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/AMRMClientAsync.java @@ -331,7 +331,7 @@ public class AMRMClientAsync extends AbstractService continue; } - if (response.getReboot()) { + if (response.getResync()) { handler.onRebootRequest(); LOG.info("Reboot requested. 
Stopping callback."); break; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java index 451c0b49fa3..77c929b728a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestAMRMClientAsync.java @@ -189,7 +189,7 @@ public class TestAMRMClientAsync { final AllocateResponse rebootResponse = createAllocateResponse( new ArrayList(), new ArrayList()); - rebootResponse.setReboot(true); + rebootResponse.setResync(true); when(client.allocate(anyFloat())).thenReturn(rebootResponse); AMRMClientAsync asyncClient = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java index 28faac4f341..9037bd24311 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java @@ -411,7 +411,7 @@ public class BuilderUtils { response.setAllocatedContainers(allocatedContainers); response.setUpdatedNodes(updatedNodes); response.setAvailableResources(availResources); - response.setReboot(reboot); + response.setResync(reboot); response.setPreemptionMessage(preempt); return response; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 70a9ae2e0cd..53790bf8bcf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -90,7 +90,7 @@ public class ApplicationMasterService extends AbstractService implements RecordFactoryProvider.getRecordFactory(null); private final ConcurrentMap responseMap = new ConcurrentHashMap(); - private final AllocateResponse reboot = + private final AllocateResponse resync = recordFactory.newRecordInstance(AllocateResponse.class); private final RMContext rmContext; @@ -98,7 +98,7 @@ public class ApplicationMasterService extends AbstractService implements super(ApplicationMasterService.class.getName()); this.amLivelinessMonitor = rmContext.getAMLivelinessMonitor(); this.rScheduler = scheduler; - this.reboot.setReboot(true); + this.resync.setResync(true); // this.reboot.containers = new ArrayList(); this.rmContext = rmContext; } @@ -263,7 +263,7 @@ public class ApplicationMasterService extends AbstractService implements AllocateResponse lastResponse = responseMap.get(appAttemptId); if (lastResponse == null) { LOG.error("AppAttemptId doesnt exist in cache " + appAttemptId); - return reboot; + return resync; } if ((request.getResponseId() + 1) == lastResponse.getResponseId()) { /* old heartbeat */ @@ -273,7 +273,7 @@ public class ApplicationMasterService extends AbstractService implements // Oh damn! 
Sending reboot isn't enough. RM state is corrupted. TODO: // Reboot is not useful since after AM reboots, it will send register and // get an exception. Might as well throw an exception here. - return reboot; + return resync; } // Allow only one thread in AM to do heartbeat at a time. @@ -344,7 +344,7 @@ public class ApplicationMasterService extends AbstractService implements String message = "App Attempt removed from the cache during allocate" + appAttemptId; LOG.error(message); - return reboot; + return resync; } allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index 5408d73d368..1bebedd06f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -250,7 +250,7 @@ public class TestRMRestart { AllocateResponse allocResponse = am1.allocate( new ArrayList(), new ArrayList()); - Assert.assertTrue(allocResponse.getReboot()); + Assert.assertTrue(allocResponse.getResync()); // NM should be rebooted on heartbeat, even first heartbeat for nm2 NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java index c198580d22f..d204a45b228 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java @@ -82,7 +82,7 @@ public class TestAMRMRPCResponseId { AllocateResponse response = amService.allocate(allocateRequest); Assert.assertEquals(1, response.getResponseId()); - Assert.assertFalse(response.getReboot()); + Assert.assertFalse(response.getResync()); allocateRequest = AllocateRequest.newInstance(attempt .getAppAttemptId(), response.getResponseId(), 0F, null, null); @@ -96,6 +96,6 @@ public class TestAMRMRPCResponseId { allocateRequest = AllocateRequest.newInstance(attempt .getAppAttemptId(), 0, 0F, null, null); response = amService.allocate(allocateRequest); - Assert.assertTrue(response.getReboot()); + Assert.assertTrue(response.getResync()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestApplicationTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestApplicationTokens.java index 5c68df5359f..8eb4e95497e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestApplicationTokens.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestApplicationTokens.java @@ -208,7 +208,7 @@ public class TestApplicationTokens { AllocateRequest allocateRequest = Records.newRecord(AllocateRequest.class); allocateRequest.setApplicationAttemptId(applicationAttemptId); - Assert.assertFalse(rmClient.allocate(allocateRequest).getReboot()); + Assert.assertFalse(rmClient.allocate(allocateRequest).getResync()); // Simulate a master-key-roll-over ApplicationTokenSecretManager appTokenSecretManager = @@ -224,7 +224,7 @@ public class TestApplicationTokens { rmClient = createRMClient(rm, conf, rpc, currentUser); allocateRequest = Records.newRecord(AllocateRequest.class); allocateRequest.setApplicationAttemptId(applicationAttemptId); - Assert.assertFalse(rmClient.allocate(allocateRequest).getReboot()); + Assert.assertFalse(rmClient.allocate(allocateRequest).getResync()); } finally { rm.stop(); if (rmClient != null) {