From 4ba102bdc3ce2da170a121cbe7c66918a1631924 Mon Sep 17 00:00:00 2001 From: Chris Douglas Date: Fri, 3 Oct 2014 17:05:57 -0700 Subject: [PATCH] YARN-1051. Add a system for creating reservations of cluster capacity. Contributed by Subru Krishnan and Carlo Curino. (cherry picked from commit c8212bacb1b2a7e6ee83cc56f72297465ce99390) Conflicts: hadoop-yarn-project/CHANGES.txt --- YARN-1051-CHANGES.txt | 34 ---------------- hadoop-yarn-project/CHANGES.txt | 40 +++++++++++++++++++ .../src/main/proto/yarn_protos.proto | 1 + .../ApplicationSubmissionContextPBImpl.java | 9 +++++ .../server/resourcemanager/RMContext.java | 1 + .../resourcemanager/rmapp/RMAppImpl.java | 9 +++-- .../scheduler/capacity/LeafQueue.java | 2 +- .../scheduler/capacity/PlanQueue.java | 3 +- 8 files changed, 60 insertions(+), 39 deletions(-) delete mode 100644 YARN-1051-CHANGES.txt diff --git a/YARN-1051-CHANGES.txt b/YARN-1051-CHANGES.txt deleted file mode 100644 index 5cd11367508..00000000000 --- a/YARN-1051-CHANGES.txt +++ /dev/null @@ -1,34 +0,0 @@ -YARN-1707. Introduce APIs to add/remove/resize queues in the -CapacityScheduler. (Carlo Curino and Subru Krishnan via curino) - -YARN-2475. Logic for responding to capacity drops for the -ReservationSystem. (Carlo Curino and Subru Krishnan via curino) - -YARN-1708. Public YARN APIs for creating/updating/deleting -reservations. (Subru Krishnan and Carlo Curino via subru) - -YARN-1709. In-memory data structures used to track resources over -time to enable reservations. (Subru Krishnan and Carlo Curino via -subru) - -YARN-1710. Logic to find allocations within a Plan that satisfy -user ReservationRequest(s). (Carlo Curino and Subru Krishnan via -curino) - -YARN-1711. Policy to enforce instantaneous and over-time quotas -on user reservations. (Carlo Curino and Subru Krishnan via curino) - -YARN-1712. Plan follower that synchronizes the current state of reservation -subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru) - -YARN-2080. Integrating reservation system with ResourceManager and -client-RM protocol. (Subru Krishnan and Carlo Curino via subru) - -MAPREDUCE-6103. Adding reservation APIs to MR resource manager -delegate. (Subru Krishnan and Carlo Curino via subru) - -YARN-2576. Fixing compilation, javadocs and audit issues to pass -test patch in branch. (Subru Krishnan and Carlo Curino via subru) - -YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched -for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0e6ebfd12a6..28f3813fb0b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -112,6 +112,9 @@ Release 2.6.0 - UNRELEASED YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for use by long running services. (Xuan Gong via vinodkv) + YARN-1051. Add a system for creating reservations of cluster capacity. + (see breakdown below) + IMPROVEMENTS YARN-2242. Improve exception information on AM launch crashes. (Li Lu @@ -509,6 +512,43 @@ Release 2.6.0 - UNRELEASED YARN-2628. Capacity scheduler with DominantResourceCalculator carries out reservation even though slots are free. (Varun Vasudev via jianhe) + BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS + + YARN-1707. Introduce APIs to add/remove/resize queues in the + CapacityScheduler. (Carlo Curino and Subru Krishnan via curino) + + YARN-2475. Logic for responding to capacity drops for the + ReservationSystem. (Carlo Curino and Subru Krishnan via curino) + + YARN-1708. Public YARN APIs for creating/updating/deleting + reservations. (Subru Krishnan and Carlo Curino via subru) + + YARN-1709. In-memory data structures used to track resources over + time to enable reservations. (Subru Krishnan and Carlo Curino via + subru) + + YARN-1710. Logic to find allocations within a Plan that satisfy + user ReservationRequest(s). (Carlo Curino and Subru Krishnan via + curino) + + YARN-1711. Policy to enforce instantaneous and over-time quotas + on user reservations. (Carlo Curino and Subru Krishnan via curino) + + YARN-1712. Plan follower that synchronizes the current state of reservation + subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru) + + YARN-2080. Integrating reservation system with ResourceManager and + client-RM protocol. (Subru Krishnan and Carlo Curino via subru) + + MAPREDUCE-6103. Adding reservation APIs to MR resource manager + delegate. (Subru Krishnan and Carlo Curino via subru) + + YARN-2576. Fixing compilation, javadocs and audit issues to pass + test patch in branch. (Subru Krishnan and Carlo Curino via subru) + + YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched + for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index c645719729e..d07ce139c09 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -202,6 +202,7 @@ message ApplicationAttemptReportProto { optional string diagnostics = 5 [default = "N/A"]; optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6; optional ContainerIdProto am_container_id = 7; + optional string original_tracking_url = 8; } enum NodeStateProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java index 489cf8cfcef..9462a4e5b94 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; @@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ReservationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import com.google.protobuf.TextFormat; @@ -115,6 +117,13 @@ extends ApplicationSubmissionContext { builder.clearApplicationTags(); builder.addAllApplicationTags(this.applicationTags); } + if (this.logAggregationContext != null) { + builder.setLogAggregationContext( + convertToProtoFormat(this.logAggregationContext)); + } + if (this.reservationId != null) { + builder.setReservationId(convertToProtoFormat(this.reservationId)); + } } private void mergeLocalToProto() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 46ecfcd0e75..a59965f5a14 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 84ec766c5bb..c0681aa7d09 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -832,14 +832,16 @@ public class RMAppImpl implements RMApp, Recoverable { // started or started but not yet saved. if (app.attempts.isEmpty()) { app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId, - app.submissionContext.getQueue(), app.user)); + app.submissionContext.getQueue(), app.user, + app.submissionContext.getReservationID())); return RMAppState.SUBMITTED; } // Add application to scheduler synchronously to guarantee scheduler // knows applications before AM or NM re-registers. app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId, - app.submissionContext.getQueue(), app.user, true)); + app.submissionContext.getQueue(), app.user, true, + app.submissionContext.getReservationID())); // recover attempts app.recoverAppAttempts(); @@ -866,7 +868,8 @@ public class RMAppImpl implements RMApp, Recoverable { @Override public void transition(RMAppImpl app, RMAppEvent event) { app.handler.handle(new AppAddedSchedulerEvent(app.applicationId, - app.submissionContext.getQueue(), app.user)); + app.submissionContext.getQueue(), app.user, + app.submissionContext.getReservationID())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index b9f5d5ff551..f0cff71df72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -225,7 +225,7 @@ public class LeafQueue implements CSQueue { return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100; } - private synchronized void setupQueueConfigs( + protected synchronized void setupQueueConfigs( Resource clusterResource, float capacity, float absoluteCapacity, float maximumCapacity, float absoluteMaxCapacity, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java index abb8e95a493..b87744d77bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java @@ -103,7 +103,8 @@ public class PlanQueue extends ParentQueue { newlyParsedParentQueue.getAbsoluteCapacity(), newlyParsedParentQueue.getMaximumCapacity(), newlyParsedParentQueue.getAbsoluteMaximumCapacity(), - newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs()); + newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs(), + newlyParsedParentQueue.getReservationContinueLooking()); updateQuotas(newlyParsedParentQueue.userLimit, newlyParsedParentQueue.userLimitFactor,