YARN-1051. Add a system for creating reservations of cluster capacity.

Contributed by Subru Krishnan and Carlo Curino.
(cherry picked from commit c8212bacb1)

Conflicts:
	hadoop-yarn-project/CHANGES.txt
This commit is contained in:
Chris Douglas 2014-10-03 17:05:57 -07:00
parent fb5e9df7fd
commit 4ba102bdc3
8 changed files with 60 additions and 39 deletions

View File

@ -1,34 +0,0 @@
YARN-1707. Introduce APIs to add/remove/resize queues in the
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
YARN-2475. Logic for responding to capacity drops for the
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
YARN-1708. Public YARN APIs for creating/updating/deleting
reservations. (Subru Krishnan and Carlo Curino via subru)
YARN-1709. In-memory data structures used to track resources over
time to enable reservations. (Subru Krishnan and Carlo Curino via
subru)
YARN-1710. Logic to find allocations within a Plan that satisfy
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
curino)
YARN-1711. Policy to enforce instantaneous and over-time quotas
on user reservations. (Carlo Curino and Subru Krishnan via curino)
YARN-1712. Plan follower that synchronizes the current state of reservation
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
YARN-2080. Integrating reservation system with ResourceManager and
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
delegate. (Subru Krishnan and Carlo Curino via subru)
YARN-2576. Fixing compilation, javadocs and audit issues to pass
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)

View File

@ -112,6 +112,9 @@ Release 2.6.0 - UNRELEASED
YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for
use by long running services. (Xuan Gong via vinodkv) use by long running services. (Xuan Gong via vinodkv)
YARN-1051. Add a system for creating reservations of cluster capacity.
(see breakdown below)
IMPROVEMENTS IMPROVEMENTS
YARN-2242. Improve exception information on AM launch crashes. (Li Lu YARN-2242. Improve exception information on AM launch crashes. (Li Lu
@ -509,6 +512,43 @@ Release 2.6.0 - UNRELEASED
YARN-2628. Capacity scheduler with DominantResourceCalculator carries out YARN-2628. Capacity scheduler with DominantResourceCalculator carries out
reservation even though slots are free. (Varun Vasudev via jianhe) reservation even though slots are free. (Varun Vasudev via jianhe)
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
YARN-1707. Introduce APIs to add/remove/resize queues in the
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
YARN-2475. Logic for responding to capacity drops for the
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
YARN-1708. Public YARN APIs for creating/updating/deleting
reservations. (Subru Krishnan and Carlo Curino via subru)
YARN-1709. In-memory data structures used to track resources over
time to enable reservations. (Subru Krishnan and Carlo Curino via
subru)
YARN-1710. Logic to find allocations within a Plan that satisfy
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
curino)
YARN-1711. Policy to enforce instantaneous and over-time quotas
on user reservations. (Carlo Curino and Subru Krishnan via curino)
YARN-1712. Plan follower that synchronizes the current state of reservation
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
YARN-2080. Integrating reservation system with ResourceManager and
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
delegate. (Subru Krishnan and Carlo Curino via subru)
YARN-2576. Fixing compilation, javadocs and audit issues to pass
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
Release 2.5.1 - 2014-09-05 Release 2.5.1 - 2014-09-05
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -202,6 +202,7 @@ message ApplicationAttemptReportProto {
optional string diagnostics = 5 [default = "N/A"]; optional string diagnostics = 5 [default = "N/A"];
optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6; optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
optional ContainerIdProto am_container_id = 7; optional ContainerIdProto am_container_id = 7;
optional string original_tracking_url = 8;
} }
enum NodeStateProto { enum NodeStateProto {

View File

@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto;
import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto;
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ReservationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import com.google.protobuf.TextFormat; import com.google.protobuf.TextFormat;
@ -115,6 +117,13 @@ extends ApplicationSubmissionContext {
builder.clearApplicationTags(); builder.clearApplicationTags();
builder.addAllApplicationTags(this.applicationTags); builder.addAllApplicationTags(this.applicationTags);
} }
if (this.logAggregationContext != null) {
builder.setLogAggregationContext(
convertToProtoFormat(this.logAggregationContext));
}
if (this.reservationId != null) {
builder.setReservationId(convertToProtoFormat(this.reservationId));
}
} }
private void mergeLocalToProto() { private void mergeLocalToProto() {

View File

@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;

View File

@ -832,14 +832,16 @@ public class RMAppImpl implements RMApp, Recoverable {
// started or started but not yet saved. // started or started but not yet saved.
if (app.attempts.isEmpty()) { if (app.attempts.isEmpty()) {
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId, app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
app.submissionContext.getQueue(), app.user)); app.submissionContext.getQueue(), app.user,
app.submissionContext.getReservationID()));
return RMAppState.SUBMITTED; return RMAppState.SUBMITTED;
} }
// Add application to scheduler synchronously to guarantee scheduler // Add application to scheduler synchronously to guarantee scheduler
// knows applications before AM or NM re-registers. // knows applications before AM or NM re-registers.
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId, app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
app.submissionContext.getQueue(), app.user, true)); app.submissionContext.getQueue(), app.user, true,
app.submissionContext.getReservationID()));
// recover attempts // recover attempts
app.recoverAppAttempts(); app.recoverAppAttempts();
@ -866,7 +868,8 @@ public class RMAppImpl implements RMApp, Recoverable {
@Override @Override
public void transition(RMAppImpl app, RMAppEvent event) { public void transition(RMAppImpl app, RMAppEvent event) {
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId, app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
app.submissionContext.getQueue(), app.user)); app.submissionContext.getQueue(), app.user,
app.submissionContext.getReservationID()));
} }
} }

View File

@ -225,7 +225,7 @@ public class LeafQueue implements CSQueue {
return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100; return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100;
} }
private synchronized void setupQueueConfigs( protected synchronized void setupQueueConfigs(
Resource clusterResource, Resource clusterResource,
float capacity, float absoluteCapacity, float capacity, float absoluteCapacity,
float maximumCapacity, float absoluteMaxCapacity, float maximumCapacity, float absoluteMaxCapacity,

View File

@ -103,7 +103,8 @@ public class PlanQueue extends ParentQueue {
newlyParsedParentQueue.getAbsoluteCapacity(), newlyParsedParentQueue.getAbsoluteCapacity(),
newlyParsedParentQueue.getMaximumCapacity(), newlyParsedParentQueue.getMaximumCapacity(),
newlyParsedParentQueue.getAbsoluteMaximumCapacity(), newlyParsedParentQueue.getAbsoluteMaximumCapacity(),
newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs()); newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs(),
newlyParsedParentQueue.getReservationContinueLooking());
updateQuotas(newlyParsedParentQueue.userLimit, updateQuotas(newlyParsedParentQueue.userLimit,
newlyParsedParentQueue.userLimitFactor, newlyParsedParentQueue.userLimitFactor,