YARN-1051. Add a system for creating reservations of cluster capacity.

Contributed by Subru Krishnan and Carlo Curino.
(cherry picked from commit c8212bacb1)

Conflicts:
	hadoop-yarn-project/CHANGES.txt
(cherry picked from commit ebf4aae60f7a641ac7406e27d43528cace20d4ff)

Conflicts:
	hadoop-yarn-project/CHANGES.txt
This commit is contained in:
Chris Douglas 2014-10-03 17:05:57 -07:00
parent 114f09226e
commit 0b2dedc42d
8 changed files with 60 additions and 39 deletions

View File

@@ -1,34 +0,0 @@
YARN-1707. Introduce APIs to add/remove/resize queues in the
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
YARN-2475. Logic for responding to capacity drops for the
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
YARN-1708. Public YARN APIs for creating/updating/deleting
reservations. (Subru Krishnan and Carlo Curino via subru)
YARN-1709. In-memory data structures used to track resources over
time to enable reservations. (Subru Krishnan and Carlo Curino via
subru)
YARN-1710. Logic to find allocations within a Plan that satisfy
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
curino)
YARN-1711. Policy to enforce instantaneous and over-time quotas
on user reservations. (Carlo Curino and Subru Krishnan via curino)
YARN-1712. Plan follower that synchronizes the current state of reservation
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
YARN-2080. Integrating reservation system with ResourceManager and
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
delegate. (Subru Krishnan and Carlo Curino via subru)
YARN-2576. Fixing compilation, javadocs and audit issues to pass
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)

View File

@@ -82,6 +82,9 @@ Release 2.6.0 - UNRELEASED
YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for
use by long running services. (Xuan Gong via vinodkv)
YARN-1051. Add a system for creating reservations of cluster capacity.
(see breakdown below)
IMPROVEMENTS
YARN-2242. Improve exception information on AM launch crashes. (Li Lu
@@ -479,6 +482,43 @@ Release 2.6.0 - UNRELEASED
YARN-2628. Capacity scheduler with DominantResourceCalculator carries out
reservation even though slots are free. (Varun Vasudev via jianhe)
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
YARN-1707. Introduce APIs to add/remove/resize queues in the
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
YARN-2475. Logic for responding to capacity drops for the
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
YARN-1708. Public YARN APIs for creating/updating/deleting
reservations. (Subru Krishnan and Carlo Curino via subru)
YARN-1709. In-memory data structures used to track resources over
time to enable reservations. (Subru Krishnan and Carlo Curino via
subru)
YARN-1710. Logic to find allocations within a Plan that satisfy
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
curino)
YARN-1711. Policy to enforce instantaneous and over-time quotas
on user reservations. (Carlo Curino and Subru Krishnan via curino)
YARN-1712. Plan follower that synchronizes the current state of reservation
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
YARN-2080. Integrating reservation system with ResourceManager and
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
delegate. (Subru Krishnan and Carlo Curino via subru)
YARN-2576. Fixing compilation, javadocs and audit issues to pass
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
Release 2.5.1 - 2014-09-05
INCOMPATIBLE CHANGES

View File

@@ -202,6 +202,7 @@ message ApplicationAttemptReportProto {
optional string diagnostics = 5 [default = "N/A"];
optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
optional ContainerIdProto am_container_id = 7;
optional string original_tracking_url = 8;
}
enum NodeStateProto {

View File

@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
@@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto;
import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto;
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ReservationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import com.google.protobuf.TextFormat;
@@ -115,6 +117,13 @@ extends ApplicationSubmissionContext {
builder.clearApplicationTags();
builder.addAllApplicationTags(this.applicationTags);
}
if (this.logAggregationContext != null) {
builder.setLogAggregationContext(
convertToProtoFormat(this.logAggregationContext));
}
if (this.reservationId != null) {
builder.setReservationId(convertToProtoFormat(this.reservationId));
}
}
private void mergeLocalToProto() {

View File

@@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;

View File

@@ -832,14 +832,16 @@ public class RMAppImpl implements RMApp, Recoverable {
// started or started but not yet saved.
if (app.attempts.isEmpty()) {
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
-app.submissionContext.getQueue(), app.user));
+app.submissionContext.getQueue(), app.user,
+app.submissionContext.getReservationID()));
return RMAppState.SUBMITTED;
}
// Add application to scheduler synchronously to guarantee scheduler
// knows applications before AM or NM re-registers.
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
-app.submissionContext.getQueue(), app.user, true));
+app.submissionContext.getQueue(), app.user, true,
+app.submissionContext.getReservationID()));
// recover attempts
app.recoverAppAttempts();
@@ -866,7 +868,8 @@ public class RMAppImpl implements RMApp, Recoverable {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
-app.submissionContext.getQueue(), app.user));
+app.submissionContext.getQueue(), app.user,
+app.submissionContext.getReservationID()));
}
}

View File

@@ -225,7 +225,7 @@ public class LeafQueue implements CSQueue {
return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100;
}
-private synchronized void setupQueueConfigs(
+protected synchronized void setupQueueConfigs(
Resource clusterResource,
float capacity, float absoluteCapacity,
float maximumCapacity, float absoluteMaxCapacity,

View File

@@ -103,7 +103,8 @@ public class PlanQueue extends ParentQueue {
newlyParsedParentQueue.getAbsoluteCapacity(),
newlyParsedParentQueue.getMaximumCapacity(),
newlyParsedParentQueue.getAbsoluteMaximumCapacity(),
-newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs());
+newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs(),
+newlyParsedParentQueue.getReservationContinueLooking());
updateQuotas(newlyParsedParentQueue.userLimit,
newlyParsedParentQueue.userLimitFactor,