YARN-1051. Add a system for creating reservations of cluster capacity.

Contributed by Subru Krishnan and Carlo Curino.
This commit is contained in:
Chris Douglas 2014-10-03 17:05:57 -07:00
parent a2986234be
commit c8212bacb1
8 changed files with 60 additions and 39 deletions

View File

@ -1,34 +0,0 @@
YARN-1707. Introduce APIs to add/remove/resize queues in the
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
YARN-2475. Logic for responding to capacity drops for the
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
YARN-1708. Public YARN APIs for creating/updating/deleting
reservations. (Subru Krishnan and Carlo Curino via subru)
YARN-1709. In-memory data structures used to track resources over
time to enable reservations. (Subru Krishnan and Carlo Curino via
subru)
YARN-1710. Logic to find allocations within a Plan that satisfy
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
curino)
YARN-1711. Policy to enforce instantaneous and over-time quotas
on user reservations. (Carlo Curino and Subru Krishnan via curino)
YARN-1712. Plan follower that synchronizes the current state of reservation
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
YARN-2080. Integrating reservation system with ResourceManager and
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
delegate. (Subru Krishnan and Carlo Curino via subru)
YARN-2576. Fixing compilation, javadocs and audit issues to pass
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)

View File

@ -6,6 +6,9 @@ Trunk - Unreleased
NEW FEATURES
YARN-1051. Add a system for creating reservations of cluster capacity.
(see breakdown below)
IMPROVEMENTS
YARN-2438. yarn-env.sh cleanup (aw)
@ -27,6 +30,43 @@ Trunk - Unreleased
YARN-2525. yarn logs command gives error on trunk (Akira AJISAKA via aw)
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
YARN-1707. Introduce APIs to add/remove/resize queues in the
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
YARN-2475. Logic for responding to capacity drops for the
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
YARN-1708. Public YARN APIs for creating/updating/deleting
reservations. (Subru Krishnan and Carlo Curino via subru)
YARN-1709. In-memory data structures used to track resources over
time to enable reservations. (Subru Krishnan and Carlo Curino via
subru)
YARN-1710. Logic to find allocations within a Plan that satisfy
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
curino)
YARN-1711. Policy to enforce instantaneous and over-time quotas
on user reservations. (Carlo Curino and Subru Krishnan via curino)
YARN-1712. Plan follower that synchronizes the current state of reservation
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
YARN-2080. Integrating reservation system with ResourceManager and
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
delegate. (Subru Krishnan and Carlo Curino via subru)
YARN-2576. Fixing compilation, javadocs and audit issues to pass
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
Release 2.7.0 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -202,6 +202,7 @@ message ApplicationAttemptReportProto {
optional string diagnostics = 5 [default = "N/A"];
optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
optional ContainerIdProto am_container_id = 7;
optional string original_tracking_url = 8;
}
enum NodeStateProto {

View File

@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto;
import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto;
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ReservationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import com.google.protobuf.TextFormat;
@ -115,6 +117,13 @@ extends ApplicationSubmissionContext {
builder.clearApplicationTags();
builder.addAllApplicationTags(this.applicationTags);
}
if (this.logAggregationContext != null) {
builder.setLogAggregationContext(
convertToProtoFormat(this.logAggregationContext));
}
if (this.reservationId != null) {
builder.setReservationId(convertToProtoFormat(this.reservationId));
}
}
private void mergeLocalToProto() {

View File

@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;

View File

@ -832,14 +832,16 @@ public class RMAppImpl implements RMApp, Recoverable {
// started or started but not yet saved.
if (app.attempts.isEmpty()) {
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
-app.submissionContext.getQueue(), app.user));
+app.submissionContext.getQueue(), app.user,
+app.submissionContext.getReservationID()));
return RMAppState.SUBMITTED;
}
// Add application to scheduler synchronously to guarantee scheduler
// knows applications before AM or NM re-registers.
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
-app.submissionContext.getQueue(), app.user, true));
+app.submissionContext.getQueue(), app.user, true,
+app.submissionContext.getReservationID()));
// recover attempts
app.recoverAppAttempts();
@ -866,7 +868,8 @@ public class RMAppImpl implements RMApp, Recoverable {
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
-app.submissionContext.getQueue(), app.user));
+app.submissionContext.getQueue(), app.user,
+app.submissionContext.getReservationID()));
}
}

View File

@ -225,7 +225,7 @@ public class LeafQueue implements CSQueue {
return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100;
}
-private synchronized void setupQueueConfigs(
+protected synchronized void setupQueueConfigs(
Resource clusterResource,
float capacity, float absoluteCapacity,
float maximumCapacity, float absoluteMaxCapacity,

View File

@ -103,7 +103,8 @@ public class PlanQueue extends ParentQueue {
newlyParsedParentQueue.getAbsoluteCapacity(),
newlyParsedParentQueue.getMaximumCapacity(),
newlyParsedParentQueue.getAbsoluteMaximumCapacity(),
-newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs());
+newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs(),
+newlyParsedParentQueue.getReservationContinueLooking());
updateQuotas(newlyParsedParentQueue.userLimit,
newlyParsedParentQueue.userLimitFactor,