YARN-1051. Add a system for creating reservations of cluster capacity.
Contributed by Subru Krishnan and Carlo Curino.
This commit is contained in:
parent
a2986234be
commit
c8212bacb1
|
@ -1,34 +0,0 @@
|
|||
YARN-1707. Introduce APIs to add/remove/resize queues in the
|
||||
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
|
||||
|
||||
YARN-2475. Logic for responding to capacity drops for the
|
||||
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
|
||||
|
||||
YARN-1708. Public YARN APIs for creating/updating/deleting
|
||||
reservations. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-1709. In-memory data structures used to track resources over
|
||||
time to enable reservations. (Subru Krishnan and Carlo Curino via
|
||||
subru)
|
||||
|
||||
YARN-1710. Logic to find allocations within a Plan that satisfy
|
||||
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
|
||||
curino)
|
||||
|
||||
YARN-1711. Policy to enforce instantaneous and over-time quotas
|
||||
on user reservations. (Carlo Curino and Subru Krishnan via curino)
|
||||
|
||||
YARN-1712. Plan follower that synchronizes the current state of the reservation
|
||||
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-2080. Integrating reservation system with ResourceManager and
|
||||
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
|
||||
delegate. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-2576. Fixing compilation, javadocs and audit issues to pass
|
||||
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-2611. Fixing Jenkins FindBugs warning and TestRMWebServicesCapacitySched
|
||||
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
|
|
@ -6,6 +6,9 @@ Trunk - Unreleased
|
|||
|
||||
NEW FEATURES
|
||||
|
||||
YARN-1051. Add a system for creating reservations of cluster capacity.
|
||||
(see breakdown below)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
YARN-2438. yarn-env.sh cleanup (aw)
|
||||
|
@ -27,6 +30,43 @@ Trunk - Unreleased
|
|||
|
||||
YARN-2525. yarn logs command gives error on trunk (Akira AJISAKA via aw)
|
||||
|
||||
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
|
||||
|
||||
YARN-1707. Introduce APIs to add/remove/resize queues in the
|
||||
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
|
||||
|
||||
YARN-2475. Logic for responding to capacity drops for the
|
||||
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
|
||||
|
||||
YARN-1708. Public YARN APIs for creating/updating/deleting
|
||||
reservations. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-1709. In-memory data structures used to track resources over
|
||||
time to enable reservations. (Subru Krishnan and Carlo Curino via
|
||||
subru)
|
||||
|
||||
YARN-1710. Logic to find allocations within a Plan that satisfy
|
||||
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
|
||||
curino)
|
||||
|
||||
YARN-1711. Policy to enforce instantaneous and over-time quotas
|
||||
on user reservations. (Carlo Curino and Subru Krishnan via curino)
|
||||
|
||||
YARN-1712. Plan follower that synchronizes the current state of the reservation
|
||||
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-2080. Integrating reservation system with ResourceManager and
|
||||
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
|
||||
delegate. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-2576. Fixing compilation, javadocs and audit issues to pass
|
||||
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
YARN-2611. Fixing Jenkins FindBugs warning and TestRMWebServicesCapacitySched
|
||||
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
|
||||
|
||||
Release 2.7.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -202,6 +202,7 @@ message ApplicationAttemptReportProto {
|
|||
optional string diagnostics = 5 [default = "N/A"];
|
||||
optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
|
||||
optional ContainerIdProto am_container_id = 7;
|
||||
optional string original_tracking_url = 8;
|
||||
}
|
||||
|
||||
enum NodeStateProto {
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
|||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.ReservationId;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
|
||||
|
@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto
|
|||
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ReservationIdProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
|
||||
|
||||
import com.google.protobuf.TextFormat;
|
||||
|
@ -115,6 +117,13 @@ extends ApplicationSubmissionContext {
|
|||
builder.clearApplicationTags();
|
||||
builder.addAllApplicationTags(this.applicationTags);
|
||||
}
|
||||
if (this.logAggregationContext != null) {
|
||||
builder.setLogAggregationContext(
|
||||
convertToProtoFormat(this.logAggregationContext));
|
||||
}
|
||||
if (this.reservationId != null) {
|
||||
builder.setReservationId(convertToProtoFormat(this.reservationId));
|
||||
}
|
||||
}
|
||||
|
||||
private void mergeLocalToProto() {
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
||||
|
|
|
@ -832,14 +832,16 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
// started or started but not yet saved.
|
||||
if (app.attempts.isEmpty()) {
|
||||
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||
app.submissionContext.getQueue(), app.user));
|
||||
app.submissionContext.getQueue(), app.user,
|
||||
app.submissionContext.getReservationID()));
|
||||
return RMAppState.SUBMITTED;
|
||||
}
|
||||
|
||||
// Add application to scheduler synchronously to guarantee scheduler
|
||||
// knows applications before AM or NM re-registers.
|
||||
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||
app.submissionContext.getQueue(), app.user, true));
|
||||
app.submissionContext.getQueue(), app.user, true,
|
||||
app.submissionContext.getReservationID()));
|
||||
|
||||
// recover attempts
|
||||
app.recoverAppAttempts();
|
||||
|
@ -866,7 +868,8 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
@Override
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||
app.submissionContext.getQueue(), app.user));
|
||||
app.submissionContext.getQueue(), app.user,
|
||||
app.submissionContext.getReservationID()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -225,7 +225,7 @@ public class LeafQueue implements CSQueue {
|
|||
return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100;
|
||||
}
|
||||
|
||||
private synchronized void setupQueueConfigs(
|
||||
protected synchronized void setupQueueConfigs(
|
||||
Resource clusterResource,
|
||||
float capacity, float absoluteCapacity,
|
||||
float maximumCapacity, float absoluteMaxCapacity,
|
||||
|
|
|
@ -103,7 +103,8 @@ public class PlanQueue extends ParentQueue {
|
|||
newlyParsedParentQueue.getAbsoluteCapacity(),
|
||||
newlyParsedParentQueue.getMaximumCapacity(),
|
||||
newlyParsedParentQueue.getAbsoluteMaximumCapacity(),
|
||||
newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs());
|
||||
newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs(),
|
||||
newlyParsedParentQueue.getReservationContinueLooking());
|
||||
|
||||
updateQuotas(newlyParsedParentQueue.userLimit,
|
||||
newlyParsedParentQueue.userLimitFactor,
|
||||
|
|
Loading…
Reference in New Issue