YARN-1051. Add a system for creating reservations of cluster capacity.
Contributed by Subru Krishnan and Carlo Curino.
(cherry picked from commit c8212bacb1)
Conflicts:
hadoop-yarn-project/CHANGES.txt
(cherry picked from commit ebf4aae60f7a641ac7406e27d43528cace20d4ff)
Conflicts:
hadoop-yarn-project/CHANGES.txt
This commit is contained in:
parent
114f09226e
commit
0b2dedc42d
|
@ -1,34 +0,0 @@
|
||||||
YARN-1707. Introduce APIs to add/remove/resize queues in the
|
|
||||||
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
|
|
||||||
|
|
||||||
YARN-2475. Logic for responding to capacity drops for the
|
|
||||||
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
|
|
||||||
|
|
||||||
YARN-1708. Public YARN APIs for creating/updating/deleting
|
|
||||||
reservations. (Subru Krishnan and Carlo Curino via subru)
|
|
||||||
|
|
||||||
YARN-1709. In-memory data structures used to track resources over
|
|
||||||
time to enable reservations. (Subru Krishnan and Carlo Curino via
|
|
||||||
subru)
|
|
||||||
|
|
||||||
YARN-1710. Logic to find allocations within a Plan that satisfy
|
|
||||||
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
|
|
||||||
curino)
|
|
||||||
|
|
||||||
YARN-1711. Policy to enforce instantaneous and over-time quotas
|
|
||||||
on user reservations. (Carlo Curino and Subru Krishnan via curino)
|
|
||||||
|
|
||||||
YARN-1712. Plan follower that synchronizes the current state of reservation
|
|
||||||
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
|
|
||||||
|
|
||||||
YARN-2080. Integrating reservation system with ResourceManager and
|
|
||||||
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
|
|
||||||
|
|
||||||
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
|
|
||||||
delegate. (Subru Krishnan and Carlo Curino via subru)
|
|
||||||
|
|
||||||
YARN-2576. Fixing compilation, javadocs and audit issues to pass
|
|
||||||
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
|
|
||||||
|
|
||||||
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
|
|
||||||
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
|
|
|
@ -82,6 +82,9 @@ Release 2.6.0 - UNRELEASED
|
||||||
YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for
|
YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for
|
||||||
use by long running services. (Xuan Gong via vinodkv)
|
use by long running services. (Xuan Gong via vinodkv)
|
||||||
|
|
||||||
|
YARN-1051. Add a system for creating reservations of cluster capacity.
|
||||||
|
(see breakdown below)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-2242. Improve exception information on AM launch crashes. (Li Lu
|
YARN-2242. Improve exception information on AM launch crashes. (Li Lu
|
||||||
|
@ -479,6 +482,43 @@ Release 2.6.0 - UNRELEASED
|
||||||
YARN-2628. Capacity scheduler with DominantResourceCalculator carries out
|
YARN-2628. Capacity scheduler with DominantResourceCalculator carries out
|
||||||
reservation even though slots are free. (Varun Vasudev via jianhe)
|
reservation even though slots are free. (Varun Vasudev via jianhe)
|
||||||
|
|
||||||
|
BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS
|
||||||
|
|
||||||
|
YARN-1707. Introduce APIs to add/remove/resize queues in the
|
||||||
|
CapacityScheduler. (Carlo Curino and Subru Krishnan via curino)
|
||||||
|
|
||||||
|
YARN-2475. Logic for responding to capacity drops for the
|
||||||
|
ReservationSystem. (Carlo Curino and Subru Krishnan via curino)
|
||||||
|
|
||||||
|
YARN-1708. Public YARN APIs for creating/updating/deleting
|
||||||
|
reservations. (Subru Krishnan and Carlo Curino via subru)
|
||||||
|
|
||||||
|
YARN-1709. In-memory data structures used to track resources over
|
||||||
|
time to enable reservations. (Subru Krishnan and Carlo Curino via
|
||||||
|
subru)
|
||||||
|
|
||||||
|
YARN-1710. Logic to find allocations within a Plan that satisfy
|
||||||
|
user ReservationRequest(s). (Carlo Curino and Subru Krishnan via
|
||||||
|
curino)
|
||||||
|
|
||||||
|
YARN-1711. Policy to enforce instantaneous and over-time quotas
|
||||||
|
on user reservations. (Carlo Curino and Subru Krishnan via curino)
|
||||||
|
|
||||||
|
YARN-1712. Plan follower that synchronizes the current state of reservation
|
||||||
|
subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru)
|
||||||
|
|
||||||
|
YARN-2080. Integrating reservation system with ResourceManager and
|
||||||
|
client-RM protocol. (Subru Krishnan and Carlo Curino via subru)
|
||||||
|
|
||||||
|
MAPREDUCE-6103. Adding reservation APIs to MR resource manager
|
||||||
|
delegate. (Subru Krishnan and Carlo Curino via subru)
|
||||||
|
|
||||||
|
YARN-2576. Fixing compilation, javadocs and audit issues to pass
|
||||||
|
test patch in branch. (Subru Krishnan and Carlo Curino via subru)
|
||||||
|
|
||||||
|
YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched
|
||||||
|
for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru)
|
||||||
|
|
||||||
Release 2.5.1 - 2014-09-05
|
Release 2.5.1 - 2014-09-05
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -202,6 +202,7 @@ message ApplicationAttemptReportProto {
|
||||||
optional string diagnostics = 5 [default = "N/A"];
|
optional string diagnostics = 5 [default = "N/A"];
|
||||||
optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
|
optional YarnApplicationAttemptStateProto yarn_application_attempt_state = 6;
|
||||||
optional ContainerIdProto am_container_id = 7;
|
optional ContainerIdProto am_container_id = 7;
|
||||||
|
optional string original_tracking_url = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
enum NodeStateProto {
|
enum NodeStateProto {
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||||
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
|
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ReservationId;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
|
||||||
|
@ -35,6 +36,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.LogAggregationContextProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnProtos.ReservationIdProto;
|
||||||
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
|
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
|
||||||
|
|
||||||
import com.google.protobuf.TextFormat;
|
import com.google.protobuf.TextFormat;
|
||||||
|
@ -115,6 +117,13 @@ extends ApplicationSubmissionContext {
|
||||||
builder.clearApplicationTags();
|
builder.clearApplicationTags();
|
||||||
builder.addAllApplicationTags(this.applicationTags);
|
builder.addAllApplicationTags(this.applicationTags);
|
||||||
}
|
}
|
||||||
|
if (this.logAggregationContext != null) {
|
||||||
|
builder.setLogAggregationContext(
|
||||||
|
convertToProtoFormat(this.logAggregationContext));
|
||||||
|
}
|
||||||
|
if (this.reservationId != null) {
|
||||||
|
builder.setReservationId(convertToProtoFormat(this.reservationId));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mergeLocalToProto() {
|
private void mergeLocalToProto() {
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.reservation.ReservationSystem;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
||||||
|
|
|
@ -832,14 +832,16 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
// started or started but not yet saved.
|
// started or started but not yet saved.
|
||||||
if (app.attempts.isEmpty()) {
|
if (app.attempts.isEmpty()) {
|
||||||
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||||
app.submissionContext.getQueue(), app.user));
|
app.submissionContext.getQueue(), app.user,
|
||||||
|
app.submissionContext.getReservationID()));
|
||||||
return RMAppState.SUBMITTED;
|
return RMAppState.SUBMITTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add application to scheduler synchronously to guarantee scheduler
|
// Add application to scheduler synchronously to guarantee scheduler
|
||||||
// knows applications before AM or NM re-registers.
|
// knows applications before AM or NM re-registers.
|
||||||
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
app.scheduler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||||
app.submissionContext.getQueue(), app.user, true));
|
app.submissionContext.getQueue(), app.user, true,
|
||||||
|
app.submissionContext.getReservationID()));
|
||||||
|
|
||||||
// recover attempts
|
// recover attempts
|
||||||
app.recoverAppAttempts();
|
app.recoverAppAttempts();
|
||||||
|
@ -866,7 +868,8 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
app.handler.handle(new AppAddedSchedulerEvent(app.applicationId,
|
||||||
app.submissionContext.getQueue(), app.user));
|
app.submissionContext.getQueue(), app.user,
|
||||||
|
app.submissionContext.getReservationID()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -225,7 +225,7 @@ public class LeafQueue implements CSQueue {
|
||||||
return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100;
|
return (float)scheduler.getConfiguration().getCapacity(getQueuePath()) / 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void setupQueueConfigs(
|
protected synchronized void setupQueueConfigs(
|
||||||
Resource clusterResource,
|
Resource clusterResource,
|
||||||
float capacity, float absoluteCapacity,
|
float capacity, float absoluteCapacity,
|
||||||
float maximumCapacity, float absoluteMaxCapacity,
|
float maximumCapacity, float absoluteMaxCapacity,
|
||||||
|
|
|
@ -103,7 +103,8 @@ public class PlanQueue extends ParentQueue {
|
||||||
newlyParsedParentQueue.getAbsoluteCapacity(),
|
newlyParsedParentQueue.getAbsoluteCapacity(),
|
||||||
newlyParsedParentQueue.getMaximumCapacity(),
|
newlyParsedParentQueue.getMaximumCapacity(),
|
||||||
newlyParsedParentQueue.getAbsoluteMaximumCapacity(),
|
newlyParsedParentQueue.getAbsoluteMaximumCapacity(),
|
||||||
newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs());
|
newlyParsedParentQueue.getState(), newlyParsedParentQueue.getACLs(),
|
||||||
|
newlyParsedParentQueue.getReservationContinueLooking());
|
||||||
|
|
||||||
updateQuotas(newlyParsedParentQueue.userLimit,
|
updateQuotas(newlyParsedParentQueue.userLimit,
|
||||||
newlyParsedParentQueue.userLimitFactor,
|
newlyParsedParentQueue.userLimitFactor,
|
||||||
|
|
Loading…
Reference in New Issue