YARN-1041. Added the ApplicationMasterProtocol API for applications to use the ability in ResourceManager to optionally not kill containers when the ApplicationMaster exits. Contributed by Jian He.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1557318 13f79535-47bb-0310-9956-ffa450edef68
parent f677175f35
commit 25bc68d15e
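In short: when an ApplicationMaster fails over, the ResourceManager can now hand the new attempt the containers that survived from the previous attempt, via the registration response. A minimal AM-side sketch of consuming the new API (the protocol proxy and every name other than the patch's own API are illustrative assumptions, not part of this change):

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.exceptions.YarnException;

public class RestartAwareAM {
  // 'amrmProtocol' is assumed to be an ApplicationMasterProtocol proxy obtained
  // through the usual YARN RPC factory; how it is built is outside this patch.
  public static List<Container> registerAndRecover(
      ApplicationMasterProtocol amrmProtocol, String host, int rpcPort,
      String trackingUrl) throws YarnException, IOException {
    RegisterApplicationMasterResponse response =
        amrmProtocol.registerApplicationMaster(
            RegisterApplicationMasterRequest.newInstance(host, rpcPort, trackingUrl));
    // New in this patch: containers the RM kept running across the AM restart.
    // The new attempt can resume managing these instead of re-requesting them.
    return response.getContainersFromPreviousAttempt();
  }
}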
CHANGES.txt
@@ -63,6 +63,10 @@ Release 2.4.0 - UNRELEASED

     YARN-1033. Expose RM active/standby state to Web UI and REST API (kasha)

+    YARN-1041. Added the ApplicationMasterProtocol API for applications to use the
+    ability in ResourceManager to optionally not kill containers when the
+    ApplicationMaster exits. (Jian He via vinodkv)
+
   IMPROVEMENTS

     YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu)
RegisterApplicationMasterResponse.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.yarn.api.protocolrecords;
 
 import java.nio.ByteBuffer;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
@@ -27,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceStability.Stable;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.util.Records;
 
@@ -47,16 +49,19 @@ import org.apache.hadoop.yarn.util.Records;
 @Public
 @Stable
 public abstract class RegisterApplicationMasterResponse {
 
   @Private
   @Unstable
   public static RegisterApplicationMasterResponse newInstance(
       Resource minCapability, Resource maxCapability,
-      Map<ApplicationAccessType, String> acls, ByteBuffer key) {
+      Map<ApplicationAccessType, String> acls, ByteBuffer key,
+      List<Container> containersFromPreviousAttempt) {
     RegisterApplicationMasterResponse response =
         Records.newRecord(RegisterApplicationMasterResponse.class);
     response.setMaximumResourceCapability(maxCapability);
     response.setApplicationACLs(acls);
     response.setClientToAMTokenMasterKey(key);
+    response.setContainersFromPreviousAttempt(containersFromPreviousAttempt);
     return response;
   }
 
@@ -105,4 +110,30 @@ public abstract class RegisterApplicationMasterResponse {
   @Public
   @Stable
   public abstract void setClientToAMTokenMasterKey(ByteBuffer key);
 
+  /**
+   * <p>
+   * Get the list of running containers as viewed by
+   * <code>ResourceManager</code> from previous application attempt.
+   * </p>
+   *
+   * @return the list of running containers as viewed by
+   *         <code>ResourceManager</code> from previous application attempt
+   */
+  @Public
+  @Unstable
+  public abstract List<Container> getContainersFromPreviousAttempt();
+
+  /**
+   * Set the list of running containers as viewed by
+   * <code>ResourceManager</code> from previous application attempt.
+   *
+   * @param containersFromPreviousAttempt
+   *          the list of running containers as viewed by
+   *          <code>ResourceManager</code> from previous application attempt.
+   */
+  @Private
+  @Unstable
+  public abstract void setContainersFromPreviousAttempt(
+      List<Container> containersFromPreviousAttempt);
 }
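For reference, a hedged sketch of building the extended record on the RM side (the wrapper class and parameter names here are placeholders; note newInstance is @Private, so only server-side code is expected to call it):

import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.Resource;

public class ResponseFactorySketch {
  // Builds a registration response carrying the surviving containers; an
  // empty list is a safe default when nothing was transferred.
  static RegisterApplicationMasterResponse build(Resource min, Resource max,
      Map<ApplicationAccessType, String> acls, ByteBuffer tokenMasterKey,
      List<Container> fromPreviousAttempt) {
    return RegisterApplicationMasterResponse.newInstance(min, max, acls,
        tokenMasterKey, fromPreviousAttempt == null
            ? Collections.<Container> emptyList() : fromPreviousAttempt);
  }
}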
yarn_service_protos.proto
@@ -44,6 +44,7 @@ message RegisterApplicationMasterResponseProto {
   optional ResourceProto maximumCapability = 1;
   optional bytes client_to_am_token_master_key = 2;
   repeated ApplicationACLMapProto application_ACLs = 3;
+  repeated ContainerProto containers_from_previous_attempt = 4;
 }
 
 message FinishApplicationMasterRequestProto {
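Because the new field is repeated and uses a previously unused tag (4), the message stays wire-compatible: an old client parsing a response from a new RM simply sees an empty list. A tiny illustrative round trip through the generated builder (assuming the generated classes are on the classpath):

import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProto;

public class ProtoCompatSketch {
  static RegisterApplicationMasterResponseProto emptySketch() {
    // A response built without the new field still parses on both sides;
    // the containers_from_previous_attempt list is then simply empty.
    return RegisterApplicationMasterResponseProto.newBuilder().build();
  }
}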
RegisterApplicationMasterResponsePBImpl.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
 
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -29,10 +30,13 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
 import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
 import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
 import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
 import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProto;
 import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProtoOrBuilder;
@@ -52,6 +56,7 @@ public class RegisterApplicationMasterResponsePBImpl extends
 
   private Resource maximumResourceCapability;
   private Map<ApplicationAccessType, String> applicationACLS = null;
+  private List<Container> containersFromPreviousAttempt = null;
 
   public RegisterApplicationMasterResponsePBImpl() {
     builder = RegisterApplicationMasterResponseProto.newBuilder();
@@ -105,6 +110,9 @@ public class RegisterApplicationMasterResponsePBImpl extends
     if (this.applicationACLS != null) {
       addApplicationACLs();
     }
+    if (this.containersFromPreviousAttempt != null) {
+      addRunningContainersToProto();
+    }
   }
 
 
@@ -226,6 +234,43 @@ public class RegisterApplicationMasterResponsePBImpl extends
         ByteBuffer.wrap(builder.getClientToAmTokenMasterKey().toByteArray());
     return key;
   }
 
+  @Override
+  public List<Container> getContainersFromPreviousAttempt() {
+    if (this.containersFromPreviousAttempt != null) {
+      return this.containersFromPreviousAttempt;
+    }
+    initRunningContainersList();
+    return this.containersFromPreviousAttempt;
+  }
+
+  @Override
+  public void setContainersFromPreviousAttempt(final List<Container> containers) {
+    if (containers == null) {
+      return;
+    }
+    this.containersFromPreviousAttempt = new ArrayList<Container>();
+    this.containersFromPreviousAttempt.addAll(containers);
+  }
+
+  private void initRunningContainersList() {
+    RegisterApplicationMasterResponseProtoOrBuilder p = viaProto ? proto : builder;
+    List<ContainerProto> list = p.getContainersFromPreviousAttemptList();
+    containersFromPreviousAttempt = new ArrayList<Container>();
+    for (ContainerProto c : list) {
+      containersFromPreviousAttempt.add(convertFromProtoFormat(c));
+    }
+  }
+
+  private void addRunningContainersToProto() {
+    maybeInitBuilder();
+    builder.clearContainersFromPreviousAttempt();
+    List<ContainerProto> list = new ArrayList<ContainerProto>();
+    for (Container c : containersFromPreviousAttempt) {
+      list.add(convertToProtoFormat(c));
+    }
+    builder.addAllContainersFromPreviousAttempt(list);
+  }
+
   private Resource convertFromProtoFormat(ResourceProto resource) {
     return new ResourcePBImpl(resource);
@@ -235,4 +280,11 @@ public class RegisterApplicationMasterResponsePBImpl extends
     return ((ResourcePBImpl)resource).getProto();
   }
 
+  private ContainerPBImpl convertFromProtoFormat(ContainerProto p) {
+    return new ContainerPBImpl(p);
+  }
+
+  private ContainerProto convertToProtoFormat(Container t) {
+    return ((ContainerPBImpl) t).getProto();
+  }
 }
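The implementation above follows YARN's usual lazy PB-record pattern: reads decode the repeated protos into a local Java list once, writes shadow the proto until the merge step rewrites the repeated field. A quick way to sanity-check the new field is a proto round trip; a hedged sketch (the record and PBImpl classes are the patch's, the scaffolding around them is assumed):

import java.util.Collections;

import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.RegisterApplicationMasterResponsePBImpl;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;

public class PBRoundTripSketch {
  static void roundTrip() {
    RegisterApplicationMasterResponsePBImpl original =
        new RegisterApplicationMasterResponsePBImpl();
    Container container = new ContainerPBImpl(); // an empty container suffices here
    original.setContainersFromPreviousAttempt(Collections.singletonList(container));
    // Serialize to proto and parse back; the list should survive the trip.
    RegisterApplicationMasterResponsePBImpl copy =
        new RegisterApplicationMasterResponsePBImpl(original.getProto());
    if (copy.getContainersFromPreviousAttempt().size() != 1) {
      throw new IllegalStateException("container list lost in PB round trip");
    }
  }
}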
ApplicationMasterService.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRespo
 import org.apache.hadoop.yarn.api.records.AMCommand;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
 import org.apache.hadoop.yarn.api.records.ContainerId;
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.PreemptionContainer;
@@ -78,6 +79,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
@@ -271,6 +273,11 @@ public class ApplicationMasterService extends AbstractService implements
             .getClientToAMTokenSecretManager()
             .getMasterKey(applicationAttemptId).getEncoded()));
       }
 
+      List<Container> containerList =
+          ((AbstractYarnScheduler) rScheduler)
+            .getTransferredContainers(applicationAttemptId);
+      response.setContainersFromPreviousAttempt(containerList);
       return response;
     }
   }
AbstractYarnScheduler.java (new file)
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+
+public class AbstractYarnScheduler {
+
+  protected RMContext rmContext;
+  protected Map<ApplicationId, SchedulerApplication> applications;
+
+  public synchronized List<Container> getTransferredContainers(
+      ApplicationAttemptId currentAttempt) {
+    ApplicationId appId = currentAttempt.getApplicationId();
+    SchedulerApplication app = applications.get(appId);
+    List<Container> containerList = new ArrayList<Container>();
+    RMApp appImpl = this.rmContext.getRMApps().get(appId);
+    if (appImpl.getApplicationSubmissionContext().getUnmanagedAM()) {
+      return containerList;
+    }
+    Collection<RMContainer> liveContainers =
+        app.getCurrentAppAttempt().getLiveContainers();
+    ContainerId amContainerId =
+        rmContext.getRMApps().get(appId).getCurrentAppAttempt()
+          .getMasterContainer().getId();
+    for (RMContainer rmContainer : liveContainers) {
+      if (!rmContainer.getContainerId().equals(amContainerId)) {
+        containerList.add(rmContainer.getContainer());
+      }
+    }
+    return containerList;
+  }
+
+  public Map<ApplicationId, SchedulerApplication> getSchedulerApplications() {
+    return applications;
+  }
+}
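Two behaviors worth noting in getTransferredContainers(): an unmanaged AM always gets an empty list (the RM does not manage its containers across attempts), and the previous AM's own container is filtered out, since the new attempt runs in a fresh AM container. A hedged caller-side sketch (the wrapper is illustrative; only getTransferredContainers comes from this patch):

import java.util.List;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;

public class TransferredContainersSketch {
  // Mirrors what ApplicationMasterService.registerApplicationMaster() now does:
  // ask the scheduler for the containers that outlived the previous attempt.
  static List<Container> survivors(AbstractYarnScheduler scheduler,
      ApplicationAttemptId newAttemptId) {
    return scheduler.getTransferredContainers(newAttemptId);
  }
}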
CapacityScheduler.java
@@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
@@ -95,7 +96,7 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Evolving
 @SuppressWarnings("unchecked")
-public class CapacityScheduler
+public class CapacityScheduler extends AbstractYarnScheduler
   implements PreemptableResourceScheduler, CapacitySchedulerContext,
   Configurable {
 
@@ -177,7 +178,6 @@ public class CapacityScheduler
 
   private CapacitySchedulerConfiguration conf;
   private Configuration yarnConf;
-  private RMContext rmContext;
 
   private Map<String, CSQueue> queues = new ConcurrentHashMap<String, CSQueue>();
 
@@ -191,10 +191,6 @@ public class CapacityScheduler
   private Resource minimumAllocation;
   private Resource maximumAllocation;
 
-  @VisibleForTesting
-  protected Map<ApplicationId, SchedulerApplication> applications =
-      new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
-
   private boolean initialized = false;
 
   private ResourceCalculator calculator;
@@ -271,9 +267,10 @@ public class CapacityScheduler
     this.maximumAllocation = this.conf.getMaximumAllocation();
     this.calculator = this.conf.getResourceCalculator();
     this.usePortForNodeName = this.conf.getUsePortForNodeName();
+    this.applications =
+        new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
     this.rmContext = rmContext;
 
     initializeQueues(this.conf);
 
     initialized = true;
FairScheduler.java
@@ -71,6 +71,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEven
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
@@ -120,10 +121,10 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Unstable
 @SuppressWarnings("unchecked")
-public class FairScheduler implements ResourceScheduler {
+public class FairScheduler extends AbstractYarnScheduler implements
+    ResourceScheduler {
   private boolean initialized;
   private FairSchedulerConfiguration conf;
-  private RMContext rmContext;
   private Resource minimumAllocation;
   private Resource maximumAllocation;
   private Resource incrAllocation;
@@ -157,11 +158,6 @@ public class FairScheduler implements ResourceScheduler {
   // Time we last ran preemptTasksIfNecessary
   private long lastPreemptCheckTime;
 
-  // This stores per-application scheduling information,
-  @VisibleForTesting
-  protected Map<ApplicationId, SchedulerApplication> applications =
-      new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
-
   // Nodes in the cluster, indexed by NodeId
   private Map<NodeId, FSSchedulerNode> nodes = 
       new ConcurrentHashMap<NodeId, FSSchedulerNode>();
@@ -1235,6 +1231,9 @@ public class FairScheduler implements ResourceScheduler {
 
     rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf);
     this.rmContext = rmContext;
+    // This stores per-application scheduling information
+    this.applications =
+        new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
     this.eventLog = new FairSchedulerEventLog();
     eventLog.init(this.conf);
 
@@ -1357,5 +1356,4 @@ public class FairScheduler implements ResourceScheduler {
     queue.collectSchedulerApplications(apps);
     return apps;
   }
-
 }
(File diff suppressed because it is too large)
FifoScheduler.java
@@ -71,6 +71,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerStat
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
@@ -104,7 +105,8 @@ import com.google.common.annotations.VisibleForTesting;
 @LimitedPrivate("yarn")
 @Evolving
 @SuppressWarnings("unchecked")
-public class FifoScheduler implements ResourceScheduler, Configurable {
+public class FifoScheduler extends AbstractYarnScheduler implements
+    ResourceScheduler, Configurable {
 
   private static final Log LOG = LogFactory.getLog(FifoScheduler.class);
 
@@ -115,7 +117,6 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
 
   private final static Container[] EMPTY_CONTAINER_ARRAY = new Container[] {};
   private final static List<Container> EMPTY_CONTAINER_LIST = Arrays.asList(EMPTY_CONTAINER_ARRAY);
-  private RMContext rmContext;
 
   protected Map<NodeId, FiCaSchedulerNode> nodes = new ConcurrentHashMap<NodeId, FiCaSchedulerNode>();
 
@@ -124,11 +125,6 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
   private Resource maximumAllocation;
   private boolean usePortForNodeName;
 
-  // Use ConcurrentSkipListMap because applications need to be ordered
-  @VisibleForTesting
-  protected Map<ApplicationId, SchedulerApplication> applications =
-      new ConcurrentSkipListMap<ApplicationId, SchedulerApplication>();
-
   private ActiveUsersManager activeUsersManager;
 
   private static final String DEFAULT_QUEUE_NAME = "default";
@@ -243,6 +239,9 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
     if (!this.initialized) {
       validateConf(conf);
       this.rmContext = rmContext;
+      //Use ConcurrentSkipListMap because applications need to be ordered
+      this.applications =
+          new ConcurrentSkipListMap<ApplicationId, SchedulerApplication>();
       this.minimumAllocation =
           Resources.createResource(conf.getInt(
             YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
TestAMRestart.java
@@ -24,6 +24,7 @@ import java.util.List;
 
 import junit.framework.Assert;
 
+import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
 import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.Container;
@@ -150,7 +151,29 @@ public class TestAMRestart {
     ApplicationAttemptId newAttemptId =
         app1.getCurrentAppAttempt().getAppAttemptId();
     Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
-    MockAM am2 = MockRM.launchAM(app1, rm1, nm1);
+
+    // launch the new AM
+    RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
+    nm1.nodeHeartbeat(true);
+    MockAM am2 = rm1.sendAMLaunched(attempt2.getAppAttemptId());
+    RegisterApplicationMasterResponse registerResponse =
+        am2.registerAppAttempt();
+
+    // Assert two containers are running: container2 and container3;
+    Assert.assertEquals(2, registerResponse.getContainersFromPreviousAttempt()
+      .size());
+    boolean containerId2Exists = false, containerId3Exists = false;
+    for (Container container : registerResponse
+      .getContainersFromPreviousAttempt()) {
+      if (container.getId().equals(containerId2)) {
+        containerId2Exists = true;
+      }
+      if (container.getId().equals(containerId3)) {
+        containerId3Exists = true;
+      }
+    }
+    Assert.assertTrue(containerId2Exists && containerId3Exists);
+    rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
+
     // complete container by sending the container complete event which has earlier
     // attempt's attemptId
TestCapacityScheduler.java
@@ -642,7 +642,7 @@ public class TestCapacityScheduler {
 
     SchedulerApplication app =
         TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
-          cs.applications, cs, "a1");
+          cs.getSchedulerApplications(), cs, "a1");
     Assert.assertEquals("a1", app.getQueue().getQueueName());
   }
 }
TestFairScheduler.java
@@ -79,7 +79,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
@@ -260,7 +259,7 @@
     scheduler.addApplication(id.getApplicationId(), queueId, userId);
     // This conditional is for testAclSubmitApplication where app is rejected
     // and no app is added.
-    if (scheduler.applications.containsKey(id.getApplicationId())) {
+    if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) {
       scheduler.addApplicationAttempt(id, false);
     }
     List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
@@ -2546,6 +2545,6 @@
     FairScheduler scheduler =
         (FairScheduler) resourceManager.getResourceScheduler();
     TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
-        scheduler.applications, scheduler, "default");
+        scheduler.getSchedulerApplications(), scheduler, "default");
   }
 }
TestFifoScheduler.java
@@ -591,8 +591,8 @@ public class TestFifoScheduler {
         ResourceScheduler.class);
     MockRM rm = new MockRM(conf);
     FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler();
-    TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications,
-        fs, "queue");
+    TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
+        fs.getSchedulerApplications(), fs, "queue");
   }
 
   private void checkApplicationResourceUsage(int expected,
@ -0,0 +1,615 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.net.NetworkTopology;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
|
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||||
|
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.Application;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.Task;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
|
||||||
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestFifoScheduler {
|
||||||
|
private static final Log LOG = LogFactory.getLog(TestFifoScheduler.class);
|
||||||
|
private final int GB = 1024;
|
||||||
|
|
||||||
|
private ResourceManager resourceManager = null;
|
||||||
|
|
||||||
|
private static final RecordFactory recordFactory =
|
||||||
|
RecordFactoryProvider.getRecordFactory(null);
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
resourceManager = new ResourceManager();
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setClass(YarnConfiguration.RM_SCHEDULER,
|
||||||
|
FifoScheduler.class, ResourceScheduler.class);
|
||||||
|
resourceManager.init(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
resourceManager.stop();
|
||||||
|
}
|
||||||
|
|
||||||
|
private org.apache.hadoop.yarn.server.resourcemanager.NodeManager
|
||||||
|
registerNode(String hostName, int containerManagerPort, int nmHttpPort,
|
||||||
|
String rackName, Resource capability) throws IOException,
|
||||||
|
YarnException {
|
||||||
|
return new org.apache.hadoop.yarn.server.resourcemanager.NodeManager(
|
||||||
|
hostName, containerManagerPort, nmHttpPort, rackName, capability,
|
||||||
|
resourceManager);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) {
|
||||||
|
ApplicationId appIdImpl = ApplicationId.newInstance(0, appId);
|
||||||
|
ApplicationAttemptId attId =
|
||||||
|
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
|
||||||
|
return attId;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ResourceRequest createResourceRequest(int memory, String host,
|
||||||
|
int priority, int numContainers) {
|
||||||
|
ResourceRequest request = recordFactory
|
||||||
|
.newRecordInstance(ResourceRequest.class);
|
||||||
|
request.setCapability(Resources.createResource(memory));
|
||||||
|
request.setResourceName(host);
|
||||||
|
request.setNumContainers(numContainers);
|
||||||
|
Priority prio = recordFactory.newRecordInstance(Priority.class);
|
||||||
|
prio.setPriority(priority);
|
||||||
|
request.setPriority(prio);
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=5000)
|
||||||
|
public void testFifoSchedulerCapacityWhenNoNMs() {
|
||||||
|
FifoScheduler scheduler = new FifoScheduler();
|
||||||
|
QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||||
|
Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=5000)
|
||||||
|
public void testAppAttemptMetrics() throws Exception {
|
||||||
|
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||||
|
RMContext rmContext = new RMContextImpl(dispatcher, null,
|
||||||
|
null, null, null, null, null, null, null);
|
||||||
|
|
||||||
|
FifoScheduler schedular = new FifoScheduler();
|
||||||
|
schedular.reinitialize(new Configuration(), rmContext);
|
||||||
|
QueueMetrics metrics = schedular.getRootQueueMetrics();
|
||||||
|
int beforeAppsSubmitted = metrics.getAppsSubmitted();
|
||||||
|
|
||||||
|
ApplicationId appId = BuilderUtils.newApplicationId(200, 1);
|
||||||
|
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
|
||||||
|
appId, 1);
|
||||||
|
|
||||||
|
SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId, "queue", "user");
|
||||||
|
schedular.handle(appEvent);
|
||||||
|
SchedulerEvent attemptEvent =
|
||||||
|
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||||
|
schedular.handle(attemptEvent);
|
||||||
|
|
||||||
|
appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2);
|
||||||
|
SchedulerEvent attemptEvent2 =
|
||||||
|
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||||
|
schedular.handle(attemptEvent2);
|
||||||
|
|
||||||
|
int afterAppsSubmitted = metrics.getAppsSubmitted();
|
||||||
|
Assert.assertEquals(1, afterAppsSubmitted - beforeAppsSubmitted);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=2000)
|
||||||
|
public void testNodeLocalAssignment() throws Exception {
|
||||||
|
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
RMContainerTokenSecretManager containerTokenSecretManager =
|
||||||
|
new RMContainerTokenSecretManager(conf);
|
||||||
|
containerTokenSecretManager.rollMasterKey();
|
||||||
|
NMTokenSecretManagerInRM nmTokenSecretManager =
|
||||||
|
new NMTokenSecretManagerInRM(conf);
|
||||||
|
nmTokenSecretManager.rollMasterKey();
|
||||||
|
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
|
||||||
|
null, containerTokenSecretManager, nmTokenSecretManager, null);
|
||||||
|
|
||||||
|
FifoScheduler scheduler = new FifoScheduler();
|
||||||
|
scheduler.reinitialize(new Configuration(), rmContext);
|
||||||
|
|
||||||
|
RMNode node0 = MockNodes.newNodeInfo(1,
|
||||||
|
Resources.createResource(1024 * 64), 1, "127.0.0.1");
|
||||||
|
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
|
||||||
|
scheduler.handle(nodeEvent1);
|
||||||
|
|
||||||
|
int _appId = 1;
|
||||||
|
int _appAttemptId = 1;
|
||||||
|
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
|
||||||
|
_appAttemptId);
|
||||||
|
AppAddedSchedulerEvent appEvent =
|
||||||
|
new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1",
|
||||||
|
"user1");
|
||||||
|
scheduler.handle(appEvent);
|
||||||
|
AppAttemptAddedSchedulerEvent attemptEvent =
|
||||||
|
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||||
|
scheduler.handle(attemptEvent);
|
||||||
|
|
||||||
|
int memory = 64;
|
||||||
|
int nConts = 3;
|
||||||
|
int priority = 20;
|
||||||
|
|
||||||
|
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||||
|
ResourceRequest nodeLocal = createResourceRequest(memory,
|
||||||
|
node0.getHostName(), priority, nConts);
|
||||||
|
ResourceRequest rackLocal = createResourceRequest(memory,
|
||||||
|
node0.getRackName(), priority, nConts);
|
||||||
|
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
|
||||||
|
nConts);
|
||||||
|
ask.add(nodeLocal);
|
||||||
|
ask.add(rackLocal);
|
||||||
|
ask.add(any);
|
||||||
|
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
|
||||||
|
|
||||||
|
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
|
||||||
|
|
||||||
|
// Before the node update event, there are 3 local requests outstanding
|
||||||
|
Assert.assertEquals(3, nodeLocal.getNumContainers());
|
||||||
|
|
||||||
|
scheduler.handle(node0Update);
|
||||||
|
|
||||||
|
// After the node update event, check that there are no more local requests
|
||||||
|
// outstanding
|
||||||
|
Assert.assertEquals(0, nodeLocal.getNumContainers());
|
||||||
|
//Also check that the containers were scheduled
|
||||||
|
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
|
||||||
|
Assert.assertEquals(3, info.getLiveContainers().size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(timeout=2000)
|
||||||
|
public void testUpdateResourceOnNode() throws Exception {
|
||||||
|
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
RMContainerTokenSecretManager containerTokenSecretManager =
|
||||||
|
new RMContainerTokenSecretManager(conf);
|
||||||
|
containerTokenSecretManager.rollMasterKey();
|
||||||
|
NMTokenSecretManagerInRM nmTokenSecretManager =
|
||||||
|
new NMTokenSecretManagerInRM(conf);
|
||||||
|
nmTokenSecretManager.rollMasterKey();
|
||||||
|
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
|
||||||
|
null, containerTokenSecretManager, nmTokenSecretManager, null);
|
||||||
|
|
||||||
|
FifoScheduler scheduler = new FifoScheduler(){
|
||||||
|
@SuppressWarnings("unused")
|
||||||
|
public Map<NodeId, FiCaSchedulerNode> getNodes(){
|
||||||
|
return nodes;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
scheduler.reinitialize(new Configuration(), rmContext);
|
||||||
|
RMNode node0 = MockNodes.newNodeInfo(1,
|
||||||
|
Resources.createResource(2048, 4), 1, "127.0.0.1");
|
||||||
|
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
|
||||||
|
scheduler.handle(nodeEvent1);
|
||||||
|
|
||||||
|
Method method = scheduler.getClass().getDeclaredMethod("getNodes");
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<NodeId, FiCaSchedulerNode> schedulerNodes =
|
||||||
|
(Map<NodeId, FiCaSchedulerNode>) method.invoke(scheduler);
|
||||||
|
assertEquals(schedulerNodes.values().size(), 1);
|
||||||
|
|
||||||
|
// set resource of RMNode to 1024 and verify it works.
|
||||||
|
node0.setResourceOption(ResourceOption.newInstance(
|
||||||
|
Resources.createResource(1024, 4), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT));
|
||||||
|
assertEquals(node0.getTotalCapability().getMemory(), 1024);
|
||||||
|
// verify that SchedulerNode's resource hasn't been changed.
|
||||||
|
assertEquals(schedulerNodes.get(node0.getNodeID()).
|
||||||
|
getAvailableResource().getMemory(), 2048);
|
||||||
|
// now, NM heartbeat comes.
|
||||||
|
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
|
||||||
|
scheduler.handle(node0Update);
|
||||||
|
// SchedulerNode's available resource is changed.
|
||||||
|
assertEquals(schedulerNodes.get(node0.getNodeID()).
|
||||||
|
getAvailableResource().getMemory(), 1024);
|
||||||
|
QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||||
|
Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
|
||||||
|
|
||||||
|
int _appId = 1;
|
||||||
|
int _appAttemptId = 1;
|
||||||
|
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
|
||||||
|
_appAttemptId);
|
||||||
|
AppAddedSchedulerEvent appEvent =
|
||||||
|
new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1",
|
||||||
|
"user1");
|
||||||
|
scheduler.handle(appEvent);
|
||||||
|
AppAttemptAddedSchedulerEvent attemptEvent =
|
||||||
|
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||||
|
scheduler.handle(attemptEvent);
|
||||||
|
|
||||||
|
int memory = 1024;
|
||||||
|
int priority = 1;
|
||||||
|
|
||||||
|
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||||
|
ResourceRequest nodeLocal = createResourceRequest(memory,
|
||||||
|
node0.getHostName(), priority, 1);
|
||||||
|
ResourceRequest rackLocal = createResourceRequest(memory,
|
||||||
|
node0.getRackName(), priority, 1);
|
||||||
|
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
|
||||||
|
1);
|
||||||
|
ask.add(nodeLocal);
|
||||||
|
ask.add(rackLocal);
|
||||||
|
ask.add(any);
|
||||||
|
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
|
||||||
|
|
||||||
|
// Before the node update event, there are one local request
|
||||||
|
Assert.assertEquals(1, nodeLocal.getNumContainers());
|
||||||
|
|
||||||
|
// Now schedule.
|
||||||
|
scheduler.handle(node0Update);
|
||||||
|
|
||||||
|
// After the node update event, check no local request
|
||||||
|
Assert.assertEquals(0, nodeLocal.getNumContainers());
|
||||||
|
// Also check that one container was scheduled
|
||||||
|
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
|
||||||
|
Assert.assertEquals(1, info.getLiveContainers().size());
|
||||||
|
// And check the default Queue now is full.
|
||||||
|
queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||||
|
Assert.assertEquals(1.0f, queueInfo.getCurrentCapacity());
|
||||||
|
}
|
||||||
|
|
||||||
|
// @Test
|
||||||
|
public void testFifoScheduler() throws Exception {
|
||||||
|
|
||||||
|
LOG.info("--- START: testFifoScheduler ---");
|
||||||
|
|
||||||
|
final int GB = 1024;
|
||||||
|
|
||||||
|
// Register node1
|
||||||
|
String host_0 = "host_0";
|
||||||
|
org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_0 =
|
||||||
|
registerNode(host_0, 1234, 2345, NetworkTopology.DEFAULT_RACK,
|
||||||
|
Resources.createResource(4 * GB, 1));
|
||||||
|
nm_0.heartbeat();
|
||||||
|
|
||||||
|
// Register node2
|
||||||
|
String host_1 = "host_1";
|
||||||
|
org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_1 =
|
||||||
|
registerNode(host_1, 1234, 2345, NetworkTopology.DEFAULT_RACK,
|
||||||
|
Resources.createResource(2 * GB, 1));
|
||||||
|
nm_1.heartbeat();
|
||||||
|
|
||||||
|
// ResourceRequest priorities
|
||||||
|
Priority priority_0 =
|
||||||
|
org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(0);
|
||||||
|
Priority priority_1 =
|
||||||
|
org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(1);
|
||||||
|
|
||||||
|
// Submit an application
|
||||||
|
Application application_0 = new Application("user_0", resourceManager);
|
||||||
|
application_0.submit();
|
||||||
|
|
||||||
|
application_0.addNodeManager(host_0, 1234, nm_0);
|
||||||
|
application_0.addNodeManager(host_1, 1234, nm_1);
|
||||||
|
|
||||||
|
Resource capability_0_0 = Resources.createResource(GB);
|
||||||
|
application_0.addResourceRequestSpec(priority_1, capability_0_0);
|
||||||
|
|
||||||
|
Resource capability_0_1 = Resources.createResource(2 * GB);
|
||||||
|
application_0.addResourceRequestSpec(priority_0, capability_0_1);
|
||||||
|
|
||||||
|
Task task_0_0 = new Task(application_0, priority_1,
|
||||||
|
new String[] {host_0, host_1});
|
||||||
|
application_0.addTask(task_0_0);
|
||||||
|
|
||||||
|
// Submit another application
|
||||||
|
Application application_1 = new Application("user_1", resourceManager);
|
||||||
|
application_1.submit();
|
||||||
|
|
||||||
|
application_1.addNodeManager(host_0, 1234, nm_0);
|
||||||
|
application_1.addNodeManager(host_1, 1234, nm_1);
|
||||||
|
|
||||||
|
Resource capability_1_0 = Resources.createResource(3 * GB);
|
||||||
|
application_1.addResourceRequestSpec(priority_1, capability_1_0);
|
||||||
|
|
||||||
|
Resource capability_1_1 = Resources.createResource(4 * GB);
|
||||||
|
application_1.addResourceRequestSpec(priority_0, capability_1_1);
|
||||||
|
|
||||||
|
Task task_1_0 = new Task(application_1, priority_1,
|
||||||
|
new String[] {host_0, host_1});
|
||||||
|
application_1.addTask(task_1_0);
|
||||||
|
|
||||||
|
// Send resource requests to the scheduler
|
||||||
|
LOG.info("Send resource requests to the scheduler");
|
||||||
|
application_0.schedule();
|
||||||
|
application_1.schedule();
|
||||||
|
|
||||||
|
// Send a heartbeat to kick the tires on the Scheduler
|
||||||
|
LOG.info("Send a heartbeat to kick the tires on the Scheduler... " +
|
||||||
|
"nm0 -> task_0_0 and task_1_0 allocated, used=4G " +
|
||||||
|
"nm1 -> nothing allocated");
|
||||||
|
nm_0.heartbeat(); // task_0_0 and task_1_0 allocated, used=4G
|
||||||
|
nm_1.heartbeat(); // nothing allocated
|
||||||
|
|
||||||
|
// Get allocations from the scheduler
|
||||||
|
application_0.schedule(); // task_0_0
|
||||||
|
checkApplicationResourceUsage(GB, application_0);
|
||||||
|
|
||||||
|
application_1.schedule(); // task_1_0
|
||||||
|
checkApplicationResourceUsage(3 * GB, application_1);
|
||||||
|
|
||||||
|
nm_0.heartbeat();
|
||||||
|
nm_1.heartbeat();
|
||||||
|
|
||||||
|
checkNodeResourceUsage(4*GB, nm_0); // task_0_0 (1G) and task_1_0 (3G)
|
||||||
|
checkNodeResourceUsage(0*GB, nm_1); // no tasks, 2G available
|
||||||
|
|
||||||
|
LOG.info("Adding new tasks...");
|
||||||
|
|
||||||
|
Task task_1_1 = new Task(application_1, priority_1,
|
||||||
|
new String[] {ResourceRequest.ANY});
|
||||||
|
application_1.addTask(task_1_1);
|
||||||
|
|
||||||
|
Task task_1_2 = new Task(application_1, priority_1,
|
||||||
|
new String[] {ResourceRequest.ANY});
|
||||||
|
application_1.addTask(task_1_2);
|
||||||
|
|
||||||
|
Task task_1_3 = new Task(application_1, priority_0,
|
||||||
|
new String[] {ResourceRequest.ANY});
|
||||||
|
application_1.addTask(task_1_3);
|
||||||
|
|
||||||
|
application_1.schedule();
|
||||||
|
|
||||||
|
Task task_0_1 = new Task(application_0, priority_1,
|
||||||
|
new String[] {host_0, host_1});
|
||||||
|
application_0.addTask(task_0_1);
|
||||||
|
|
||||||
|
Task task_0_2 = new Task(application_0, priority_1,
|
||||||
|
new String[] {host_0, host_1});
|
||||||
|
application_0.addTask(task_0_2);
|
||||||
|
|
||||||
|
Task task_0_3 = new Task(application_0, priority_0,
|
||||||
|
new String[] {ResourceRequest.ANY});
|
||||||
|
application_0.addTask(task_0_3);
|
||||||
|
|
||||||
|
application_0.schedule();
|
||||||
|
|
||||||
|
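    // The new tasks mix locality constraints: the task_0_1/task_0_2 asks are
    // pinned to host_0/host_1, while the ResourceRequest.ANY tasks may land on
    // whichever node has headroom.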
    // Send a heartbeat to kick the tires on the Scheduler
    LOG.info("Sending hb from " + nm_0.getHostName());
    nm_0.heartbeat();                   // nothing new, used=4G

    LOG.info("Sending hb from " + nm_1.getHostName());
    nm_1.heartbeat();                   // task_0_3, used=2G

    // Get allocations from the scheduler
    LOG.info("Trying to allocate...");
    application_0.schedule();
    checkApplicationResourceUsage(3 * GB, application_0);
    application_1.schedule();
    checkApplicationResourceUsage(3 * GB, application_1);
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkNodeResourceUsage(4*GB, nm_0);
    checkNodeResourceUsage(2*GB, nm_1);
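    // nm_0 stays saturated at 4G, while task_0_3 (application_0's 2 GB ask at
    // priority_0) lands on nm_1, as the node checks above confirm.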
    // Complete tasks
    LOG.info("Finishing up task_0_0");
    application_0.finishTask(task_0_0); // Now task_0_1
    application_0.schedule();
    application_1.schedule();
    nm_0.heartbeat();
    nm_1.heartbeat();
    checkApplicationResourceUsage(3 * GB, application_0);
    checkApplicationResourceUsage(3 * GB, application_1);
    checkNodeResourceUsage(4*GB, nm_0);
    checkNodeResourceUsage(2*GB, nm_1);

    LOG.info("Finishing up task_1_0");
    application_1.finishTask(task_1_0); // Now task_0_2
    application_0.schedule(); // final overcommit for app0 caused here
    application_1.schedule();
    nm_0.heartbeat(); // final overcommit for app0 occurs here
    nm_1.heartbeat();
    checkApplicationResourceUsage(4 * GB, application_0);
    checkApplicationResourceUsage(0 * GB, application_1);
    //checkNodeResourceUsage(1*GB, nm_0);  // final over-commit -> rm.node->1G, test.node=2G
    checkNodeResourceUsage(2*GB, nm_1);
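    // The disabled nm_0 assertion above documents a known wrinkle: during the
    // over-commit the RM-side accounting and the test NodeManager appear to
    // disagree (rm.node at 1G vs. test.node at 2G), so only nm_1 is asserted.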
LOG.info("Finishing up task_0_3");
|
||||||
|
application_0.finishTask(task_0_3); // No more
|
||||||
|
application_0.schedule();
|
||||||
|
application_1.schedule();
|
||||||
|
nm_0.heartbeat();
|
||||||
|
nm_1.heartbeat();
|
||||||
|
checkApplicationResourceUsage(2 * GB, application_0);
|
||||||
|
checkApplicationResourceUsage(0 * GB, application_1);
|
||||||
|
//checkNodeResourceUsage(2*GB, nm_0); // final over-commit, rm.node->1G, test.node->2G
|
||||||
|
checkNodeResourceUsage(0*GB, nm_1);
|
||||||
|
|
||||||
|
LOG.info("Finishing up task_0_1");
|
||||||
|
application_0.finishTask(task_0_1);
|
||||||
|
application_0.schedule();
|
||||||
|
application_1.schedule();
|
||||||
|
nm_0.heartbeat();
|
||||||
|
nm_1.heartbeat();
|
||||||
|
checkApplicationResourceUsage(1 * GB, application_0);
|
||||||
|
checkApplicationResourceUsage(0 * GB, application_1);
|
||||||
|
|
||||||
|
LOG.info("Finishing up task_0_2");
|
||||||
|
application_0.finishTask(task_0_2); // now task_1_3 can go!
|
||||||
|
application_0.schedule();
|
||||||
|
application_1.schedule();
|
||||||
|
nm_0.heartbeat();
|
||||||
|
nm_1.heartbeat();
|
||||||
|
checkApplicationResourceUsage(0 * GB, application_0);
|
||||||
|
checkApplicationResourceUsage(4 * GB, application_1);
|
||||||
|
|
||||||
|
LOG.info("Finishing up task_1_3");
|
||||||
|
application_1.finishTask(task_1_3); // now task_1_1
|
||||||
|
application_0.schedule();
|
||||||
|
application_1.schedule();
|
||||||
|
nm_0.heartbeat();
|
||||||
|
nm_1.heartbeat();
|
||||||
|
checkApplicationResourceUsage(0 * GB, application_0);
|
||||||
|
checkApplicationResourceUsage(3 * GB, application_1);
|
||||||
|
|
||||||
|
LOG.info("Finishing up task_1_1");
|
||||||
|
application_1.finishTask(task_1_1);
|
||||||
|
application_0.schedule();
|
||||||
|
application_1.schedule();
|
||||||
|
nm_0.heartbeat();
|
||||||
|
nm_1.heartbeat();
|
||||||
|
checkApplicationResourceUsage(0 * GB, application_0);
|
||||||
|
checkApplicationResourceUsage(3 * GB, application_1);
|
||||||
|
|
||||||
|
LOG.info("--- END: testFifoScheduler ---");
|
||||||
|
}
|
||||||
|
|
||||||
|
  @SuppressWarnings("resource")
  @Test
  public void testBlackListNodes() throws Exception {
    Configuration conf = new Configuration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
        ResourceScheduler.class);
    MockRM rm = new MockRM(conf);
    rm.start();
    FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();

    String host = "127.0.0.1";
    RMNode node =
        MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, host);
    fs.handle(new NodeAddedSchedulerEvent(node));

    ApplicationId appId = BuilderUtils.newApplicationId(100, 1);
    ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
        appId, 1);
    SchedulerEvent appEvent =
        new AppAddedSchedulerEvent(appId, "default", "user");
    fs.handle(appEvent);
    SchedulerEvent attemptEvent =
        new AppAttemptAddedSchedulerEvent(appAttemptId, false);
    fs.handle(attemptEvent);

    // Verify the blacklist can be updated independent of requesting containers
    fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
        Collections.<ContainerId>emptyList(),
        Collections.singletonList(host), null);
    Assert.assertTrue(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
    fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
        Collections.<ContainerId>emptyList(), null,
        Collections.singletonList(host));
    Assert.assertFalse(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
    rm.stop();
  }
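  // The two allocate() calls above exercise the trailing parameters of the
  // scheduler's allocate(attemptId, asks, releases, blacklistAdditions,
  // blacklistRemovals) signature. For reference, an ApplicationMaster would
  // drive the same lists through AMRMClient#updateBlacklist; a sketch, not
  // part of this test:
  //   amRMClient.updateBlacklist(Collections.singletonList(host), null);  // add
  //   amRMClient.updateBlacklist(null, Collections.singletonList(host));  // remove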
  @Test
  public void testGetAppsInQueue() throws Exception {
    Application application_0 = new Application("user_0", resourceManager);
    application_0.submit();

    Application application_1 = new Application("user_0", resourceManager);
    application_1.submit();

    ResourceScheduler scheduler = resourceManager.getResourceScheduler();

    List<ApplicationAttemptId> appsInDefault = scheduler.getAppsInQueue("default");
    assertTrue(appsInDefault.contains(application_0.getApplicationAttemptId()));
    assertTrue(appsInDefault.contains(application_1.getApplicationAttemptId()));
    assertEquals(2, appsInDefault.size());

    Assert.assertNull(scheduler.getAppsInQueue("someotherqueue"));
  }
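  // Contract verified above: getAppsInQueue returns the attempt ids of every
  // application in an existing queue, and null (not an empty list) for a
  // queue the scheduler does not know about.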
  @Test
  public void testAddAndRemoveAppFromFiFoScheduler() throws Exception {
    Configuration conf = new Configuration();
    conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
        ResourceScheduler.class);
    MockRM rm = new MockRM(conf);
    FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();
    TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications,
        fs, "queue");
  }
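  // verifyAppAddedAndRemovedFromScheduler is a helper shared across scheduler
  // tests; it presumably drives APP_ADDED/APP_REMOVED style events through the
  // given scheduler and asserts that fs.applications is updated accordingly
  // (see TestSchedulerUtils for the exact protocol).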
  private void checkApplicationResourceUsage(int expected,
      Application application) {
    Assert.assertEquals(expected, application.getUsedResources().getMemory());
  }

  private void checkNodeResourceUsage(int expected,
      org.apache.hadoop.yarn.server.resourcemanager.NodeManager node) {
    Assert.assertEquals(expected, node.getUsed().getMemory());
    node.checkResourceUsage();
  }
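  // Both helpers check the RM-side bookkeeping; checkNodeResourceUsage also
  // calls node.checkResourceUsage() so the test NodeManager cross-checks its
  // own view of used resources.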
  public static void main(String[] arg) throws Exception {
    TestFifoScheduler t = new TestFifoScheduler();
    t.setUp();
    t.testFifoScheduler();
    t.tearDown();
  }
}
@@ -41,9 +41,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
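The three rmapp imports removed above (RMAppEvent, RMAppEventType,
RMAppFailedAttemptEvent) become dead once the hunk below stops synthesizing
attempt-failure events and instead fails real attempts through the mock
NodeManager.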
@@ -1387,31 +1384,30 @@ public class TestRMWebServicesApps extends JerseyTest {
     rm.stop();
   }

-  @Test
+  @Test (timeout = 20000)
   public void testMultipleAppAttempts() throws JSONException, Exception {
     rm.start();
     MockNM amNodeManager = rm.registerNode("127.0.0.1:1234", 8192);
     RMApp app1 = rm.submitApp(CONTAINER_MB, "testwordcount", "user1");
-    amNodeManager.nodeHeartbeat(true);
-    rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(),
-        RMAppAttemptState.ALLOCATED);
+    MockAM am = MockRM.launchAM(app1, rm, amNodeManager);
     int maxAppAttempts = rm.getConfig().getInt(
         YarnConfiguration.RM_AM_MAX_ATTEMPTS,
         YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
     assertTrue(maxAppAttempts > 1);
-    int retriesLeft = maxAppAttempts;
-    while (--retriesLeft > 0) {
-      RMAppEvent event =
-          new RMAppFailedAttemptEvent(app1.getApplicationId(),
-              RMAppEventType.ATTEMPT_FAILED, "", false);
-      app1.handle(event);
+    int numAttempt = 1;
+    while (true) {
+      // fail the AM by sending CONTAINER_FINISHED event without registering.
+      amNodeManager.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
+      am.waitForState(RMAppAttemptState.FAILED);
+      if (numAttempt == maxAppAttempts) {
+        rm.waitForState(app1.getApplicationId(), RMAppState.FAILED);
+        break;
+      }
+      // wait for app to start a new attempt.
       rm.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
-      amNodeManager.nodeHeartbeat(true);
+      am = MockRM.launchAM(app1, rm, amNodeManager);
+      numAttempt++;
     }
-    // kick the scheduler to allocate the am container.
-    amNodeManager.nodeHeartbeat(true);
-    rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(),
-        RMAppAttemptState.ALLOCATED);
     assertEquals("incorrect number of attempts", maxAppAttempts,
         app1.getAppAttempts().values().size());
     testAppAttemptsHelper(app1.getApplicationId().toString(), app1,
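The rewritten loop fails each attempt the way a live cluster would: the mock
NodeManager reports the AM container as COMPLETE before the AM registers, the
attempt is waited into RMAppAttemptState.FAILED, and a fresh AM is launched
until maxAppAttempts is exhausted, at which point the application as a whole
must reach RMAppState.FAILED. The new timeout on @Test bounds these
waitForState loops.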