Merging r1538408 through r1539244 from trunk to branch HDFS-2832
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2832@1539245 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
103fd6c6d8
|
@ -495,6 +495,9 @@ Release 2.3.0 - UNRELEASED
|
|||
HDFS-5257. addBlock() retry should return LocatedBlock with locations else client
|
||||
will get AIOBE. (Vinay via jing9)
|
||||
|
||||
HDFS-5427. Not able to read deleted files from snapshot directly under
|
||||
snapshottable dir after checkpoint and NN restart. (Vinay via jing9)
|
||||
|
||||
Release 2.2.1 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -579,6 +582,12 @@ Release 2.2.1 - UNRELEASED
|
|||
HDFS-5035. getFileLinkStatus and rename do not correctly check permissions
|
||||
of symlinks. (Andrew Wang via Colin Patrick McCabe)
|
||||
|
||||
HDFS-5456. NameNode startup progress creates new steps if caller attempts to
|
||||
create a counter for a step that doesn't already exist. (cnauroth)
|
||||
|
||||
HDFS-5458. Datanode failed volume threshold ignored if exception is thrown
|
||||
in getDataDirsFromURIs. (Mike Mellenthin via wang)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -1764,7 +1764,7 @@ public class DataNode extends Configured
|
|||
} catch (IOException ioe) {
|
||||
LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " "
|
||||
+ location.getFile() + " : ", ioe);
|
||||
invalidDirs.append("\"").append(location.getFile().getCanonicalPath()).append("\" ");
|
||||
invalidDirs.append("\"").append(location.getUri().getPath()).append("\" ");
|
||||
}
|
||||
}
|
||||
if (locations.size() == 0) {
|
||||
|
|
|
@ -588,8 +588,12 @@ public class FSImageFormat {
|
|||
namesystem.dir.cacheName(child);
|
||||
|
||||
if (child.isFile()) {
|
||||
updateBlocksMap(child.asFile());
|
||||
}
|
||||
}
|
||||
|
||||
public void updateBlocksMap(INodeFile file) {
|
||||
// Add file->block mapping
|
||||
final INodeFile file = child.asFile();
|
||||
final BlockInfo[] blocks = file.getBlocks();
|
||||
if (blocks != null) {
|
||||
final BlockManager bm = namesystem.getBlockManager();
|
||||
|
@ -598,7 +602,6 @@ public class FSImageFormat {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** @return The FSDirectory of the namesystem where the fsimage is loaded */
|
||||
public FSDirectory getFSDirectoryInLoading() {
|
||||
|
|
|
@ -203,6 +203,9 @@ public class SnapshotFSImageFormat {
|
|||
// useful, but set the parent here to be consistent with the original
|
||||
// fsdir tree.
|
||||
deleted.setParent(parent);
|
||||
if (deleted.isFile()) {
|
||||
loader.updateBlocksMap(deleted.asFile());
|
||||
}
|
||||
}
|
||||
return deletedList;
|
||||
}
|
||||
|
|
|
@ -149,8 +149,8 @@ public class StartupProgress {
|
|||
* @return Counter associated with phase and step
|
||||
*/
|
||||
public Counter getCounter(Phase phase, Step step) {
|
||||
final StepTracking tracking = lazyInitStep(phase, step);
|
||||
if (!isComplete()) {
|
||||
final StepTracking tracking = lazyInitStep(phase, step);
|
||||
return new Counter() {
|
||||
@Override
|
||||
public void increment() {
|
||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
|
|||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -36,6 +37,8 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
|||
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
|
@ -205,4 +208,70 @@ public class TestSnapshotBlocksMap {
|
|||
assertExceptionContains("File does not exist: " + s1f0, e);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to read the files inside snapshot but deleted in original place after
|
||||
* restarting post checkpoint. refer HDFS-5427
|
||||
*/
|
||||
@Test(timeout = 30000)
|
||||
public void testReadSnapshotFileWithCheckpoint() throws Exception {
|
||||
Path foo = new Path("/foo");
|
||||
hdfs.mkdirs(foo);
|
||||
hdfs.allowSnapshot(foo);
|
||||
Path bar = new Path("/foo/bar");
|
||||
DFSTestUtil.createFile(hdfs, bar, 100, (short) 2, 100024L);
|
||||
hdfs.createSnapshot(foo, "s1");
|
||||
assertTrue(hdfs.delete(bar, true));
|
||||
|
||||
// checkpoint
|
||||
NameNode nameNode = cluster.getNameNode();
|
||||
NameNodeAdapter.enterSafeMode(nameNode, false);
|
||||
NameNodeAdapter.saveNamespace(nameNode);
|
||||
NameNodeAdapter.leaveSafeMode(nameNode);
|
||||
|
||||
// restart namenode to load snapshot files from fsimage
|
||||
cluster.restartNameNode(true);
|
||||
String snapshotPath = Snapshot.getSnapshotPath(foo.toString(), "s1/bar");
|
||||
DFSTestUtil.readFile(hdfs, new Path(snapshotPath));
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to read the files inside snapshot but renamed to different file and
|
||||
* deleted after restarting post checkpoint. refer HDFS-5427
|
||||
*/
|
||||
@Test(timeout = 30000)
|
||||
public void testReadRenamedSnapshotFileWithCheckpoint() throws Exception {
|
||||
final Path foo = new Path("/foo");
|
||||
final Path foo2 = new Path("/foo2");
|
||||
hdfs.mkdirs(foo);
|
||||
hdfs.mkdirs(foo2);
|
||||
|
||||
hdfs.allowSnapshot(foo);
|
||||
hdfs.allowSnapshot(foo2);
|
||||
final Path bar = new Path(foo, "bar");
|
||||
final Path bar2 = new Path(foo2, "bar");
|
||||
DFSTestUtil.createFile(hdfs, bar, 100, (short) 2, 100024L);
|
||||
hdfs.createSnapshot(foo, "s1");
|
||||
// rename to another snapshottable directory and take snapshot
|
||||
assertTrue(hdfs.rename(bar, bar2));
|
||||
hdfs.createSnapshot(foo2, "s2");
|
||||
// delete the original renamed file to make sure blocks are not updated by
|
||||
// the original file
|
||||
assertTrue(hdfs.delete(bar2, true));
|
||||
|
||||
// checkpoint
|
||||
NameNode nameNode = cluster.getNameNode();
|
||||
NameNodeAdapter.enterSafeMode(nameNode, false);
|
||||
NameNodeAdapter.saveNamespace(nameNode);
|
||||
NameNodeAdapter.leaveSafeMode(nameNode);
|
||||
// restart namenode to load snapshot files from fsimage
|
||||
cluster.restartNameNode(true);
|
||||
// file in first snapshot
|
||||
String barSnapshotPath = Snapshot.getSnapshotPath(foo.toString(), "s1/bar");
|
||||
DFSTestUtil.readFile(hdfs, new Path(barSnapshotPath));
|
||||
// file in second snapshot after rename+delete
|
||||
String bar2SnapshotPath = Snapshot.getSnapshotPath(foo2.toString(),
|
||||
"s2/bar");
|
||||
DFSTestUtil.readFile(hdfs, new Path(bar2SnapshotPath));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -179,6 +179,14 @@ public class TestStartupProgress {
|
|||
startupProgress.endStep(LOADING_FSIMAGE, step);
|
||||
startupProgress.endPhase(LOADING_FSIMAGE);
|
||||
|
||||
// Also attempt a whole new step that wasn't used last time.
|
||||
startupProgress.beginPhase(LOADING_EDITS);
|
||||
Step newStep = new Step("file1");
|
||||
startupProgress.beginStep(LOADING_EDITS, newStep);
|
||||
incrementCounter(startupProgress, LOADING_EDITS, newStep, 100L);
|
||||
startupProgress.endStep(LOADING_EDITS, newStep);
|
||||
startupProgress.endPhase(LOADING_EDITS);
|
||||
|
||||
StartupProgressView after = startupProgress.createView();
|
||||
|
||||
// Expect that data was frozen after completion of entire startup process, so
|
||||
|
@ -200,6 +208,7 @@ public class TestStartupProgress {
|
|||
after.getTotal(LOADING_FSIMAGE));
|
||||
assertEquals(before.getTotal(LOADING_FSIMAGE, step),
|
||||
after.getTotal(LOADING_FSIMAGE, step));
|
||||
assertFalse(after.getSteps(LOADING_EDITS).iterator().hasNext());
|
||||
}
|
||||
|
||||
@Test(timeout=10000)
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
|
||||
|
@ -48,7 +49,7 @@ public class NodeInfo {
|
|||
private String nodeAddr;
|
||||
private String httpAddress;
|
||||
private int cmdPort;
|
||||
private Resource perNode;
|
||||
private volatile ResourceOption perNode;
|
||||
private String rackName;
|
||||
private String healthReport;
|
||||
private NodeState state;
|
||||
|
@ -56,7 +57,7 @@ public class NodeInfo {
|
|||
private List<ApplicationId> toCleanUpApplications;
|
||||
|
||||
public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
||||
Resource perNode, String rackName, String healthReport,
|
||||
ResourceOption perNode, String rackName, String healthReport,
|
||||
int cmdPort, String hostName, NodeState state) {
|
||||
this.nodeId = nodeId;
|
||||
this.nodeAddr = nodeAddr;
|
||||
|
@ -104,6 +105,10 @@ public class NodeInfo {
|
|||
}
|
||||
|
||||
public Resource getTotalCapability() {
|
||||
return perNode.getResource();
|
||||
}
|
||||
|
||||
public ResourceOption getResourceOption() {
|
||||
return perNode;
|
||||
}
|
||||
|
||||
|
@ -153,21 +158,27 @@ public class NodeInfo {
|
|||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setResourceOption(ResourceOption resourceOption) {
|
||||
perNode = resourceOption;
|
||||
}
|
||||
}
|
||||
|
||||
public static RMNode newNodeInfo(String rackName, String hostName,
|
||||
final Resource resource, int port) {
|
||||
final ResourceOption resourceOption, int port) {
|
||||
final NodeId nodeId = newNodeID(hostName, port);
|
||||
final String nodeAddr = hostName + ":" + port;
|
||||
final String httpAddress = hostName;
|
||||
|
||||
return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress,
|
||||
resource, rackName, "Me good",
|
||||
resourceOption, rackName, "Me good",
|
||||
port, hostName, null);
|
||||
}
|
||||
|
||||
public static RMNode newNodeInfo(String rackName, String hostName,
|
||||
final Resource resource) {
|
||||
return newNodeInfo(rackName, hostName, resource, NODE_ID++);
|
||||
return newNodeInfo(rackName, hostName, ResourceOption.newInstance(resource,
|
||||
RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT), NODE_ID++);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
|
|||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
|
||||
|
@ -138,10 +139,19 @@ public class RMNodeWrapper implements RMNode {
|
|||
return updates;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNodeManagerVersion() {
|
||||
// TODO Auto-generated method stub
|
||||
return null;
|
||||
}
|
||||
@Override
|
||||
public String getNodeManagerVersion() {
|
||||
return node.getNodeManagerVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setResourceOption(ResourceOption resourceOption) {
|
||||
node.setResourceOption(resourceOption);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResourceOption getResourceOption() {
|
||||
return node.getResourceOption();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -37,6 +37,9 @@ Release 2.3.0 - UNRELEASED
|
|||
YARN-1068. Add admin support for HA operations (Karthik Kambatla via
|
||||
bikas)
|
||||
|
||||
YARN-311. RM/scheduler support for dynamic resource configuration.
|
||||
(Junping Du via llu)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu)
|
||||
|
@ -82,6 +85,9 @@ Release 2.3.0 - UNRELEASED
|
|||
|
||||
YARN-786. Expose application resource usage in RM REST API (Sandy Ryza)
|
||||
|
||||
YARN-1323. Set HTTPS webapp address along with other RPC addresses in HAUtil
|
||||
(Karthik Kambatla via Sandy Ryza)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
@ -106,6 +112,9 @@ Release 2.3.0 - UNRELEASED
|
|||
YARN-1305. RMHAProtocolService#serviceInit should handle HAUtil's
|
||||
IllegalArgumentException (Tsuyoshi Ozawa via bikas)
|
||||
|
||||
YARN-1374. Changed ResourceManager to start the preemption policy monitors
|
||||
as active services. (Karthik Kambatla via vinodkv)
|
||||
|
||||
Release 2.2.1 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -135,6 +144,9 @@ Release 2.2.1 - UNRELEASED
|
|||
YARN-1321. Changed NMTokenCache to support both singleton and an instance
|
||||
usage. (Alejandro Abdelnur via vinodkv)
|
||||
|
||||
YARN-1388. Fair Scheduler page always displays blank fair share (Liyin Liang
|
||||
via Sandy Ryza)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.api.records;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Evolving;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
|
||||
@Public
|
||||
@Evolving
|
||||
public abstract class ResourceOption {
|
||||
|
||||
public static ResourceOption newInstance(Resource resource,
|
||||
int overCommitTimeout){
|
||||
ResourceOption resourceOption = Records.newRecord(ResourceOption.class);
|
||||
resourceOption.setResource(resource);
|
||||
return resourceOption;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the <em>resource</em> of the ResourceOption.
|
||||
* @return <em>resource</em> of the ResourceOption
|
||||
*/
|
||||
@Private
|
||||
@Evolving
|
||||
public abstract Resource getResource();
|
||||
|
||||
@Private
|
||||
@Evolving
|
||||
protected abstract void setResource(Resource resource);
|
||||
|
||||
/**
|
||||
* Get timeout for tolerant of resource over-commitment
|
||||
* Note: negative value means no timeout so that allocated containers will
|
||||
* keep running until the end even under resource over-commitment cases.
|
||||
* @return <em>overCommitTimeout</em> of the ResourceOption
|
||||
*/
|
||||
@Private
|
||||
@Evolving
|
||||
public abstract int getOverCommitTimeout();
|
||||
|
||||
@Private
|
||||
@Evolving
|
||||
protected abstract void setOverCommitTimeout(int overCommitTimeout);
|
||||
|
||||
protected abstract void build();
|
||||
|
||||
}
|
|
@ -42,6 +42,7 @@ public class HAUtil {
|
|||
YarnConfiguration.RM_ADMIN_ADDRESS,
|
||||
YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS,
|
||||
YarnConfiguration.RM_WEBAPP_ADDRESS,
|
||||
YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS,
|
||||
// TODO Remove after YARN-1318
|
||||
YarnConfiguration.RM_HA_ADMIN_ADDRESS));
|
||||
|
||||
|
|
|
@ -58,6 +58,11 @@ message ResourceProto {
|
|||
optional int32 virtual_cores = 2;
|
||||
}
|
||||
|
||||
message ResourceOptionProto {
|
||||
optional ResourceProto resource = 1;
|
||||
optional int32 over_commit_timeout = 2;
|
||||
}
|
||||
|
||||
message PriorityProto {
|
||||
optional int32 priority = 1;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.api.records.impl.pb;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceOptionProto;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
public class ResourceOptionPBImpl extends ResourceOption {
|
||||
|
||||
ResourceOptionProto proto = null;
|
||||
ResourceOptionProto.Builder builder = null;
|
||||
private Resource resource = null;
|
||||
|
||||
public ResourceOptionPBImpl() {
|
||||
builder = ResourceOptionProto.newBuilder();
|
||||
}
|
||||
|
||||
public ResourceOptionPBImpl(ResourceOptionProto proto) {
|
||||
this.proto = proto;
|
||||
this.resource = convertFromProtoFormat(proto.getResource());
|
||||
}
|
||||
|
||||
public ResourceOptionProto getProto() {
|
||||
return proto;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Resource getResource() {
|
||||
return this.resource;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setResource(Resource resource) {
|
||||
if (resource != null) {
|
||||
Preconditions.checkNotNull(builder);
|
||||
builder.setResource(convertToProtoFormat(resource));
|
||||
}
|
||||
this.resource = resource;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOverCommitTimeout() {
|
||||
Preconditions.checkNotNull(proto);
|
||||
return proto.getOverCommitTimeout();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void setOverCommitTimeout(int overCommitTimeout) {
|
||||
Preconditions.checkNotNull(builder);
|
||||
builder.setOverCommitTimeout(overCommitTimeout);
|
||||
}
|
||||
|
||||
private ResourceProto convertToProtoFormat(
|
||||
Resource resource) {
|
||||
return ((ResourcePBImpl)resource).getProto();
|
||||
}
|
||||
|
||||
private ResourcePBImpl convertFromProtoFormat(
|
||||
ResourceProto p) {
|
||||
return new ResourcePBImpl(p);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void build() {
|
||||
proto = builder.build();
|
||||
builder = null;
|
||||
}
|
||||
|
||||
}
|
|
@ -501,6 +501,36 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|||
|
||||
super.serviceStop();
|
||||
}
|
||||
|
||||
protected void createPolicyMonitors() {
|
||||
if (scheduler instanceof PreemptableResourceScheduler
|
||||
&& conf.getBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS,
|
||||
YarnConfiguration.DEFAULT_RM_SCHEDULER_ENABLE_MONITORS)) {
|
||||
LOG.info("Loading policy monitors");
|
||||
List<SchedulingEditPolicy> policies = conf.getInstances(
|
||||
YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
|
||||
SchedulingEditPolicy.class);
|
||||
if (policies.size() > 0) {
|
||||
rmDispatcher.register(ContainerPreemptEventType.class,
|
||||
new RMContainerPreemptEventDispatcher(
|
||||
(PreemptableResourceScheduler) scheduler));
|
||||
for (SchedulingEditPolicy policy : policies) {
|
||||
LOG.info("LOADING SchedulingEditPolicy:" + policy.getPolicyName());
|
||||
policy.init(conf, rmContext.getDispatcher().getEventHandler(),
|
||||
(PreemptableResourceScheduler) scheduler);
|
||||
// periodically check whether we need to take action to guarantee
|
||||
// constraints
|
||||
SchedulingMonitor mon = new SchedulingMonitor(policy);
|
||||
addService(mon);
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Policy monitors configured (" +
|
||||
YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS +
|
||||
") but none specified (" +
|
||||
YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Private
|
||||
|
@ -829,37 +859,6 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|||
return new ApplicationMasterService(this.rmContext, scheduler);
|
||||
}
|
||||
|
||||
protected void createPolicyMonitors() {
|
||||
if (scheduler instanceof PreemptableResourceScheduler
|
||||
&& conf.getBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS,
|
||||
YarnConfiguration.DEFAULT_RM_SCHEDULER_ENABLE_MONITORS)) {
|
||||
LOG.info("Loading policy monitors");
|
||||
List<SchedulingEditPolicy> policies = conf.getInstances(
|
||||
YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
|
||||
SchedulingEditPolicy.class);
|
||||
if (policies.size() > 0) {
|
||||
this.rmDispatcher.register(ContainerPreemptEventType.class,
|
||||
new RMContainerPreemptEventDispatcher(
|
||||
(PreemptableResourceScheduler) scheduler));
|
||||
for (SchedulingEditPolicy policy : policies) {
|
||||
LOG.info("LOADING SchedulingEditPolicy:" + policy.getPolicyName());
|
||||
policy.init(conf, this.rmContext.getDispatcher().getEventHandler(),
|
||||
(PreemptableResourceScheduler) scheduler);
|
||||
// periodically check whether we need to take action to guarantee
|
||||
// constraints
|
||||
SchedulingMonitor mon = new SchedulingMonitor(policy);
|
||||
addService(mon);
|
||||
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Policy monitors configured (" +
|
||||
YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS +
|
||||
") but none specified (" +
|
||||
YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected AdminService createAdminService(
|
||||
ClientRMService clientRMService,
|
||||
ApplicationMasterService applicationMasterService,
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.hadoop.service.AbstractService;
|
|||
import org.apache.hadoop.util.VersionUtil;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
|
@ -234,7 +235,8 @@ public class ResourceTrackerService extends AbstractService implements
|
|||
.getCurrentKey());
|
||||
|
||||
RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort,
|
||||
resolve(host), capability, nodeManagerVersion);
|
||||
resolve(host), ResourceOption.newInstance(capability, RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT),
|
||||
nodeManagerVersion);
|
||||
|
||||
RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
|
||||
if (oldNode == null) {
|
||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||
|
||||
/**
|
||||
|
@ -35,6 +37,9 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
|||
*/
|
||||
public interface RMNode {
|
||||
|
||||
/** negative value means no timeout */
|
||||
public static final int OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT = -1;
|
||||
|
||||
/**
|
||||
* the node id of this node.
|
||||
* @return the node id of this node.
|
||||
|
@ -94,7 +99,19 @@ public interface RMNode {
|
|||
* the total available resource.
|
||||
* @return the total available resource.
|
||||
*/
|
||||
public org.apache.hadoop.yarn.api.records.Resource getTotalCapability();
|
||||
public Resource getTotalCapability();
|
||||
|
||||
/**
|
||||
* Set resource option with total available resource and overCommitTimeoutMillis
|
||||
* @param resourceOption
|
||||
*/
|
||||
public void setResourceOption(ResourceOption resourceOption);
|
||||
|
||||
/**
|
||||
* resource option with total available resource and overCommitTimeoutMillis
|
||||
* @return ResourceOption
|
||||
*/
|
||||
public ResourceOption getResourceOption();
|
||||
|
||||
/**
|
||||
* The rack name for this node manager.
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
|
@ -92,7 +93,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
private final int httpPort;
|
||||
private final String nodeAddress; // The containerManager address
|
||||
private final String httpAddress;
|
||||
private final Resource totalCapability;
|
||||
private volatile ResourceOption resourceOption;
|
||||
private final Node node;
|
||||
|
||||
private String healthReport;
|
||||
|
@ -173,13 +174,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
RMNodeEvent> stateMachine;
|
||||
|
||||
public RMNodeImpl(NodeId nodeId, RMContext context, String hostName,
|
||||
int cmPort, int httpPort, Node node, Resource capability, String nodeManagerVersion) {
|
||||
int cmPort, int httpPort, Node node, ResourceOption resourceOption, String nodeManagerVersion) {
|
||||
this.nodeId = nodeId;
|
||||
this.context = context;
|
||||
this.hostName = hostName;
|
||||
this.commandPort = cmPort;
|
||||
this.httpPort = httpPort;
|
||||
this.totalCapability = capability;
|
||||
this.resourceOption = resourceOption;
|
||||
this.nodeAddress = hostName + ":" + cmPort;
|
||||
this.httpAddress = hostName + ":" + httpPort;
|
||||
this.node = node;
|
||||
|
@ -235,14 +236,24 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
|||
|
||||
@Override
|
||||
public Resource getTotalCapability() {
|
||||
return this.totalCapability;
|
||||
return this.resourceOption.getResource();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setResourceOption(ResourceOption resourceOption) {
|
||||
this.resourceOption = resourceOption;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResourceOption getResourceOption(){
|
||||
return this.resourceOption;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRackName() {
|
||||
return node.getNetworkLocation();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Node getNode() {
|
||||
return this.node;
|
||||
|
|
|
@ -67,6 +67,12 @@ public abstract class SchedulerNode {
|
|||
* @return number of active containers on the node
|
||||
*/
|
||||
public abstract int getNumContainers();
|
||||
|
||||
/**
|
||||
* Apply delta resource on node's available resource.
|
||||
* @param deltaResource the resource delta that needs to be applied to the node
|
||||
*/
|
||||
public abstract void applyDeltaOnAvailableResource(Resource deltaResource);
|
||||
|
||||
/**
|
||||
* Get total resources on the node.
|
||||
|
|
|
@ -19,20 +19,19 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceBlacklistRequestException;
|
||||
import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
|
||||
|
@ -147,6 +146,37 @@ public class SchedulerUtils {
|
|||
maximumResource, minimumResource);
|
||||
ask.setCapability(normalized);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update resource in SchedulerNode if any resource change in RMNode.
|
||||
* @param node SchedulerNode with old resource view
|
||||
* @param rmNode RMNode with new resource view
|
||||
* @param clusterResource the cluster's resource that need to update
|
||||
* @param log Scheduler's log for resource change
|
||||
*/
|
||||
public static void updateResourceIfChanged(SchedulerNode node,
|
||||
RMNode rmNode, Resource clusterResource, Log log) {
|
||||
Resource oldAvailableResource = node.getAvailableResource();
|
||||
Resource newAvailableResource = Resources.subtract(
|
||||
rmNode.getTotalCapability(), node.getUsedResource());
|
||||
|
||||
if (!newAvailableResource.equals(oldAvailableResource)) {
|
||||
Resource deltaResource = Resources.subtract(newAvailableResource,
|
||||
oldAvailableResource);
|
||||
// Reflect resource change to scheduler node.
|
||||
node.applyDeltaOnAvailableResource(deltaResource);
|
||||
// Reflect resource change to clusterResource.
|
||||
Resources.addTo(clusterResource, deltaResource);
|
||||
// TODO process resource over-commitment case (allocated containers
|
||||
// > total capacity) in different option by getting value of
|
||||
// overCommitTimeoutMillis.
|
||||
|
||||
// Log resource change
|
||||
log.info("Resource change on node: " + rmNode.getNodeAddress()
|
||||
+ " with delta: CPU: " + deltaResource.getMemory() + "core, Memory: "
|
||||
+ deltaResource.getMemory() +"MB");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to normalize a list of resource requests, by ensuring that
|
||||
|
|
|
@ -629,6 +629,10 @@ public class CapacityScheduler
|
|||
}
|
||||
|
||||
FiCaSchedulerNode node = getNode(nm.getNodeID());
|
||||
|
||||
// Update resource if any change
|
||||
SchedulerUtils.updateResourceIfChanged(node, nm, clusterResource, LOG);
|
||||
|
||||
List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates();
|
||||
List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>();
|
||||
List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>();
|
||||
|
@ -695,7 +699,7 @@ public class CapacityScheduler
|
|||
node.getReservedContainer().getContainerId().getApplicationAttemptId()
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void containerLaunchedOnNode(ContainerId containerId, FiCaSchedulerNode node) {
|
||||
|
|
|
@ -268,4 +268,10 @@ public class FiCaSchedulerNode extends SchedulerNode {
|
|||
return reservedContainer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void applyDeltaOnAvailableResource(Resource deltaResource) {
|
||||
// we can only adjust available resource if total resource is changed.
|
||||
Resources.addTo(this.availableResource, deltaResource);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -269,4 +269,11 @@ public class FSSchedulerNode extends SchedulerNode {
|
|||
public synchronized AppSchedulable getReservedAppSchedulable() {
|
||||
return reservedAppSchedulable;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void applyDeltaOnAvailableResource(Resource deltaResource) {
|
||||
// we can only adjust available resource if total resource is changed.
|
||||
Resources.addTo(this.availableResource, deltaResource);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -920,6 +920,9 @@ public class FairScheduler implements ResourceScheduler {
|
|||
eventLog.log("HEARTBEAT", nm.getHostName());
|
||||
FSSchedulerNode node = nodes.get(nm.getNodeID());
|
||||
|
||||
// Update resource if any change
|
||||
SchedulerUtils.updateResourceIfChanged(node, nm, clusterCapacity, LOG);
|
||||
|
||||
List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates();
|
||||
List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>();
|
||||
List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>();
|
||||
|
|
|
@ -100,7 +100,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
private final static List<Container> EMPTY_CONTAINER_LIST = Arrays.asList(EMPTY_CONTAINER_ARRAY);
|
||||
private RMContext rmContext;
|
||||
|
||||
private Map<NodeId, FiCaSchedulerNode> nodes = new ConcurrentHashMap<NodeId, FiCaSchedulerNode>();
|
||||
protected Map<NodeId, FiCaSchedulerNode> nodes = new ConcurrentHashMap<NodeId, FiCaSchedulerNode>();
|
||||
|
||||
private boolean initialized;
|
||||
private Resource minimumAllocation;
|
||||
|
@ -628,6 +628,9 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
private synchronized void nodeUpdate(RMNode rmNode) {
|
||||
FiCaSchedulerNode node = getNode(rmNode.getNodeID());
|
||||
|
||||
// Update resource if any change
|
||||
SchedulerUtils.updateResourceIfChanged(node, rmNode, clusterResource, LOG);
|
||||
|
||||
List<UpdatedContainerInfo> containerInfoList = rmNode.pullContainerUpdates();
|
||||
List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>();
|
||||
List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>();
|
||||
|
@ -661,7 +664,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
|
||||
metrics.setAvailableResourcesToQueue(
|
||||
Resources.subtract(clusterResource, usedResource));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handle(SchedulerEvent event) {
|
||||
|
|
|
@ -77,7 +77,8 @@ public class FairSchedulerPage extends RmView {
|
|||
if (maxApps < Integer.MAX_VALUE) {
|
||||
ri._("Max Running Applications:", qinfo.getMaxApplications());
|
||||
}
|
||||
ri._("Fair Share:", qinfo.getFairShare().toString());
|
||||
ri._("Fair Share:", StringEscapeUtils.escapeHtml(
|
||||
qinfo.getFairShare().toString()));
|
||||
|
||||
html._(InfoBlock.class);
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
|
|||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||
|
@ -93,14 +94,14 @@ public class MockNodes {
|
|||
private String nodeAddr;
|
||||
private String httpAddress;
|
||||
private int cmdPort;
|
||||
private Resource perNode;
|
||||
private ResourceOption perNode;
|
||||
private String rackName;
|
||||
private String healthReport;
|
||||
private long lastHealthReportTime;
|
||||
private NodeState state;
|
||||
|
||||
public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
||||
Resource perNode, String rackName, String healthReport,
|
||||
ResourceOption perNode, String rackName, String healthReport,
|
||||
long lastHealthReportTime, int cmdPort, String hostName, NodeState state) {
|
||||
this.nodeId = nodeId;
|
||||
this.nodeAddr = nodeAddr;
|
||||
|
@ -146,7 +147,7 @@ public class MockNodes {
|
|||
|
||||
@Override
|
||||
public Resource getTotalCapability() {
|
||||
return this.perNode;
|
||||
return this.perNode.getResource();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -202,6 +203,17 @@ public class MockNodes {
|
|||
public long getLastHealthReportTime() {
|
||||
return lastHealthReportTime;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setResourceOption(ResourceOption resourceOption) {
|
||||
this.perNode = resourceOption;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResourceOption getResourceOption(){
|
||||
return this.perNode;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr) {
|
||||
|
@ -220,8 +232,9 @@ public class MockNodes {
|
|||
|
||||
final String httpAddress = httpAddr;
|
||||
String healthReport = (state == NodeState.UNHEALTHY) ? null : "HealthyMe";
|
||||
return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, perNode, rackName,
|
||||
healthReport, 0, nid, hostName, state);
|
||||
return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress,
|
||||
ResourceOption.newInstance(perNode, RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT),
|
||||
rackName, healthReport, 0, nid, hostName, state);
|
||||
}
|
||||
|
||||
public static RMNode nodeInfo(int rack, final Resource perNode,
|
||||
|
|
|
@ -36,10 +36,12 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
|||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanAppEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
||||
|
@ -451,7 +453,8 @@ public class TestRMNodeTransitions {
|
|||
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
|
||||
Resource capability = Resource.newInstance(4096, 4);
|
||||
RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0,
|
||||
null, capability, null);
|
||||
null, ResourceOption.newInstance(capability,
|
||||
RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT), null);
|
||||
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED));
|
||||
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
||||
return node;
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.monitor;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
public class TestSchedulingMonitor {
|
||||
|
||||
@Test(timeout = 10000)
|
||||
public void testRMStarts() {
|
||||
Configuration conf = new YarnConfiguration();
|
||||
conf.setBoolean(YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS, true);
|
||||
conf.set(YarnConfiguration.RM_SCHEDULER_MONITOR_POLICIES,
|
||||
ProportionalCapacityPreemptionPolicy.class.getCanonicalName());
|
||||
|
||||
ResourceManager rm = new ResourceManager();
|
||||
try {
|
||||
rm.init(conf);
|
||||
} catch (Exception e) {
|
||||
fail("ResourceManager does not start when " +
|
||||
YarnConfiguration.RM_SCHEDULER_ENABLE_MONITORS + " is set to true");
|
||||
}
|
||||
}
|
||||
}
|
|
@ -18,10 +18,14 @@
|
|||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
|
@ -32,9 +36,11 @@ import org.apache.hadoop.net.NetworkTopology;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||
|
@ -55,6 +61,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
||||
|
@ -212,6 +219,92 @@ public class TestFifoScheduler {
|
|||
Assert.assertEquals(3, info.getLiveContainers().size());
|
||||
}
|
||||
|
||||
@Test(timeout=2000)
|
||||
public void testUpdateResourceOnNode() throws Exception {
|
||||
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||
Configuration conf = new Configuration();
|
||||
RMContainerTokenSecretManager containerTokenSecretManager =
|
||||
new RMContainerTokenSecretManager(conf);
|
||||
containerTokenSecretManager.rollMasterKey();
|
||||
NMTokenSecretManagerInRM nmTokenSecretManager =
|
||||
new NMTokenSecretManagerInRM(conf);
|
||||
nmTokenSecretManager.rollMasterKey();
|
||||
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
|
||||
null, containerTokenSecretManager, nmTokenSecretManager, null);
|
||||
|
||||
FifoScheduler scheduler = new FifoScheduler(){
|
||||
@SuppressWarnings("unused")
|
||||
public Map<NodeId, FiCaSchedulerNode> getNodes(){
|
||||
return nodes;
|
||||
}
|
||||
};
|
||||
scheduler.reinitialize(new Configuration(), rmContext);
|
||||
RMNode node0 = MockNodes.newNodeInfo(1,
|
||||
Resources.createResource(2048, 4), 1, "127.0.0.1");
|
||||
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
|
||||
scheduler.handle(nodeEvent1);
|
||||
|
||||
Method method = scheduler.getClass().getDeclaredMethod("getNodes");
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<NodeId, FiCaSchedulerNode> schedulerNodes =
|
||||
(Map<NodeId, FiCaSchedulerNode>) method.invoke(scheduler);
|
||||
assertEquals(schedulerNodes.values().size(), 1);
|
||||
|
||||
// set resource of RMNode to 1024 and verify it works.
|
||||
node0.setResourceOption(ResourceOption.newInstance(
|
||||
Resources.createResource(1024, 4), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT));
|
||||
assertEquals(node0.getTotalCapability().getMemory(), 1024);
|
||||
// verify that SchedulerNode's resource hasn't been changed.
|
||||
assertEquals(schedulerNodes.get(node0.getNodeID()).
|
||||
getAvailableResource().getMemory(), 2048);
|
||||
// now, NM heartbeat comes.
|
||||
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
|
||||
scheduler.handle(node0Update);
|
||||
// SchedulerNode's available resource is changed.
|
||||
assertEquals(schedulerNodes.get(node0.getNodeID()).
|
||||
getAvailableResource().getMemory(), 1024);
|
||||
QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||
Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
|
||||
|
||||
int _appId = 1;
|
||||
int _appAttemptId = 1;
|
||||
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
|
||||
_appAttemptId);
|
||||
AppAddedSchedulerEvent appEvent1 = new AppAddedSchedulerEvent(appAttemptId,
|
||||
"queue1", "user1");
|
||||
scheduler.handle(appEvent1);
|
||||
|
||||
int memory = 1024;
|
||||
int priority = 1;
|
||||
|
||||
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||
ResourceRequest nodeLocal = createResourceRequest(memory,
|
||||
node0.getHostName(), priority, 1);
|
||||
ResourceRequest rackLocal = createResourceRequest(memory,
|
||||
node0.getRackName(), priority, 1);
|
||||
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
|
||||
1);
|
||||
ask.add(nodeLocal);
|
||||
ask.add(rackLocal);
|
||||
ask.add(any);
|
||||
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
|
||||
|
||||
// Before the node update event, there are one local request
|
||||
Assert.assertEquals(1, nodeLocal.getNumContainers());
|
||||
|
||||
// Now schedule.
|
||||
scheduler.handle(node0Update);
|
||||
|
||||
// After the node update event, check no local request
|
||||
Assert.assertEquals(0, nodeLocal.getNumContainers());
|
||||
// Also check that one container was scheduled
|
||||
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
|
||||
Assert.assertEquals(1, info.getLiveContainers().size());
|
||||
// And check the default Queue now is full.
|
||||
queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||
Assert.assertEquals(1.0f, queueInfo.getCurrentCapacity());
|
||||
}
|
||||
|
||||
// @Test
|
||||
public void testFifoScheduler() throws Exception {
|
||||
|
||||
|
|
Loading…
Reference in New Issue