Merge r1555021 through r1558254 from trunk.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-5535@1558303 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
4a1abe5a3d
|
@ -430,6 +430,9 @@ Release 2.4.0 - UNRELEASED
|
|||
HADOOP-10173. Remove UGI from DIGEST-MD5 SASL server creation (daryn via
|
||||
kihwal)
|
||||
|
||||
HADOOP-10228. FsPermission#fromShort() should cache FsAction.values().
|
||||
(Haohui Mai via cnauroth)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize
|
||||
|
@ -511,6 +514,9 @@ Release 2.4.0 - UNRELEASED
|
|||
HADOOP-10214. Fix multithreaded correctness warnings in ActiveStandbyElector
|
||||
(Liang Xie via kasha)
|
||||
|
||||
HADOOP-10223. MiniKdc#main() should close the FileReader it creates.
|
||||
(Ted Yu via tucu)
|
||||
|
||||
Release 2.3.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -596,6 +602,9 @@ Release 2.3.0 - UNRELEASED
|
|||
HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream.
|
||||
(gchanan via tucu)
|
||||
|
||||
HADOOP-10178. Configuration deprecation always emit "deprecated" warnings
|
||||
when a new key is used. (Shanyu Zhao via cnauroth)
|
||||
|
||||
Release 2.2.0 - 2013-10-13
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -1323,6 +1332,18 @@ Release 2.1.0-beta - 2013-08-22
|
|||
|
||||
HADOOP-9701. mvn site ambiguous links in hadoop-common. (kkambatl via tucu)
|
||||
|
||||
Release 2.0.6-alpha - 08/22/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
||||
Release 2.0.5-alpha - 06/06/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -552,36 +552,6 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
|||
return deprecationContext.get().getDeprecatedKeyMap().containsKey(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the alternate name for a key if the property name is deprecated
|
||||
* or if it deprecates another property name.
|
||||
*
|
||||
* @param name property name.
|
||||
* @return alternate name.
|
||||
*/
|
||||
private String[] getAlternateNames(String name) {
|
||||
String altNames[] = null;
|
||||
DeprecationContext cur = deprecationContext.get();
|
||||
DeprecatedKeyInfo keyInfo = cur.getDeprecatedKeyMap().get(name);
|
||||
if (keyInfo == null) {
|
||||
altNames = (cur.getReverseDeprecatedKeyMap().get(name) != null ) ?
|
||||
new String [] {cur.getReverseDeprecatedKeyMap().get(name)} : null;
|
||||
if(altNames != null && altNames.length > 0) {
|
||||
//To help look for other new configs for this deprecated config
|
||||
keyInfo = cur.getDeprecatedKeyMap().get(altNames[0]);
|
||||
}
|
||||
}
|
||||
if(keyInfo != null && keyInfo.newKeys.length > 0) {
|
||||
List<String> list = new ArrayList<String>();
|
||||
if(altNames != null) {
|
||||
list.addAll(Arrays.asList(altNames));
|
||||
}
|
||||
list.addAll(Arrays.asList(keyInfo.newKeys));
|
||||
altNames = list.toArray(new String[list.size()]);
|
||||
}
|
||||
return altNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for the presence of the property <code>name</code> in the
|
||||
* deprecation map. Returns the first of the list of new keys if present
|
||||
|
@ -933,6 +903,37 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns alternative names (non-deprecated keys or previously-set deprecated keys)
|
||||
* for a given non-deprecated key.
|
||||
* If the given key is deprecated, return null.
|
||||
*
|
||||
* @param name property name.
|
||||
* @return alternative names.
|
||||
*/
|
||||
private String[] getAlternativeNames(String name) {
|
||||
String altNames[] = null;
|
||||
DeprecatedKeyInfo keyInfo = null;
|
||||
DeprecationContext cur = deprecationContext.get();
|
||||
String depKey = cur.getReverseDeprecatedKeyMap().get(name);
|
||||
if(depKey != null) {
|
||||
keyInfo = cur.getDeprecatedKeyMap().get(depKey);
|
||||
if(keyInfo.newKeys.length > 0) {
|
||||
if(getProps().containsKey(depKey)) {
|
||||
//if deprecated key is previously set explicitly
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.addAll(Arrays.asList(keyInfo.newKeys));
|
||||
list.add(depKey);
|
||||
altNames = list.toArray(new String[list.size()]);
|
||||
}
|
||||
else {
|
||||
altNames = keyInfo.newKeys;
|
||||
}
|
||||
}
|
||||
}
|
||||
return altNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the <code>value</code> of the <code>name</code> property. If
|
||||
* <code>name</code> is deprecated or there is a deprecated name associated to it,
|
||||
|
@ -947,9 +948,9 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
|||
|
||||
/**
|
||||
* Set the <code>value</code> of the <code>name</code> property. If
|
||||
* <code>name</code> is deprecated or there is a deprecated name associated to it,
|
||||
* it sets the value to both names.
|
||||
*
|
||||
* <code>name</code> is deprecated, it also sets the <code>value</code> to
|
||||
* the keys that replace the deprecated key.
|
||||
*
|
||||
* @param name property name.
|
||||
* @param value property value.
|
||||
* @param source the place that this configuration value came from
|
||||
|
@ -969,23 +970,30 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
|||
}
|
||||
getOverlay().setProperty(name, value);
|
||||
getProps().setProperty(name, value);
|
||||
if(source == null) {
|
||||
updatingResource.put(name, new String[] {"programatically"});
|
||||
} else {
|
||||
updatingResource.put(name, new String[] {source});
|
||||
}
|
||||
String[] altNames = getAlternateNames(name);
|
||||
if (altNames != null && altNames.length > 0) {
|
||||
String altSource = "because " + name + " is deprecated";
|
||||
for(String altName : altNames) {
|
||||
if(!altName.equals(name)) {
|
||||
getOverlay().setProperty(altName, value);
|
||||
getProps().setProperty(altName, value);
|
||||
updatingResource.put(altName, new String[] {altSource});
|
||||
String newSource = (source == null ? "programatically" : source);
|
||||
|
||||
if (!isDeprecated(name)) {
|
||||
updatingResource.put(name, new String[] {newSource});
|
||||
String[] altNames = getAlternativeNames(name);
|
||||
if(altNames != null) {
|
||||
for(String n: altNames) {
|
||||
if(!n.equals(name)) {
|
||||
getOverlay().setProperty(n, value);
|
||||
getProps().setProperty(n, value);
|
||||
updatingResource.put(n, new String[] {newSource});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
warnOnceIfDeprecated(deprecations, name);
|
||||
else {
|
||||
String[] names = handleDeprecation(deprecationContext.get(), name);
|
||||
String altSource = "because " + name + " is deprecated";
|
||||
for(String n : names) {
|
||||
getOverlay().setProperty(n, value);
|
||||
getProps().setProperty(n, value);
|
||||
updatingResource.put(n, new String[] {altSource});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void warnOnceIfDeprecated(DeprecationContext deprecations, String name) {
|
||||
|
@ -999,15 +1007,21 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
|||
* Unset a previously set property.
|
||||
*/
|
||||
public synchronized void unset(String name) {
|
||||
String[] altNames = getAlternateNames(name);
|
||||
getOverlay().remove(name);
|
||||
getProps().remove(name);
|
||||
if (altNames !=null && altNames.length > 0) {
|
||||
for(String altName : altNames) {
|
||||
getOverlay().remove(altName);
|
||||
getProps().remove(altName);
|
||||
String[] names = null;
|
||||
if (!isDeprecated(name)) {
|
||||
names = getAlternativeNames(name);
|
||||
if(names == null) {
|
||||
names = new String[]{name};
|
||||
}
|
||||
}
|
||||
else {
|
||||
names = handleDeprecation(deprecationContext.get(), name);
|
||||
}
|
||||
|
||||
for(String n: names) {
|
||||
getOverlay().remove(n);
|
||||
getProps().remove(n);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2600,4 +2614,18 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
|
|||
System.out.println(entry.getKey() + "\t" + newKeys.toString());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether or not a deprecation warning has been issued for the given
|
||||
* name. If the name is not deprecated, this always returns false.
|
||||
*/
|
||||
public static boolean hasWarnedDeprecation(String name) {
|
||||
DeprecationContext deprecations = deprecationContext.get();
|
||||
if(deprecations.getDeprecatedKeyMap().containsKey(name)) {
|
||||
if(deprecations.getDeprecatedKeyMap().get(name).accessed.get()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -120,8 +120,7 @@ public class FsPermission implements Writable {
|
|||
}
|
||||
|
||||
public void fromShort(short n) {
|
||||
FsAction[] v = FsAction.values();
|
||||
|
||||
FsAction[] v = FSACTION_VALUES;
|
||||
set(v[(n >>> 6) & 7], v[(n >>> 3) & 7], v[n & 7], (((n >>> 9) & 1) == 1) );
|
||||
}
|
||||
|
||||
|
@ -210,6 +209,8 @@ public class FsPermission implements Writable {
|
|||
public static final int DEFAULT_UMASK =
|
||||
CommonConfigurationKeys.FS_PERMISSIONS_UMASK_DEFAULT;
|
||||
|
||||
private static final FsAction[] FSACTION_VALUES = FsAction.values();
|
||||
|
||||
/**
|
||||
* Get the user file creation mask (umask)
|
||||
*
|
||||
|
|
|
@ -95,7 +95,7 @@ public class CompositeService extends AbstractService {
|
|||
|
||||
protected synchronized boolean removeService(Service service) {
|
||||
synchronized (serviceList) {
|
||||
return serviceList.add(service);
|
||||
return serviceList.remove(service);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.io.BufferedWriter;
|
|||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -399,4 +400,30 @@ public class TestConfigurationDeprecation {
|
|||
Uninterruptibles.getUninterruptibly(future);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoFalseDeprecationWarning() throws IOException {
|
||||
Configuration conf = new Configuration();
|
||||
Configuration.addDeprecation("AA", "BB");
|
||||
conf.set("BB", "bb");
|
||||
conf.get("BB");
|
||||
conf.writeXml(new ByteArrayOutputStream());
|
||||
assertEquals(false, Configuration.hasWarnedDeprecation("AA"));
|
||||
conf.set("AA", "aa");
|
||||
assertEquals(true, Configuration.hasWarnedDeprecation("AA"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeprecationSetUnset() throws IOException {
|
||||
addDeprecationToConfiguration();
|
||||
Configuration conf = new Configuration();
|
||||
//"X" is deprecated by "Y" and "Z"
|
||||
conf.set("Y", "y");
|
||||
assertEquals("y", conf.get("Z"));
|
||||
conf.set("X", "x");
|
||||
assertEquals("x", conf.get("Z"));
|
||||
conf.unset("Y");
|
||||
assertEquals(null, conf.get("Z"));
|
||||
assertEquals(null, conf.get("X"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -125,7 +125,15 @@ public class MiniKdc {
|
|||
+ file.getAbsolutePath());
|
||||
}
|
||||
Properties userConf = new Properties();
|
||||
userConf.load(new FileReader(file));
|
||||
FileReader r = null;
|
||||
try {
|
||||
r = new FileReader(file);
|
||||
userConf.load(r);
|
||||
} finally {
|
||||
if (r != null) {
|
||||
r.close();
|
||||
}
|
||||
}
|
||||
for (Map.Entry entry : userConf.entrySet()) {
|
||||
conf.put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
|
|
|
@ -741,6 +741,17 @@ Release 2.4.0 - UNRELEASED
|
|||
|
||||
HDFS-5449. WebHdfs compatibility broken between 2.2 and 1.x / 23.x (kihwal)
|
||||
|
||||
HDFS-5756. hadoopRzOptionsSetByteBufferPool does not accept NULL argument,
|
||||
contrary to docs. (cmccabe via wang)
|
||||
|
||||
HDFS-5747. Fix NPEs in BlockManager. (Arpit Agarwal)
|
||||
|
||||
HDFS-5710. FSDirectory#getFullPathName should check inodes against null.
|
||||
(Uma Maheswara Rao G via jing9)
|
||||
|
||||
HDFS-5579. Under construction files make DataNode decommission take very long
|
||||
hours. (zhaoyunjiong via jing9)
|
||||
|
||||
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
|
||||
|
||||
HDFS-4985. Add storage type to the protocol and expose it in block report
|
||||
|
@ -913,6 +924,9 @@ Release 2.3.0 - UNRELEASED
|
|||
HDFS-5675. Add Mkdirs operation to NNThroughputBenchmark.
|
||||
(Plamen Jeliazkov via shv)
|
||||
|
||||
HDFS-5677. Need error checking for HA cluster configuration.
|
||||
(Vincent Sheffer via cos)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
@ -2293,6 +2307,16 @@ Release 2.1.0-beta - 2013-08-22
|
|||
HDFS-4982. JournalNode should relogin from keytab before fetching logs
|
||||
from other JNs (todd)
|
||||
|
||||
Release 2.0.6-alpha - 08/22/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
Release 2.0.5-alpha - 06/06/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -585,6 +585,12 @@ public class DFSUtil {
|
|||
String address = getConfValue(defaultValue, suffix, conf, keys);
|
||||
if (address != null) {
|
||||
InetSocketAddress isa = NetUtils.createSocketAddr(address);
|
||||
if (isa.isUnresolved()) {
|
||||
LOG.warn("Namenode for " + nsId +
|
||||
" remains unresolved for ID " + nnId +
|
||||
". Check your hdfs-site.xml file to " +
|
||||
"ensure namenodes are configured properly.");
|
||||
}
|
||||
ret.put(nnId, isa);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ public interface BlockCollection {
|
|||
/**
|
||||
* Get the last block of the collection.
|
||||
*/
|
||||
public BlockInfo getLastBlock() throws IOException;
|
||||
public BlockInfo getLastBlock();
|
||||
|
||||
/**
|
||||
* Get content summary.
|
||||
|
|
|
@ -324,12 +324,14 @@ public class BlockInfoUnderConstruction extends BlockInfo {
|
|||
Iterator<ReplicaUnderConstruction> it = replicas.iterator();
|
||||
while (it.hasNext()) {
|
||||
ReplicaUnderConstruction r = it.next();
|
||||
if(r.getExpectedStorageLocation() == storage) {
|
||||
DatanodeStorageInfo expectedLocation = r.getExpectedStorageLocation();
|
||||
if(expectedLocation == storage) {
|
||||
// Record the gen stamp from the report
|
||||
r.setGenerationStamp(block.getGenerationStamp());
|
||||
return;
|
||||
} else if (r.getExpectedStorageLocation().getDatanodeDescriptor() ==
|
||||
storage.getDatanodeDescriptor()) {
|
||||
} else if (expectedLocation != null &&
|
||||
expectedLocation.getDatanodeDescriptor() ==
|
||||
storage.getDatanodeDescriptor()) {
|
||||
|
||||
// The Datanode reported that the block is on a different storage
|
||||
// than the one chosen by BlockPlacementPolicy. This can occur as
|
||||
|
|
|
@ -1233,8 +1233,10 @@ public class BlockManager {
|
|||
// block should belong to a file
|
||||
bc = blocksMap.getBlockCollection(block);
|
||||
// abandoned block or block reopened for append
|
||||
if(bc == null || bc.isUnderConstruction()) {
|
||||
neededReplications.remove(block, priority); // remove from neededReplications
|
||||
if (bc == null
|
||||
|| (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
|
||||
// remove from neededReplications
|
||||
neededReplications.remove(block, priority);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1314,7 +1316,7 @@ public class BlockManager {
|
|||
// block should belong to a file
|
||||
bc = blocksMap.getBlockCollection(block);
|
||||
// abandoned block or block reopened for append
|
||||
if(bc == null || bc.isUnderConstruction()) {
|
||||
if(bc == null || (bc.isUnderConstruction() && block.equals(bc.getLastBlock()))) {
|
||||
neededReplications.remove(block, priority); // remove from neededReplications
|
||||
rw.targets = null;
|
||||
continue;
|
||||
|
@ -3007,8 +3009,16 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|||
NumberReplicas num = countNodes(block);
|
||||
int curReplicas = num.liveReplicas();
|
||||
int curExpectedReplicas = getReplication(block);
|
||||
|
||||
if (isNeededReplication(block, curExpectedReplicas, curReplicas)) {
|
||||
if (curExpectedReplicas > curReplicas) {
|
||||
if (bc.isUnderConstruction()) {
|
||||
if (block.equals(bc.getLastBlock()) && curReplicas > minReplication) {
|
||||
continue;
|
||||
}
|
||||
underReplicatedInOpenFiles++;
|
||||
}
|
||||
|
||||
// Log info about one block for this node which needs replication
|
||||
if (!status) {
|
||||
status = true;
|
||||
|
@ -3025,9 +3035,6 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
|
|||
if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) {
|
||||
decommissionOnlyReplicas++;
|
||||
}
|
||||
if (bc.isUnderConstruction()) {
|
||||
underReplicatedInOpenFiles++;
|
||||
}
|
||||
}
|
||||
if (!neededReplications.contains(block) &&
|
||||
pendingReplications.getNumReplicas(block) == 0) {
|
||||
|
|
|
@ -1842,7 +1842,8 @@ public class FSDirectory implements Closeable {
|
|||
/** Return the full path name of the specified inode */
|
||||
static String getFullPathName(INode inode) {
|
||||
INode[] inodes = getFullPathINodes(inode);
|
||||
return getFullPathName(inodes, inodes.length - 1);
|
||||
// inodes can be null only when it's called without holding the lock
|
||||
return inodes == null ? "" : getFullPathName(inodes, inodes.length - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -640,7 +640,7 @@ public class INodeFile extends INodeWithAdditionalFields
|
|||
}
|
||||
|
||||
@Override
|
||||
public BlockInfo getLastBlock() throws IOException {
|
||||
public BlockInfo getLastBlock() {
|
||||
return blocks == null || blocks.length == 0? null: blocks[blocks.length-1];
|
||||
}
|
||||
|
||||
|
|
|
@ -547,8 +547,8 @@ public class NameNode implements NameNodeStatusMXBean {
|
|||
}
|
||||
|
||||
private void stopCommonServices() {
|
||||
if(namesystem != null) namesystem.close();
|
||||
if(rpcServer != null) rpcServer.stop();
|
||||
if(namesystem != null) namesystem.close();
|
||||
if (pauseMonitor != null) pauseMonitor.stop();
|
||||
if (plugins != null) {
|
||||
for (ServicePlugin p : plugins) {
|
||||
|
|
|
@ -2174,16 +2174,18 @@ int hadoopRzOptionsSetByteBufferPool(
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Note: we don't have to call hadoopRzOptionsClearCached in this
|
||||
// function, since the ByteBufferPool is passed separately from the
|
||||
// EnumSet of ReadOptions.
|
||||
if (className) {
|
||||
// Note: we don't have to call hadoopRzOptionsClearCached in this
|
||||
// function, since the ByteBufferPool is passed separately from the
|
||||
// EnumSet of ReadOptions.
|
||||
|
||||
jthr = constructNewObjectOfClass(env, &byteBufferPool, className, "()V");
|
||||
if (jthr) {
|
||||
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
|
||||
"hadoopRzOptionsSetByteBufferPool(className=%s): ", className);
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
jthr = constructNewObjectOfClass(env, &byteBufferPool, className, "()V");
|
||||
if (jthr) {
|
||||
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
|
||||
"hadoopRzOptionsSetByteBufferPool(className=%s): ", className);
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (opts->byteBufferPool) {
|
||||
// Delete any previous ByteBufferPool we had.
|
||||
|
|
|
@ -140,6 +140,12 @@ static int doTestZeroCopyReads(hdfsFS fs, const char *fileName)
|
|||
EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
|
||||
EXPECT_INT_EQ(EPROTONOSUPPORT, errno);
|
||||
|
||||
/* Verify that setting a NULL ByteBufferPool class works. */
|
||||
EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts, NULL));
|
||||
EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
|
||||
EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
|
||||
EXPECT_INT_EQ(EPROTONOSUPPORT, errno);
|
||||
|
||||
/* Now set a ByteBufferPool and try again. It should succeed this time. */
|
||||
EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts,
|
||||
ELASTIC_BYTE_BUFFER_POOL_CLASS));
|
||||
|
|
|
@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
|||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
|
@ -779,4 +780,53 @@ public class TestDecommission {
|
|||
Thread.sleep(HEARTBEAT_INTERVAL * 1000);
|
||||
}
|
||||
}
|
||||
|
||||
@Test(timeout=120000)
|
||||
public void testDecommissionWithOpenfile() throws IOException, InterruptedException {
|
||||
LOG.info("Starting test testDecommissionWithOpenfile");
|
||||
|
||||
//At most 4 nodes will be decommissioned
|
||||
startCluster(1, 7, conf);
|
||||
|
||||
FileSystem fileSys = cluster.getFileSystem(0);
|
||||
FSNamesystem ns = cluster.getNamesystem(0);
|
||||
|
||||
String openFile = "/testDecommissionWithOpenfile.dat";
|
||||
|
||||
writeFile(fileSys, new Path(openFile), (short)3);
|
||||
// make sure the file was open for write
|
||||
FSDataOutputStream fdos = fileSys.append(new Path(openFile));
|
||||
|
||||
LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(cluster.getNameNode(0), openFile, 0, fileSize);
|
||||
|
||||
DatanodeInfo[] dnInfos4LastBlock = lbs.getLastLocatedBlock().getLocations();
|
||||
DatanodeInfo[] dnInfos4FirstBlock = lbs.get(0).getLocations();
|
||||
|
||||
ArrayList<String> nodes = new ArrayList<String>();
|
||||
ArrayList<DatanodeInfo> dnInfos = new ArrayList<DatanodeInfo>();
|
||||
|
||||
for (DatanodeInfo datanodeInfo : dnInfos4FirstBlock) {
|
||||
DatanodeInfo found = datanodeInfo;
|
||||
for (DatanodeInfo dif: dnInfos4LastBlock) {
|
||||
if (datanodeInfo.equals(dif)) {
|
||||
found = null;
|
||||
}
|
||||
}
|
||||
if (found != null) {
|
||||
nodes.add(found.getXferAddr());
|
||||
dnInfos.add(found);
|
||||
}
|
||||
}
|
||||
//decommission one of the 3 nodes which have the last block
|
||||
nodes.add(dnInfos4LastBlock[0].getXferAddr());
|
||||
dnInfos.add(dnInfos4LastBlock[0]);
|
||||
|
||||
writeConfigFile(excludeFile, nodes);
|
||||
refreshNodes(ns, conf);
|
||||
for (DatanodeInfo dn : dnInfos) {
|
||||
waitNodeState(dn, AdminStates.DECOMMISSIONED);
|
||||
}
|
||||
|
||||
fdos.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -998,6 +998,16 @@ Release 2.1.0-beta - 2013-08-22
|
|||
MAPREDUCE-4374. Fix child task environment variable config and add support
|
||||
for Windows. (Chuan Liu via cnauroth)
|
||||
|
||||
Release 2.0.6-alpha - 08/22/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
Release 2.0.5-alpha - 06/06/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -760,7 +760,28 @@
|
|||
<groupId>com.google.code.findbugs</groupId>
|
||||
<artifactId>jsr305</artifactId>
|
||||
<version>1.3.9</version>
|
||||
</dependency>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
<version>2.2.2</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jettison</groupId>
|
||||
<artifactId>jettison</artifactId>
|
||||
<version>1.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-client</artifactId>
|
||||
<version>${jersey.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.glassfish.grizzly</groupId>
|
||||
<artifactId>grizzly-http-servlet</artifactId>
|
||||
<version>2.1.2</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
|
||||
|
|
|
@ -860,4 +860,9 @@ public class ResourceSchedulerWrapper implements
|
|||
public List<ApplicationAttemptId> getAppsInQueue(String queue) {
|
||||
return scheduler.getAppsInQueue(queue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public RMContainer getRMContainer(ContainerId containerId) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -58,6 +58,18 @@ Release 2.4.0 - UNRELEASED
|
|||
YARN-1029. Added embedded leader election in the ResourceManager. (Karthik
|
||||
Kambatla via vinodkv)
|
||||
|
||||
YARN-1490. Introduced the ability to make ResourceManager optionally not kill
|
||||
all containers when an ApplicationMaster exits. (Jian He via vinodkv)
|
||||
|
||||
YARN-1033. Expose RM active/standby state to Web UI and REST API (kasha)
|
||||
|
||||
YARN-1041. Added the ApplicationMasterProtocol API for applications to use the
|
||||
ability in ResourceManager to optionally not kill containers when the
|
||||
ApplicationMaster exits. (Jian He via vinodkv)
|
||||
|
||||
YARN-1566. Changed Distributed Shell to retain containers across application
|
||||
attempts. (Jian He via vinodkv)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu)
|
||||
|
@ -207,6 +219,13 @@ Release 2.4.0 - UNRELEASED
|
|||
|
||||
YARN-1579. ActiveRMInfoProto fields should be optional (kasha)
|
||||
|
||||
YARN-888. Cleaned up POM files so that non-leaf modules don't include any
|
||||
dependencies and thus compact the dependency list for leaf modules.
|
||||
(Alejandro Abdelnur via vinodkv)
|
||||
|
||||
YARN-1567. In Fair Scheduler, allow empty queues to change between leaf and
|
||||
parent on allocation file reload (Sandy Ryza)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
@ -308,6 +327,14 @@ Release 2.4.0 - UNRELEASED
|
|||
YARN-1293. Fixed TestContainerLaunch#testInvalidEnvSyntaxDiagnostics failure
|
||||
caused by non-English system locale. (Tsuyoshi OZAWA via jianhe)
|
||||
|
||||
YARN-1574. RMDispatcher should be reset on transition to standby. (Xuan Gong
|
||||
via kasha)
|
||||
|
||||
YARN-1166. Fixed app-specific and attempt-specific QueueMetrics to be
|
||||
triggered by accordingly app event and attempt event.
|
||||
|
||||
YARN-1598. HA-related rmadmin commands don't work on a secure cluster (kasha)
|
||||
|
||||
Release 2.3.0 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
@ -1513,6 +1540,21 @@ Release 2.1.0-beta - 2013-08-22
|
|||
yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs}
|
||||
(Karthik Kambatla via acmurthy)
|
||||
|
||||
Release 2.0.6-alpha - 08/22/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
||||
NEW FEATURES
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
||||
YARN-854. Fixing YARN bugs that are failing applications in secure
|
||||
environment. (Omkar Vinit Joshi and shv)
|
||||
|
||||
Release 2.0.5-alpha - 06/06/2013
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -32,6 +32,56 @@
|
|||
<yarn.basedir>${project.parent.basedir}</yarn.basedir>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
package org.apache.hadoop.yarn.api.protocolrecords;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
|
@ -27,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceStability.Stable;
|
|||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
|
||||
|
@ -47,16 +49,19 @@ import org.apache.hadoop.yarn.util.Records;
|
|||
@Public
|
||||
@Stable
|
||||
public abstract class RegisterApplicationMasterResponse {
|
||||
|
||||
@Private
|
||||
@Unstable
|
||||
public static RegisterApplicationMasterResponse newInstance(
|
||||
Resource minCapability, Resource maxCapability,
|
||||
Map<ApplicationAccessType, String> acls, ByteBuffer key) {
|
||||
Map<ApplicationAccessType, String> acls, ByteBuffer key,
|
||||
List<Container> containersFromPreviousAttempt) {
|
||||
RegisterApplicationMasterResponse response =
|
||||
Records.newRecord(RegisterApplicationMasterResponse.class);
|
||||
response.setMaximumResourceCapability(maxCapability);
|
||||
response.setApplicationACLs(acls);
|
||||
response.setClientToAMTokenMasterKey(key);
|
||||
response.setContainersFromPreviousAttempt(containersFromPreviousAttempt);
|
||||
return response;
|
||||
}
|
||||
|
||||
|
@ -105,4 +110,30 @@ public abstract class RegisterApplicationMasterResponse {
|
|||
@Public
|
||||
@Stable
|
||||
public abstract void setClientToAMTokenMasterKey(ByteBuffer key);
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Get the list of running containers as viewed by
|
||||
* <code>ResourceManager</code> from previous application attempt.
|
||||
* </p>
|
||||
*
|
||||
* @return the list of running containers as viewed by
|
||||
* <code>ResourceManager</code> from previous application attempt
|
||||
*/
|
||||
@Public
|
||||
@Unstable
|
||||
public abstract List<Container> getContainersFromPreviousAttempt();
|
||||
|
||||
/**
|
||||
* Set the list of running containers as viewed by
|
||||
* <code>ResourceManager</code> from previous application attempt.
|
||||
*
|
||||
* @param containersFromPreviousAttempt
|
||||
* the list of running containers as viewed by
|
||||
* <code>ResourceManager</code> from previous application attempt.
|
||||
*/
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract void setContainersFromPreviousAttempt(
|
||||
List<Container> containersFromPreviousAttempt);
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
|
|||
import org.apache.hadoop.classification.InterfaceStability.Stable;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
|
||||
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
|
||||
/**
|
||||
|
@ -57,7 +58,8 @@ public abstract class ApplicationSubmissionContext {
|
|||
ApplicationId applicationId, String applicationName, String queue,
|
||||
Priority priority, ContainerLaunchContext amContainer,
|
||||
boolean isUnmanagedAM, boolean cancelTokensWhenComplete,
|
||||
int maxAppAttempts, Resource resource, String applicationType) {
|
||||
int maxAppAttempts, Resource resource, String applicationType,
|
||||
boolean keepContainers) {
|
||||
ApplicationSubmissionContext context =
|
||||
Records.newRecord(ApplicationSubmissionContext.class);
|
||||
context.setApplicationId(applicationId);
|
||||
|
@ -70,9 +72,22 @@ public abstract class ApplicationSubmissionContext {
|
|||
context.setMaxAppAttempts(maxAppAttempts);
|
||||
context.setResource(resource);
|
||||
context.setApplicationType(applicationType);
|
||||
context.setKeepContainersAcrossApplicationAttempts(keepContainers);
|
||||
return context;
|
||||
}
|
||||
|
||||
@Public
|
||||
@Stable
|
||||
public static ApplicationSubmissionContext newInstance(
|
||||
ApplicationId applicationId, String applicationName, String queue,
|
||||
Priority priority, ContainerLaunchContext amContainer,
|
||||
boolean isUnmanagedAM, boolean cancelTokensWhenComplete,
|
||||
int maxAppAttempts, Resource resource, String applicationType) {
|
||||
return newInstance(applicationId, applicationName, queue, priority,
|
||||
amContainer, isUnmanagedAM, cancelTokensWhenComplete, maxAppAttempts,
|
||||
resource, null, false);
|
||||
}
|
||||
|
||||
@Public
|
||||
@Stable
|
||||
public static ApplicationSubmissionContext newInstance(
|
||||
|
@ -268,4 +283,35 @@ public abstract class ApplicationSubmissionContext {
|
|||
@Public
|
||||
@Stable
|
||||
public abstract void setApplicationType(String applicationType);
|
||||
|
||||
|
||||
/**
|
||||
* Get the flag which indicates whether to keep containers across application
|
||||
* attempts or not.
|
||||
*
|
||||
* @return the flag which indicates whether to keep containers across
|
||||
* application attempts or not.
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
public abstract boolean getKeepContainersAcrossApplicationAttempts();
|
||||
|
||||
/**
|
||||
* Set the flag which indicates whether to keep containers across application
|
||||
* attempts.
|
||||
* <p>
|
||||
* If the flag is true, running containers will not be killed when application
|
||||
* attempt fails and these containers will be retrieved by the new application
|
||||
* attempt on registration via
|
||||
* {@link ApplicationMasterProtocol#registerApplicationMaster(RegisterApplicationMasterRequest)}.
|
||||
* </p>
|
||||
*
|
||||
* @param keepContainers
|
||||
* the flag which indicates whether to keep containers across
|
||||
* application attempts.
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
public abstract void setKeepContainersAcrossApplicationAttempts(
|
||||
boolean keepContainers);
|
||||
}
|
|
@ -46,10 +46,20 @@ public abstract class ContainerId implements Comparable<ContainerId>{
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the <code>ApplicationAttemptId</code> of the application to which
|
||||
* the <code>Container</code> was assigned.
|
||||
* @return <code>ApplicationAttemptId</code> of the application to which
|
||||
* the <code>Container</code> was assigned
|
||||
* Get the <code>ApplicationAttemptId</code> of the application to which the
|
||||
* <code>Container</code> was assigned.
|
||||
* <p>
|
||||
* Note: If containers are kept alive across application attempts via
|
||||
* {@link ApplicationSubmissionContext#setKeepContainersAcrossApplicationAttempts(boolean)}
|
||||
* the <code>ContainerId</code> does not necessarily contain the current
|
||||
* running application attempt's <code>ApplicationAttemptId</code>. This
|
||||
* container can be allocated by a previously exited application attempt and
|
||||
* managed by the current running attempt, and thus have the previous application
|
||||
* attempt's <code>ApplicationAttemptId</code>.
|
||||
* </p>
|
||||
*
|
||||
* @return <code>ApplicationAttemptId</code> of the application to which the
|
||||
* <code>Container</code> was assigned
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
|
|
|
@ -248,6 +248,7 @@ message ApplicationSubmissionContextProto {
|
|||
optional int32 maxAppAttempts = 8 [default = 0];
|
||||
optional ResourceProto resource = 9;
|
||||
optional string applicationType = 10 [default = "YARN"];
|
||||
optional bool keep_containers_across_application_attempts = 11 [default = false];
|
||||
}
|
||||
|
||||
enum ApplicationAccessTypeProto {
|
||||
|
|
|
@ -44,6 +44,7 @@ message RegisterApplicationMasterResponseProto {
|
|||
optional ResourceProto maximumCapability = 1;
|
||||
optional bytes client_to_am_token_master_key = 2;
|
||||
repeated ApplicationACLMapProto application_ACLs = 3;
|
||||
repeated ContainerProto containers_from_previous_attempt = 4;
|
||||
}
|
||||
|
||||
message FinishApplicationMasterRequestProto {
|
||||
|
|
|
@ -33,33 +33,101 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-client</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-nodemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-tests</artifactId>
|
||||
|
|
|
@ -37,7 +37,6 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.GnuParser;
|
||||
import org.apache.commons.cli.HelpFormatter;
|
||||
|
@ -89,6 +88,8 @@ import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
|||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
/**
|
||||
* An ApplicationMaster for executing shell commands on a set of launched
|
||||
* containers using the YARN framework.
|
||||
|
@ -169,7 +170,8 @@ public class ApplicationMaster {
|
|||
private NMCallbackHandler containerListener;
|
||||
|
||||
// Application Attempt Id ( combination of attemptId and fail count )
|
||||
private ApplicationAttemptId appAttemptID;
|
||||
@VisibleForTesting
|
||||
protected ApplicationAttemptId appAttemptID;
|
||||
|
||||
// TODO
|
||||
// For status update for clients - yet to be implemented
|
||||
|
@ -194,13 +196,15 @@ public class ApplicationMaster {
|
|||
private AtomicInteger numCompletedContainers = new AtomicInteger();
|
||||
// Allocated container count so that we know how many containers the RM has
|
||||
// allocated to us
|
||||
private AtomicInteger numAllocatedContainers = new AtomicInteger();
|
||||
@VisibleForTesting
|
||||
protected AtomicInteger numAllocatedContainers = new AtomicInteger();
|
||||
// Count of failed containers
|
||||
private AtomicInteger numFailedContainers = new AtomicInteger();
|
||||
// Count of containers already requested from the RM
|
||||
// Needed as once requested, we should not request for containers again.
|
||||
// Only request for more if the original requirement changes.
|
||||
private AtomicInteger numRequestedContainers = new AtomicInteger();
|
||||
@VisibleForTesting
|
||||
protected AtomicInteger numRequestedContainers = new AtomicInteger();
|
||||
|
||||
// Shell command to be executed
|
||||
private String shellCommand = "";
|
||||
|
@ -251,6 +255,7 @@ public class ApplicationMaster {
|
|||
System.exit(0);
|
||||
}
|
||||
result = appMaster.run();
|
||||
appMaster.finish();
|
||||
} catch (Throwable t) {
|
||||
LOG.fatal("Error running ApplicationMaster", t);
|
||||
System.exit(1);
|
||||
|
@ -537,26 +542,25 @@ public class ApplicationMaster {
|
|||
containerVirtualCores = maxVCores;
|
||||
}
|
||||
|
||||
List<Container> previousAMRunningContainers =
|
||||
response.getContainersFromPreviousAttempt();
|
||||
LOG.info("Received " + previousAMRunningContainers.size()
|
||||
+ " previous AM's running containers on AM registration.");
|
||||
numAllocatedContainers.addAndGet(previousAMRunningContainers.size());
|
||||
|
||||
int numTotalContainersToRequest =
|
||||
numTotalContainers - previousAMRunningContainers.size();
|
||||
// Setup ask for containers from RM
|
||||
// Send request for containers to RM
|
||||
// Until we get our fully allocated quota, we keep on polling RM for
|
||||
// containers
|
||||
// Keep looping until all the containers are launched and shell script
|
||||
// executed on them ( regardless of success/failure).
|
||||
for (int i = 0; i < numTotalContainers; ++i) {
|
||||
for (int i = 0; i < numTotalContainersToRequest; ++i) {
|
||||
ContainerRequest containerAsk = setupContainerAskForRM();
|
||||
amRMClient.addContainerRequest(containerAsk);
|
||||
}
|
||||
numRequestedContainers.set(numTotalContainers);
|
||||
|
||||
while (!done
|
||||
&& (numCompletedContainers.get() != numTotalContainers)) {
|
||||
try {
|
||||
Thread.sleep(200);
|
||||
} catch (InterruptedException ex) {}
|
||||
}
|
||||
finish();
|
||||
|
||||
numRequestedContainers.set(numTotalContainersToRequest);
|
||||
return success;
|
||||
}
|
||||
|
||||
|
@ -565,7 +569,15 @@ public class ApplicationMaster {
|
|||
return new NMCallbackHandler(this);
|
||||
}
|
||||
|
||||
private void finish() {
|
||||
protected void finish() {
|
||||
// wait for completion.
|
||||
while (!done
|
||||
&& (numCompletedContainers.get() != numTotalContainers)) {
|
||||
try {
|
||||
Thread.sleep(200);
|
||||
} catch (InterruptedException ex) {}
|
||||
}
|
||||
|
||||
// Join all launched threads
|
||||
// needed for when we time out
|
||||
// and we need to release containers
|
||||
|
|
|
@ -162,6 +162,9 @@ public class Client {
|
|||
// Timeout threshold for client. Kill app after time interval expires.
|
||||
private long clientTimeout = 600000;
|
||||
|
||||
// flag to indicate whether to keep containers across application attempts.
|
||||
private boolean keepContainers = false;
|
||||
|
||||
// Debug flag
|
||||
boolean debugFlag = false;
|
||||
|
||||
|
@ -243,6 +246,11 @@ public class Client {
|
|||
opts.addOption("container_vcores", true, "Amount of virtual cores to be requested to run the shell command");
|
||||
opts.addOption("num_containers", true, "No. of containers on which the shell command needs to be executed");
|
||||
opts.addOption("log_properties", true, "log4j.properties file");
|
||||
opts.addOption("keep_containers_across_application_attempts", false,
|
||||
"Flag to indicate whether to keep containers across application attempts." +
|
||||
" If the flag is true, running containers will not be killed when" +
|
||||
" application attempt fails and these containers will be retrieved by" +
|
||||
" the new application attempt ");
|
||||
opts.addOption("debug", false, "Dump out debug information");
|
||||
opts.addOption("help", false, "Print usage");
|
||||
|
||||
|
@ -294,12 +302,17 @@ public class Client {
|
|||
|
||||
}
|
||||
|
||||
if (cliParser.hasOption("keep_containers_across_application_attempts")) {
|
||||
LOG.info("keep_containers_across_application_attempts");
|
||||
keepContainers = true;
|
||||
}
|
||||
|
||||
appName = cliParser.getOptionValue("appname", "DistributedShell");
|
||||
amPriority = Integer.parseInt(cliParser.getOptionValue("priority", "0"));
|
||||
amQueue = cliParser.getOptionValue("queue", "default");
|
||||
amMemory = Integer.parseInt(cliParser.getOptionValue("master_memory", "10"));
|
||||
amVCores = Integer.parseInt(cliParser.getOptionValue("master_vcores", "1"));
|
||||
|
||||
|
||||
if (amMemory < 0) {
|
||||
throw new IllegalArgumentException("Invalid memory specified for application master, exiting."
|
||||
+ " Specified memory=" + amMemory);
|
||||
|
@ -442,6 +455,8 @@ public class Client {
|
|||
// set the application name
|
||||
ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
|
||||
ApplicationId appId = appContext.getApplicationId();
|
||||
|
||||
appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
|
||||
appContext.setApplicationName(appName);
|
||||
|
||||
// Set up the container launch context for the application master
|
||||
|
|
|
@ -67,6 +67,7 @@ public class ContainerLaunchFailAppMaster extends ApplicationMaster {
|
|||
System.exit(0);
|
||||
}
|
||||
result = appMaster.run();
|
||||
appMaster.finish();
|
||||
} catch (Throwable t) {
|
||||
LOG.fatal("Error running ApplicationMaster", t);
|
||||
System.exit(1);
|
||||
|
|
|
@ -0,0 +1,78 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.applications.distributedshell;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
|
||||
public class TestDSFailedAppMaster extends ApplicationMaster {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(TestDSFailedAppMaster.class);
|
||||
|
||||
@Override
|
||||
public boolean run() throws YarnException, IOException {
|
||||
boolean res = super.run();
|
||||
|
||||
// for the 2nd attempt.
|
||||
if (appAttemptID.getAttemptId() == 2) {
|
||||
// should reuse the earlier running container, so numAllocatedContainers
|
||||
// should be set to 1. And should ask no more containers, so
|
||||
// numRequestedContainers should be set to 0.
|
||||
if (numAllocatedContainers.get() != 1
|
||||
|| numRequestedContainers.get() != 0) {
|
||||
LOG.info("Application Master failed. exiting");
|
||||
System.exit(200);
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
boolean result = false;
|
||||
try {
|
||||
TestDSFailedAppMaster appMaster = new TestDSFailedAppMaster();
|
||||
boolean doRun = appMaster.init(args);
|
||||
if (!doRun) {
|
||||
System.exit(0);
|
||||
}
|
||||
result = appMaster.run();
|
||||
if (appMaster.appAttemptID.getAttemptId() == 1) {
|
||||
try {
|
||||
// sleep some time, wait for the AM to launch a container.
|
||||
Thread.sleep(3000);
|
||||
} catch (InterruptedException e) {}
|
||||
// fail the first am.
|
||||
System.exit(100);
|
||||
}
|
||||
appMaster.finish();
|
||||
} catch (Throwable t) {
|
||||
System.exit(1);
|
||||
}
|
||||
if (result) {
|
||||
LOG.info("Application Master completed successfully. exiting");
|
||||
System.exit(0);
|
||||
} else {
|
||||
LOG.info("Application Master failed. exiting");
|
||||
System.exit(2);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -174,6 +174,35 @@ public class TestDistributedShell {
|
|||
|
||||
}
|
||||
|
||||
@Test(timeout=90000)
|
||||
public void testDSRestartWithPreviousRunningContainers() throws Exception {
|
||||
String[] args = {
|
||||
"--jar",
|
||||
APPMASTER_JAR,
|
||||
"--num_containers",
|
||||
"1",
|
||||
"--shell_command",
|
||||
Shell.WINDOWS ? "timeout 8" : "sleep 8",
|
||||
"--master_memory",
|
||||
"512",
|
||||
"--container_memory",
|
||||
"128",
|
||||
"--keep_containers_across_application_attempts"
|
||||
};
|
||||
|
||||
LOG.info("Initializing DS Client");
|
||||
Client client = new Client(TestDSFailedAppMaster.class.getName(),
|
||||
new Configuration(yarnCluster.getConfig()));
|
||||
|
||||
client.init(args);
|
||||
LOG.info("Running DS Client");
|
||||
boolean result = client.run();
|
||||
|
||||
LOG.info("Client run completed. Result=" + result);
|
||||
// application should succeed
|
||||
Assert.assertTrue(result);
|
||||
}
|
||||
|
||||
@Test(timeout=90000)
|
||||
public void testDSShellWithCustomLogPropertyFile() throws Exception {
|
||||
final File basedir =
|
||||
|
|
|
@ -34,42 +34,60 @@
|
|||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-nodemanager</artifactId>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-mapreduce-client-core</artifactId>
|
||||
<scope>test</scope>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
<hadoop.common.build.dir>${basedir}/../../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
|
||||
</properties>
|
||||
|
||||
<!-- Do not add dependencies here, add them to the POM of the leaf module -->
|
||||
|
||||
<modules>
|
||||
<module>hadoop-yarn-applications-distributedshell</module>
|
||||
<module>hadoop-yarn-applications-unmanaged-am-launcher</module>
|
||||
|
|
|
@ -30,37 +30,121 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jetty-util</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.zookeeper</groupId>
|
||||
<artifactId>zookeeper</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-tests</artifactId>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
|
|||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.ha.HAAdmin;
|
||||
import org.apache.hadoop.ha.HAServiceTarget;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
|
@ -364,13 +365,26 @@ public class RMAdminCLI extends HAAdmin {
|
|||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
if (conf != null) {
|
||||
if (!(conf instanceof YarnConfiguration)) {
|
||||
conf = new YarnConfiguration(conf);
|
||||
}
|
||||
conf = addSecurityConfiguration(conf);
|
||||
}
|
||||
super.setConf(conf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the requisite security principal settings to the given Configuration,
|
||||
* returning a copy.
|
||||
* @param conf the original config
|
||||
* @return a copy with the security settings added
|
||||
*/
|
||||
private static Configuration addSecurityConfiguration(Configuration conf) {
|
||||
// Make a copy so we don't mutate it. Also use an YarnConfiguration to
|
||||
// force loading of yarn-site.xml.
|
||||
conf = new YarnConfiguration(conf);
|
||||
conf.set(CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY,
|
||||
conf.get(YarnConfiguration.RM_PRINCIPAL, ""));
|
||||
return conf;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected HAServiceTarget resolveTarget(String rmId) {
|
||||
Collection<String> rmIds = HAUtil.getRMHAIds(getConf());
|
||||
|
|
|
@ -33,15 +33,137 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jackson</groupId>
|
||||
<artifactId>jackson-mapper-asl</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-cli</groupId>
|
||||
<artifactId>commons-cli</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-servlet</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-server</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-json</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.contribs</groupId>
|
||||
<artifactId>jersey-guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
|
|||
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -29,10 +30,13 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
|
|||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
|
||||
import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
|
||||
import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProto;
|
||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.RegisterApplicationMasterResponseProtoOrBuilder;
|
||||
|
@ -52,6 +56,7 @@ public class RegisterApplicationMasterResponsePBImpl extends
|
|||
|
||||
private Resource maximumResourceCapability;
|
||||
private Map<ApplicationAccessType, String> applicationACLS = null;
|
||||
private List<Container> containersFromPreviousAttempt = null;
|
||||
|
||||
public RegisterApplicationMasterResponsePBImpl() {
|
||||
builder = RegisterApplicationMasterResponseProto.newBuilder();
|
||||
|
@ -105,6 +110,9 @@ public class RegisterApplicationMasterResponsePBImpl extends
|
|||
if (this.applicationACLS != null) {
|
||||
addApplicationACLs();
|
||||
}
|
||||
if (this.containersFromPreviousAttempt != null) {
|
||||
addRunningContainersToProto();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -226,6 +234,43 @@ public class RegisterApplicationMasterResponsePBImpl extends
|
|||
ByteBuffer.wrap(builder.getClientToAmTokenMasterKey().toByteArray());
|
||||
return key;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Container> getContainersFromPreviousAttempt() {
|
||||
if (this.containersFromPreviousAttempt != null) {
|
||||
return this.containersFromPreviousAttempt;
|
||||
}
|
||||
initRunningContainersList();
|
||||
return this.containersFromPreviousAttempt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setContainersFromPreviousAttempt(final List<Container> containers) {
|
||||
if (containers == null) {
|
||||
return;
|
||||
}
|
||||
this.containersFromPreviousAttempt = new ArrayList<Container>();
|
||||
this.containersFromPreviousAttempt.addAll(containers);
|
||||
}
|
||||
|
||||
private void initRunningContainersList() {
|
||||
RegisterApplicationMasterResponseProtoOrBuilder p = viaProto ? proto : builder;
|
||||
List<ContainerProto> list = p.getContainersFromPreviousAttemptList();
|
||||
containersFromPreviousAttempt = new ArrayList<Container>();
|
||||
for (ContainerProto c : list) {
|
||||
containersFromPreviousAttempt.add(convertFromProtoFormat(c));
|
||||
}
|
||||
}
|
||||
|
||||
private void addRunningContainersToProto() {
|
||||
maybeInitBuilder();
|
||||
builder.clearContainersFromPreviousAttempt();
|
||||
List<ContainerProto> list = new ArrayList<ContainerProto>();
|
||||
for (Container c : containersFromPreviousAttempt) {
|
||||
list.add(convertToProtoFormat(c));
|
||||
}
|
||||
builder.addAllContainersFromPreviousAttempt(list);
|
||||
}
|
||||
|
||||
private Resource convertFromProtoFormat(ResourceProto resource) {
|
||||
return new ResourcePBImpl(resource);
|
||||
|
@ -235,4 +280,11 @@ public class RegisterApplicationMasterResponsePBImpl extends
|
|||
return ((ResourcePBImpl)resource).getProto();
|
||||
}
|
||||
|
||||
private ContainerPBImpl convertFromProtoFormat(ContainerProto p) {
|
||||
return new ContainerPBImpl(p);
|
||||
}
|
||||
|
||||
private ContainerProto convertToProtoFormat(Container t) {
|
||||
return ((ContainerPBImpl) t).getProto();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -298,6 +298,19 @@ extends ApplicationSubmissionContext {
|
|||
this.resource = resource;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void
|
||||
setKeepContainersAcrossApplicationAttempts(boolean keepContainers) {
|
||||
maybeInitBuilder();
|
||||
builder.setKeepContainersAcrossApplicationAttempts(keepContainers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getKeepContainersAcrossApplicationAttempts() {
|
||||
ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder;
|
||||
return p.getKeepContainersAcrossApplicationAttempts();
|
||||
}
|
||||
|
||||
private PriorityPBImpl convertFromProtoFormat(PriorityProto p) {
|
||||
return new PriorityPBImpl(p);
|
||||
}
|
||||
|
|
|
@ -337,7 +337,34 @@ public class TestCompositeService {
|
|||
assertEquals("Incorrect number of services",
|
||||
1, testService.getServices().size());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testRemoveService() {
|
||||
CompositeService testService = new CompositeService("TestService") {
|
||||
@Override
|
||||
public void serviceInit(Configuration conf) {
|
||||
Integer notAService = new Integer(0);
|
||||
assertFalse("Added an integer as a service",
|
||||
addIfService(notAService));
|
||||
|
||||
Service service1 = new AbstractService("Service1") {};
|
||||
addIfService(service1);
|
||||
|
||||
Service service2 = new AbstractService("Service2") {};
|
||||
addIfService(service2);
|
||||
|
||||
Service service3 = new AbstractService("Service3") {};
|
||||
addIfService(service3);
|
||||
|
||||
removeService(service1);
|
||||
}
|
||||
};
|
||||
|
||||
testService.init(new Configuration());
|
||||
assertEquals("Incorrect number of services",
|
||||
2, testService.getServices().size());
|
||||
}
|
||||
|
||||
public static class CompositeServiceAddingAChild extends CompositeService{
|
||||
Service child;
|
||||
|
||||
|
|
|
@ -33,10 +33,72 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.zookeeper</groupId>
|
||||
<artifactId>zookeeper</artifactId>
|
||||
|
|
|
@ -35,6 +35,141 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jettison</groupId>
|
||||
<artifactId>jettison</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-codec</groupId>
|
||||
<artifactId>commons-codec</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jetty-util</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-servlet</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.jersey-test-framework</groupId>
|
||||
<artifactId>jersey-test-framework-grizzly2</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-json</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.contribs</groupId>
|
||||
<artifactId>jersey-guice</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
|
|
|
@ -33,10 +33,156 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>javax.servlet</groupId>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-servlet</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.jersey-test-framework</groupId>
|
||||
<artifactId>jersey-test-framework-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-json</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.contribs</groupId>
|
||||
<artifactId>jersey-guice</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>javax.xml.bind</groupId>
|
||||
<artifactId>jaxb-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.codehaus.jettison</groupId>
|
||||
<artifactId>jettison</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-core</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jetty-util</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-web-proxy</artifactId>
|
||||
|
@ -51,12 +197,20 @@
|
|||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.jersey-test-framework</groupId>
|
||||
<artifactId>jersey-test-framework-grizzly2</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRespo
|
|||
import org.apache.hadoop.yarn.api.records.AMCommand;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeReport;
|
||||
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
|
||||
|
@ -78,6 +79,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||
|
@ -271,6 +273,11 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
.getClientToAMTokenSecretManager()
|
||||
.getMasterKey(applicationAttemptId).getEncoded()));
|
||||
}
|
||||
|
||||
List<Container> containerList =
|
||||
((AbstractYarnScheduler) rScheduler)
|
||||
.getTransferredContainers(applicationAttemptId);
|
||||
response.setContainersFromPreviousAttempt(containerList);
|
||||
return response;
|
||||
}
|
||||
}
|
||||
|
@ -421,21 +428,26 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
LOG.warn("Invalid blacklist request by application " + appAttemptId, e);
|
||||
throw e;
|
||||
}
|
||||
|
||||
try {
|
||||
RMServerUtils.validateContainerReleaseRequest(release, appAttemptId);
|
||||
} catch (InvalidContainerReleaseException e) {
|
||||
LOG.warn("Invalid container release by application " + appAttemptId, e);
|
||||
throw e;
|
||||
|
||||
RMApp app =
|
||||
this.rmContext.getRMApps().get(appAttemptId.getApplicationId());
|
||||
// In the case of work-preserving AM restart, it's possible for the
|
||||
// AM to release containers from the earlier attempt.
|
||||
if (!app.getApplicationSubmissionContext()
|
||||
.getKeepContainersAcrossApplicationAttempts()) {
|
||||
try {
|
||||
RMServerUtils.validateContainerReleaseRequest(release, appAttemptId);
|
||||
} catch (InvalidContainerReleaseException e) {
|
||||
LOG.warn("Invalid container release by application " + appAttemptId, e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Send new requests to appAttempt.
|
||||
Allocation allocation =
|
||||
this.rScheduler.allocate(appAttemptId, ask, release,
|
||||
blacklistAdditions, blacklistRemovals);
|
||||
|
||||
RMApp app = this.rmContext.getRMApps().get(
|
||||
appAttemptId.getApplicationId());
|
||||
RMAppAttempt appAttempt = app.getRMAppAttempt(appAttemptId);
|
||||
|
||||
AllocateResponse allocateResponse =
|
||||
|
@ -591,4 +603,4 @@ public class ApplicationMasterService extends AbstractService implements
|
|||
this.response = response;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.hadoop.security.SecurityUtil;
|
|||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.service.CompositeService;
|
||||
import org.apache.hadoop.service.Service;
|
||||
import org.apache.hadoop.util.ExitUtil;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
import org.apache.hadoop.util.ShutdownHookManager;
|
||||
|
@ -180,13 +181,11 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|||
this.conf = conf;
|
||||
this.rmContext = new RMContextImpl();
|
||||
|
||||
rmDispatcher = createDispatcher();
|
||||
// register the handlers for all AlwaysOn services using setupDispatcher().
|
||||
rmDispatcher = setupDispatcher();
|
||||
addIfService(rmDispatcher);
|
||||
rmContext.setDispatcher(rmDispatcher);
|
||||
|
||||
rmDispatcher.register(RMFatalEventType.class,
|
||||
new ResourceManager.RMFatalEventDispatcher(this.rmContext, this));
|
||||
|
||||
adminService = createAdminService();
|
||||
addService(adminService);
|
||||
rmContext.setRMAdminService(adminService);
|
||||
|
@ -832,6 +831,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|||
HAServiceProtocol.HAServiceState.ACTIVE) {
|
||||
stopActiveServices();
|
||||
if (initialize) {
|
||||
resetDispatcher();
|
||||
createAndInitActiveServices();
|
||||
}
|
||||
}
|
||||
|
@ -994,4 +994,24 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|||
YarnConfiguration.YARN_HTTP_POLICY_KEY,
|
||||
YarnConfiguration.YARN_HTTP_POLICY_DEFAULT)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the dispatcher and register the handlers for AlwaysOn services
|
||||
*/
|
||||
private Dispatcher setupDispatcher() {
|
||||
Dispatcher dispatcher = createDispatcher();
|
||||
dispatcher.register(RMFatalEventType.class,
|
||||
new ResourceManager.RMFatalEventDispatcher(this.rmContext, this));
|
||||
return dispatcher;
|
||||
}
|
||||
|
||||
private void resetDispatcher() {
|
||||
Dispatcher dispatcher = setupDispatcher();
|
||||
((Service)dispatcher).init(this.conf);
|
||||
((Service)dispatcher).start();
|
||||
removeService((Service)rmDispatcher);
|
||||
rmDispatcher = dispatcher;
|
||||
addIfService(rmDispatcher);
|
||||
rmContext.setDispatcher(rmDispatcher);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,14 +23,20 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
|
|||
public class RMAppFailedAttemptEvent extends RMAppEvent {
|
||||
|
||||
private final String diagnostics;
|
||||
private final boolean transferStateFromPreviousAttempt;
|
||||
|
||||
public RMAppFailedAttemptEvent(ApplicationId appId, RMAppEventType event,
|
||||
String diagnostics) {
|
||||
String diagnostics, boolean transferStateFromPreviousAttempt) {
|
||||
super(appId, event);
|
||||
this.diagnostics = diagnostics;
|
||||
this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
|
||||
}
|
||||
|
||||
public String getDiagnostics() {
|
||||
return this.diagnostics;
|
||||
}
|
||||
|
||||
public boolean getTransferStateFromPreviousAttempt() {
|
||||
return transferStateFromPreviousAttempt;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,6 +63,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptE
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppStartAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanAppEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||
|
@ -76,6 +77,7 @@ import org.apache.hadoop.yarn.state.StateMachine;
|
|||
import org.apache.hadoop.yarn.state.StateMachineFactory;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
|
||||
@SuppressWarnings({ "rawtypes", "unchecked" })
|
||||
public class RMAppImpl implements RMApp, Recoverable {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(RMAppImpl.class);
|
||||
|
@ -633,7 +635,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
this.writeLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void recover(RMState state) throws Exception{
|
||||
ApplicationState appState = state.getApplicationState().get(getApplicationId());
|
||||
|
@ -646,26 +648,28 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
|
||||
for(int i=0; i<appState.getAttemptCount(); ++i) {
|
||||
// create attempt
|
||||
createNewAttempt(false);
|
||||
createNewAttempt();
|
||||
((RMAppAttemptImpl)this.currentAttempt).recover(state);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private void createNewAttempt(boolean startAttempt) {
|
||||
private void createNewAttempt() {
|
||||
ApplicationAttemptId appAttemptId =
|
||||
ApplicationAttemptId.newInstance(applicationId, attempts.size() + 1);
|
||||
RMAppAttempt attempt =
|
||||
new RMAppAttemptImpl(appAttemptId, rmContext, scheduler, masterService,
|
||||
submissionContext, conf);
|
||||
submissionContext, conf, maxAppAttempts == attempts.size());
|
||||
attempts.put(appAttemptId, attempt);
|
||||
currentAttempt = attempt;
|
||||
if(startAttempt) {
|
||||
handler.handle(
|
||||
new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.START));
|
||||
}
|
||||
}
|
||||
|
||||
private void
|
||||
createAndStartNewAttempt(boolean transferStateFromPreviousAttempt) {
|
||||
createNewAttempt();
|
||||
handler.handle(new RMAppStartAttemptEvent(currentAttempt.getAppAttemptId(),
|
||||
transferStateFromPreviousAttempt));
|
||||
}
|
||||
|
||||
private void processNodeUpdate(RMAppNodeUpdateType type, RMNode node) {
|
||||
NodeState nodeState = node.getState();
|
||||
updatedNodes.add(node);
|
||||
|
@ -688,7 +692,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
};
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
private static final class RMAppRecoveredTransition implements
|
||||
MultipleArcTransition<RMAppImpl, RMAppEvent, RMAppState> {
|
||||
|
||||
|
@ -729,7 +732,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
|
||||
private static final class AddApplicationToSchedulerTransition extends
|
||||
RMAppTransition {
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
if (event instanceof RMAppNewSavedEvent) {
|
||||
|
@ -751,14 +753,13 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
private static final class StartAppAttemptTransition extends RMAppTransition {
|
||||
@Override
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
app.createNewAttempt(true);
|
||||
app.createAndStartNewAttempt(false);
|
||||
};
|
||||
}
|
||||
|
||||
private static final class FinalStateSavedTransition implements
|
||||
MultipleArcTransition<RMAppImpl, RMAppEvent, RMAppState> {
|
||||
|
||||
@SuppressWarnings({ "rawtypes", "unchecked" })
|
||||
@Override
|
||||
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
|
||||
RMAppUpdateSavedEvent storeEvent = (RMAppUpdateSavedEvent) event;
|
||||
|
@ -959,7 +960,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
}
|
||||
|
||||
private static class KillAttemptTransition extends RMAppTransition {
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
app.stateBeforeKilling = app.getState();
|
||||
|
@ -987,7 +987,6 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
return nodes;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||
Set<NodeId> nodes = getNodesOnWhichAttemptRan(app);
|
||||
for (NodeId nodeId : nodes) {
|
||||
|
@ -1019,7 +1018,21 @@ public class RMAppImpl implements RMApp, Recoverable {
|
|||
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
|
||||
if (!app.submissionContext.getUnmanagedAM()
|
||||
&& app.attempts.size() < app.maxAppAttempts) {
|
||||
app.createNewAttempt(true);
|
||||
boolean transferStateFromPreviousAttempt = false;
|
||||
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
|
||||
transferStateFromPreviousAttempt =
|
||||
failedEvent.getTransferStateFromPreviousAttempt();
|
||||
|
||||
RMAppAttempt oldAttempt = app.currentAttempt;
|
||||
app.createAndStartNewAttempt(transferStateFromPreviousAttempt);
|
||||
// Transfer the state from the previous attempt to the current attempt.
|
||||
// Note that the previous failed attempt may still be collecting the
|
||||
// container events from the scheduler and updating its data structures
|
||||
// before the new attempt is created.
|
||||
if (transferStateFromPreviousAttempt) {
|
||||
((RMAppAttemptImpl) app.currentAttempt)
|
||||
.transferStateFromPreviousAttempt(oldAttempt);
|
||||
}
|
||||
return initialState;
|
||||
} else {
|
||||
app.rememberTargetTransitionsAndStoreState(event,
|
||||
|
|
|
@ -129,9 +129,9 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
private SecretKey clientTokenMasterKey = null;
|
||||
|
||||
//nodes on which this attempt's containers ran
|
||||
private final Set<NodeId> ranNodes =
|
||||
private Set<NodeId> ranNodes =
|
||||
new HashSet<NodeId>();
|
||||
private final List<ContainerStatus> justFinishedContainers =
|
||||
private List<ContainerStatus> justFinishedContainers =
|
||||
new ArrayList<ContainerStatus>();
|
||||
private Container masterContainer;
|
||||
|
||||
|
@ -148,7 +148,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
private final StringBuilder diagnostics = new StringBuilder();
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
private final boolean isLastAttempt;
|
||||
private static final ExpiredTransition EXPIRED_TRANSITION =
|
||||
new ExpiredTransition();
|
||||
|
||||
|
@ -330,6 +330,12 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
RMAppAttemptEventType.KILL))
|
||||
|
||||
// Transitions from FAILED State
|
||||
// For work-preserving AM restart, failed attempt are still capturing
|
||||
// CONTAINER_FINISHED event and record the finished containers for the
|
||||
// use by the next new attempt.
|
||||
.addTransition(RMAppAttemptState.FAILED, RMAppAttemptState.FAILED,
|
||||
RMAppAttemptEventType.CONTAINER_FINISHED,
|
||||
new ContainerFinishedAtFailedTransition())
|
||||
.addTransition(
|
||||
RMAppAttemptState.FAILED,
|
||||
RMAppAttemptState.FAILED,
|
||||
|
@ -338,8 +344,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
RMAppAttemptEventType.KILL,
|
||||
RMAppAttemptEventType.UNREGISTERED,
|
||||
RMAppAttemptEventType.STATUS_UPDATE,
|
||||
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
||||
RMAppAttemptEventType.CONTAINER_FINISHED))
|
||||
RMAppAttemptEventType.CONTAINER_ALLOCATED))
|
||||
|
||||
// Transitions from FINISHING State
|
||||
.addTransition(RMAppAttemptState.FINISHING,
|
||||
|
@ -390,7 +395,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
RMContext rmContext, YarnScheduler scheduler,
|
||||
ApplicationMasterService masterService,
|
||||
ApplicationSubmissionContext submissionContext,
|
||||
Configuration conf) {
|
||||
Configuration conf, boolean isLastAttempt) {
|
||||
this.conf = conf;
|
||||
this.applicationAttemptId = appAttemptId;
|
||||
this.rmContext = rmContext;
|
||||
|
@ -404,7 +409,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
this.writeLock = lock.writeLock();
|
||||
|
||||
this.proxiedTrackingUrl = generateProxyUriWithScheme(null);
|
||||
|
||||
this.isLastAttempt = isLastAttempt;
|
||||
this.stateMachine = stateMachineFactory.make(this);
|
||||
}
|
||||
|
||||
|
@ -416,7 +421,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
@Override
|
||||
public ApplicationSubmissionContext getSubmissionContext() {
|
||||
return this.submissionContext;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FinalApplicationStatus getFinalApplicationStatus() {
|
||||
|
@ -685,6 +690,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
this.startTime = attemptState.getStartTime();
|
||||
}
|
||||
|
||||
public void transferStateFromPreviousAttempt(RMAppAttempt attempt) {
|
||||
this.justFinishedContainers = attempt.getJustFinishedContainers();
|
||||
this.ranNodes = attempt.getRanNodes();
|
||||
}
|
||||
|
||||
private void recoverAppAttemptCredentials(Credentials appAttemptTokens)
|
||||
throws IOException {
|
||||
if (appAttemptTokens == null) {
|
||||
|
@ -721,6 +731,12 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
public void transition(RMAppAttemptImpl appAttempt,
|
||||
RMAppAttemptEvent event) {
|
||||
|
||||
boolean transferStateFromPreviousAttempt = false;
|
||||
if (event instanceof RMAppStartAttemptEvent) {
|
||||
transferStateFromPreviousAttempt =
|
||||
((RMAppStartAttemptEvent) event)
|
||||
.getTransferStateFromPreviousAttempt();
|
||||
}
|
||||
appAttempt.startTime = System.currentTimeMillis();
|
||||
|
||||
// Register with the ApplicationMasterService
|
||||
|
@ -740,9 +756,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
new Token<AMRMTokenIdentifier>(id,
|
||||
appAttempt.rmContext.getAMRMTokenSecretManager());
|
||||
|
||||
// Add the applicationAttempt to the scheduler
|
||||
// Add the applicationAttempt to the scheduler and inform the scheduler
|
||||
// whether to transfer the state from previous attempt.
|
||||
appAttempt.eventHandler.handle(new AppAttemptAddedSchedulerEvent(
|
||||
appAttempt.applicationAttemptId));
|
||||
appAttempt.applicationAttemptId, transferStateFromPreviousAttempt));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -981,6 +998,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
// Tell the application and the scheduler
|
||||
ApplicationId applicationId = appAttemptId.getApplicationId();
|
||||
RMAppEvent appEvent = null;
|
||||
boolean keepContainersAcrossAppAttempts = false;
|
||||
switch (finalAttemptState) {
|
||||
case FINISHED:
|
||||
{
|
||||
|
@ -996,7 +1014,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
appEvent =
|
||||
new RMAppFailedAttemptEvent(applicationId,
|
||||
RMAppEventType.ATTEMPT_KILLED,
|
||||
"Application killed by user.");
|
||||
"Application killed by user.", false);
|
||||
}
|
||||
break;
|
||||
case FAILED:
|
||||
|
@ -1004,10 +1022,17 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
// don't leave the tracking URL pointing to a non-existent AM
|
||||
appAttempt.setTrackingUrlToRMAppPage();
|
||||
appAttempt.invalidateAMHostAndPort();
|
||||
if (appAttempt.submissionContext
|
||||
.getKeepContainersAcrossApplicationAttempts()
|
||||
&& !appAttempt.isLastAttempt
|
||||
&& !appAttempt.submissionContext.getUnmanagedAM()) {
|
||||
keepContainersAcrossAppAttempts = true;
|
||||
}
|
||||
appEvent =
|
||||
new RMAppFailedAttemptEvent(applicationId,
|
||||
RMAppEventType.ATTEMPT_FAILED,
|
||||
appAttempt.getDiagnostics());
|
||||
RMAppEventType.ATTEMPT_FAILED, appAttempt.getDiagnostics(),
|
||||
keepContainersAcrossAppAttempts);
|
||||
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -1019,7 +1044,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
|
||||
appAttempt.eventHandler.handle(appEvent);
|
||||
appAttempt.eventHandler.handle(new AppAttemptRemovedSchedulerEvent(
|
||||
appAttemptId, finalAttemptState));
|
||||
appAttemptId, finalAttemptState, keepContainersAcrossAppAttempts));
|
||||
appAttempt.removeCredentials(appAttempt);
|
||||
}
|
||||
}
|
||||
|
@ -1045,6 +1070,11 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
public void transition(RMAppAttemptImpl appAttempt,
|
||||
RMAppAttemptEvent event) {
|
||||
appAttempt.checkAttemptStoreError(event);
|
||||
// TODO Today unmanaged AM client is waiting for app state to be Accepted to
|
||||
// launch the AM. This is broken since we changed to start the attempt
|
||||
// after the application is Accepted. We may need to introduce an attempt
|
||||
// report that client can rely on to query the attempt state and choose to
|
||||
// launch the unmanaged AM.
|
||||
super.transition(appAttempt, event);
|
||||
}
|
||||
}
|
||||
|
@ -1346,6 +1376,20 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
|||
}
|
||||
}
|
||||
|
||||
private static final class ContainerFinishedAtFailedTransition
|
||||
extends BaseTransition {
|
||||
@Override
|
||||
public void
|
||||
transition(RMAppAttemptImpl appAttempt, RMAppAttemptEvent event) {
|
||||
RMAppAttemptContainerFinishedEvent containerFinishedEvent =
|
||||
(RMAppAttemptContainerFinishedEvent) event;
|
||||
ContainerStatus containerStatus =
|
||||
containerFinishedEvent.getContainerStatus();
|
||||
// Normal container. Add it in completed containers list
|
||||
appAttempt.justFinishedContainers.add(containerStatus);
|
||||
}
|
||||
}
|
||||
|
||||
private static class ContainerFinishedFinalStateSavedTransition extends
|
||||
BaseTransition {
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
|
||||
public class RMAppStartAttemptEvent extends RMAppAttemptEvent {
|
||||
|
||||
private final boolean transferStateFromPreviousAttempt;
|
||||
|
||||
public RMAppStartAttemptEvent(ApplicationAttemptId appAttemptId,
|
||||
boolean transferStateFromPreviousAttempt) {
|
||||
super(appAttemptId, RMAppAttemptEventType.START);
|
||||
this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
|
||||
}
|
||||
|
||||
public boolean getTransferStateFromPreviousAttempt() {
|
||||
return transferStateFromPreviousAttempt;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
|
||||
public class AbstractYarnScheduler {
|
||||
|
||||
protected RMContext rmContext;
|
||||
protected Map<ApplicationId, SchedulerApplication> applications;
|
||||
|
||||
public synchronized List<Container> getTransferredContainers(
|
||||
ApplicationAttemptId currentAttempt) {
|
||||
ApplicationId appId = currentAttempt.getApplicationId();
|
||||
SchedulerApplication app = applications.get(appId);
|
||||
List<Container> containerList = new ArrayList<Container>();
|
||||
RMApp appImpl = this.rmContext.getRMApps().get(appId);
|
||||
if (appImpl.getApplicationSubmissionContext().getUnmanagedAM()) {
|
||||
return containerList;
|
||||
}
|
||||
Collection<RMContainer> liveContainers =
|
||||
app.getCurrentAppAttempt().getLiveContainers();
|
||||
ContainerId amContainerId =
|
||||
rmContext.getRMApps().get(appId).getCurrentAppAttempt()
|
||||
.getMasterContainer().getId();
|
||||
for (RMContainer rmContainer : liveContainers) {
|
||||
if (!rmContainer.getContainerId().equals(amContainerId)) {
|
||||
containerList.add(rmContainer.getContainer());
|
||||
}
|
||||
}
|
||||
return containerList;
|
||||
}
|
||||
|
||||
public Map<ApplicationId, SchedulerApplication> getSchedulerApplications() {
|
||||
return applications;
|
||||
}
|
||||
}
|
|
@ -59,10 +59,10 @@ public class AppSchedulingInfo {
|
|||
|
||||
final Set<Priority> priorities = new TreeSet<Priority>(
|
||||
new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator());
|
||||
final Map<Priority, Map<String, ResourceRequest>> requests =
|
||||
final Map<Priority, Map<String, ResourceRequest>> requests =
|
||||
new HashMap<Priority, Map<String, ResourceRequest>>();
|
||||
final Set<String> blacklist = new HashSet<String>();
|
||||
|
||||
private Set<String> blacklist = new HashSet<String>();
|
||||
|
||||
//private final ApplicationStore store;
|
||||
private final ActiveUsersManager activeUsersManager;
|
||||
|
||||
|
@ -260,7 +260,7 @@ public class AppSchedulingInfo {
|
|||
// once an allocation is done we assume the application is
|
||||
// running from scheduler's POV.
|
||||
pending = false;
|
||||
metrics.incrAppsRunning(this, user);
|
||||
metrics.runAppAttempt(applicationId, user);
|
||||
}
|
||||
LOG.debug("allocate: user: " + user + ", memory: "
|
||||
+ request.getCapability());
|
||||
|
@ -390,7 +390,7 @@ public class AppSchedulingInfo {
|
|||
.getNumContainers()));
|
||||
}
|
||||
}
|
||||
metrics.finishApp(this, rmAppAttemptFinalState);
|
||||
metrics.finishAppAttempt(applicationId, pending, user);
|
||||
|
||||
// Clear requests themselves
|
||||
clearRequests();
|
||||
|
@ -399,4 +399,15 @@ public class AppSchedulingInfo {
|
|||
public synchronized void setQueue(Queue queue) {
|
||||
this.queue = queue;
|
||||
}
|
||||
|
||||
public synchronized Set<String> getBlackList() {
|
||||
return this.blacklist;
|
||||
}
|
||||
|
||||
public synchronized void transferStateFromPreviousAppSchedulingInfo(
|
||||
AppSchedulingInfo appInfo) {
|
||||
// this.priorities = appInfo.getPriorities();
|
||||
// this.requests = appInfo.getRequests();
|
||||
this.blacklist = appInfo.getBlackList();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -57,7 +57,7 @@ public class QueueMetrics implements MetricsSource {
|
|||
@Metric("# of pending apps") MutableGaugeInt appsPending;
|
||||
@Metric("# of apps completed") MutableCounterInt appsCompleted;
|
||||
@Metric("# of apps killed") MutableCounterInt appsKilled;
|
||||
@Metric("# of apps failed") MutableGaugeInt appsFailed;
|
||||
@Metric("# of apps failed") MutableCounterInt appsFailed;
|
||||
|
||||
@Metric("Allocated memory in MB") MutableGaugeInt allocatedMB;
|
||||
@Metric("Allocated CPU in virtual cores") MutableGaugeInt allocatedVCores;
|
||||
|
@ -214,54 +214,70 @@ public class QueueMetrics implements MetricsSource {
|
|||
registry.snapshot(collector.addRecord(registry.info()), all);
|
||||
}
|
||||
|
||||
public void submitApp(String user, int attemptId) {
|
||||
if (attemptId == 1) {
|
||||
appsSubmitted.incr();
|
||||
} else {
|
||||
appsFailed.decr();
|
||||
}
|
||||
appsPending.incr();
|
||||
public void submitApp(String user) {
|
||||
appsSubmitted.incr();
|
||||
QueueMetrics userMetrics = getUserMetrics(user);
|
||||
if (userMetrics != null) {
|
||||
userMetrics.submitApp(user, attemptId);
|
||||
userMetrics.submitApp(user);
|
||||
}
|
||||
if (parent != null) {
|
||||
parent.submitApp(user, attemptId);
|
||||
parent.submitApp(user);
|
||||
}
|
||||
}
|
||||
|
||||
public void incrAppsRunning(AppSchedulingInfo app, String user) {
|
||||
runBuckets.add(app.getApplicationId(), System.currentTimeMillis());
|
||||
public void submitAppAttempt(String user) {
|
||||
appsPending.incr();
|
||||
QueueMetrics userMetrics = getUserMetrics(user);
|
||||
if (userMetrics != null) {
|
||||
userMetrics.submitAppAttempt(user);
|
||||
}
|
||||
if (parent != null) {
|
||||
parent.submitAppAttempt(user);
|
||||
}
|
||||
}
|
||||
|
||||
public void runAppAttempt(ApplicationId appId, String user) {
|
||||
runBuckets.add(appId, System.currentTimeMillis());
|
||||
appsRunning.incr();
|
||||
appsPending.decr();
|
||||
QueueMetrics userMetrics = getUserMetrics(user);
|
||||
if (userMetrics != null) {
|
||||
userMetrics.incrAppsRunning(app, user);
|
||||
userMetrics.runAppAttempt(appId, user);
|
||||
}
|
||||
if (parent != null) {
|
||||
parent.incrAppsRunning(app, user);
|
||||
parent.runAppAttempt(appId, user);
|
||||
}
|
||||
}
|
||||
|
||||
public void finishApp(AppSchedulingInfo app,
|
||||
RMAppAttemptState rmAppAttemptFinalState) {
|
||||
runBuckets.remove(app.getApplicationId());
|
||||
switch (rmAppAttemptFinalState) {
|
||||
case KILLED: appsKilled.incr(); break;
|
||||
case FAILED: appsFailed.incr(); break;
|
||||
default: appsCompleted.incr(); break;
|
||||
}
|
||||
if (app.isPending()) {
|
||||
public void finishAppAttempt(
|
||||
ApplicationId appId, boolean isPending, String user) {
|
||||
runBuckets.remove(appId);
|
||||
if (isPending) {
|
||||
appsPending.decr();
|
||||
} else {
|
||||
appsRunning.decr();
|
||||
}
|
||||
QueueMetrics userMetrics = getUserMetrics(app.getUser());
|
||||
QueueMetrics userMetrics = getUserMetrics(user);
|
||||
if (userMetrics != null) {
|
||||
userMetrics.finishApp(app, rmAppAttemptFinalState);
|
||||
userMetrics.finishAppAttempt(appId, isPending, user);
|
||||
}
|
||||
if (parent != null) {
|
||||
parent.finishApp(app, rmAppAttemptFinalState);
|
||||
parent.finishAppAttempt(appId, isPending, user);
|
||||
}
|
||||
}
|
||||
|
||||
public void finishApp(String user, RMAppState rmAppFinalState) {
|
||||
switch (rmAppFinalState) {
|
||||
case KILLED: appsKilled.incr(); break;
|
||||
case FAILED: appsFailed.incr(); break;
|
||||
default: appsCompleted.incr(); break;
|
||||
}
|
||||
QueueMetrics userMetrics = getUserMetrics(user);
|
||||
if (userMetrics != null) {
|
||||
userMetrics.finishApp(user, rmAppFinalState);
|
||||
}
|
||||
if (parent != null) {
|
||||
parent.finishApp(user, rmAppFinalState);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -493,4 +509,8 @@ public class QueueMetrics implements MetricsSource {
|
|||
public int getActiveApps() {
|
||||
return activeApplications.value();
|
||||
}
|
||||
|
||||
public MetricsSystem getMetricsSystem() {
|
||||
return metricsSystem;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
|||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
|
||||
@Private
|
||||
@Unstable
|
||||
|
@ -26,6 +27,7 @@ public class SchedulerApplication {
|
|||
|
||||
private final Queue queue;
|
||||
private final String user;
|
||||
private SchedulerApplicationAttempt currentAttempt;
|
||||
|
||||
public SchedulerApplication(Queue queue, String user) {
|
||||
this.queue = queue;
|
||||
|
@ -39,4 +41,17 @@ public class SchedulerApplication {
|
|||
public String getUser() {
|
||||
return user;
|
||||
}
|
||||
|
||||
public SchedulerApplicationAttempt getCurrentAppAttempt() {
|
||||
return currentAttempt;
|
||||
}
|
||||
|
||||
public void setCurrentAppAttempt(SchedulerApplicationAttempt currentAttempt) {
|
||||
this.currentAttempt = currentAttempt;
|
||||
}
|
||||
|
||||
public void stop(RMAppState rmAppFinalState) {
|
||||
queue.getMetrics().finishApp(user, rmAppFinalState);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -64,7 +64,7 @@ public abstract class SchedulerApplicationAttempt {
|
|||
|
||||
protected final AppSchedulingInfo appSchedulingInfo;
|
||||
|
||||
protected final Map<ContainerId, RMContainer> liveContainers =
|
||||
protected Map<ContainerId, RMContainer> liveContainers =
|
||||
new HashMap<ContainerId, RMContainer>();
|
||||
protected final Map<Priority, Map<NodeId, RMContainer>> reservedContainers =
|
||||
new HashMap<Priority, Map<NodeId, RMContainer>>();
|
||||
|
@ -73,7 +73,7 @@ public abstract class SchedulerApplicationAttempt {
|
|||
|
||||
protected final Resource currentReservation = Resource.newInstance(0, 0);
|
||||
private Resource resourceLimit = Resource.newInstance(0, 0);
|
||||
protected final Resource currentConsumption = Resource.newInstance(0, 0);
|
||||
protected Resource currentConsumption = Resource.newInstance(0, 0);
|
||||
|
||||
protected List<RMContainer> newlyAllocatedContainers =
|
||||
new ArrayList<RMContainer>();
|
||||
|
@ -407,4 +407,29 @@ public abstract class SchedulerApplicationAttempt {
|
|||
Resources.add(currentConsumption, currentReservation));
|
||||
}
|
||||
|
||||
public synchronized Map<ContainerId, RMContainer> getLiveContainersMap() {
|
||||
return this.liveContainers;
|
||||
}
|
||||
|
||||
public synchronized Resource getResourceLimit() {
|
||||
return this.resourceLimit;
|
||||
}
|
||||
|
||||
public synchronized Map<Priority, Long> getLastScheduledContainer() {
|
||||
return this.lastScheduledContainer;
|
||||
}
|
||||
|
||||
public synchronized void transferStateFromPreviousAttempt(
|
||||
SchedulerApplicationAttempt appAttempt) {
|
||||
this.liveContainers = appAttempt.getLiveContainersMap();
|
||||
// this.reReservations = appAttempt.reReservations;
|
||||
this.currentConsumption = appAttempt.getCurrentConsumption();
|
||||
this.resourceLimit = appAttempt.getResourceLimit();
|
||||
// this.currentReservation = appAttempt.currentReservation;
|
||||
// this.newlyAllocatedContainers = appAttempt.newlyAllocatedContainers;
|
||||
// this.schedulingOpportunities = appAttempt.schedulingOpportunities;
|
||||
this.lastScheduledContainer = appAttempt.getLastScheduledContainer();
|
||||
this.appSchedulingInfo
|
||||
.transferStateFromPreviousAppSchedulingInfo(appAttempt.appSchedulingInfo);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,13 +19,13 @@
|
|||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate;
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Evolving;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Stable;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
|
||||
|
@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
|||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||
|
||||
/**
|
||||
|
@ -170,4 +171,13 @@ public interface YarnScheduler extends EventHandler<SchedulerEvent> {
|
|||
@LimitedPrivate("yarn")
|
||||
@Stable
|
||||
public List<ApplicationAttemptId> getAppsInQueue(String queueName);
|
||||
|
||||
/**
|
||||
* Get the container for the given containerId.
|
||||
* @param containerId
|
||||
* @return the container for the given containerId.
|
||||
*/
|
||||
@LimitedPrivate("yarn")
|
||||
@Unstable
|
||||
public RMContainer getRMContainer(ContainerId containerId);
|
||||
}
|
||||
|
|
|
@ -63,14 +63,16 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptE
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||
|
@ -94,7 +96,7 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
@LimitedPrivate("yarn")
|
||||
@Evolving
|
||||
@SuppressWarnings("unchecked")
|
||||
public class CapacityScheduler
|
||||
public class CapacityScheduler extends AbstractYarnScheduler
|
||||
implements PreemptableResourceScheduler, CapacitySchedulerContext,
|
||||
Configurable {
|
||||
|
||||
|
@ -176,7 +178,6 @@ public class CapacityScheduler
|
|||
|
||||
private CapacitySchedulerConfiguration conf;
|
||||
private Configuration yarnConf;
|
||||
private RMContext rmContext;
|
||||
|
||||
private Map<String, CSQueue> queues = new ConcurrentHashMap<String, CSQueue>();
|
||||
|
||||
|
@ -190,14 +191,6 @@ public class CapacityScheduler
|
|||
private Resource minimumAllocation;
|
||||
private Resource maximumAllocation;
|
||||
|
||||
@VisibleForTesting
|
||||
protected Map<ApplicationId, SchedulerApplication> applications =
|
||||
new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
|
||||
|
||||
@VisibleForTesting
|
||||
protected Map<ApplicationAttemptId, FiCaSchedulerApp> appAttempts =
|
||||
new ConcurrentHashMap<ApplicationAttemptId, FiCaSchedulerApp>();
|
||||
|
||||
private boolean initialized = false;
|
||||
|
||||
private ResourceCalculator calculator;
|
||||
|
@ -274,9 +267,10 @@ public class CapacityScheduler
|
|||
this.maximumAllocation = this.conf.getMaximumAllocation();
|
||||
this.calculator = this.conf.getResourceCalculator();
|
||||
this.usePortForNodeName = this.conf.getUsePortForNodeName();
|
||||
|
||||
this.applications =
|
||||
new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
|
||||
this.rmContext = rmContext;
|
||||
|
||||
|
||||
initializeQueues(this.conf);
|
||||
|
||||
initialized = true;
|
||||
|
@ -464,21 +458,27 @@ public class CapacityScheduler
|
|||
}
|
||||
|
||||
private synchronized void addApplicationAttempt(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
boolean transferStateFromPreviousAttempt) {
|
||||
SchedulerApplication application =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
CSQueue queue = (CSQueue) application.getQueue();
|
||||
|
||||
FiCaSchedulerApp SchedulerApp =
|
||||
FiCaSchedulerApp attempt =
|
||||
new FiCaSchedulerApp(applicationAttemptId, application.getUser(),
|
||||
queue, queue.getActiveUsersManager(), rmContext);
|
||||
appAttempts.put(applicationAttemptId, SchedulerApp);
|
||||
queue.submitApplicationAttempt(SchedulerApp, application.getUser());
|
||||
if (transferStateFromPreviousAttempt) {
|
||||
attempt.transferStateFromPreviousAttempt(application
|
||||
.getCurrentAppAttempt());
|
||||
}
|
||||
application.setCurrentAppAttempt(attempt);
|
||||
|
||||
queue.submitApplicationAttempt(attempt, application.getUser());
|
||||
LOG.info("Added Application Attempt " + applicationAttemptId
|
||||
+ " to scheduler from user " + application.getUser() + " in queue "
|
||||
+ queue.getQueueName());
|
||||
rmContext.getDispatcher().getEventHandler().handle(
|
||||
new RMAppAttemptEvent(applicationAttemptId,
|
||||
rmContext.getDispatcher().getEventHandler() .handle(
|
||||
new RMAppAttemptEvent(applicationAttemptId,
|
||||
RMAppAttemptEventType.ATTEMPT_ADDED));
|
||||
}
|
||||
|
||||
|
@ -486,7 +486,9 @@ public class CapacityScheduler
|
|||
RMAppState finalState) {
|
||||
SchedulerApplication application = applications.get(applicationId);
|
||||
if (application == null){
|
||||
// The AppRemovedSchedulerEvent maybe sent on recovery for completed apps.
|
||||
// The AppRemovedSchedulerEvent maybe sent on recovery for completed apps,
|
||||
// ignore it.
|
||||
LOG.warn("Couldn't find application " + applicationId);
|
||||
return;
|
||||
}
|
||||
CSQueue queue = (CSQueue) application.getQueue();
|
||||
|
@ -496,57 +498,62 @@ public class CapacityScheduler
|
|||
} else {
|
||||
queue.finishApplication(applicationId, application.getUser());
|
||||
}
|
||||
application.stop(finalState);
|
||||
applications.remove(applicationId);
|
||||
}
|
||||
|
||||
private synchronized void doneApplicationAttempt(
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
RMAppAttemptState rmAppAttemptFinalState) {
|
||||
RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
|
||||
LOG.info("Application Attempt " + applicationAttemptId + " is done." +
|
||||
" finalState=" + rmAppAttemptFinalState);
|
||||
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
|
||||
SchedulerApplication application =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
|
||||
if (application == null) {
|
||||
// throw new IOException("Unknown application " + applicationId +
|
||||
// " has completed!");
|
||||
if (application == null || attempt == null) {
|
||||
LOG.info("Unknown application " + applicationAttemptId + " has completed!");
|
||||
return;
|
||||
}
|
||||
|
||||
// Release all the running containers
|
||||
for (RMContainer rmContainer : application.getLiveContainers()) {
|
||||
completedContainer(rmContainer,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
rmContainer.getContainerId(),
|
||||
SchedulerUtils.COMPLETED_APPLICATION),
|
||||
RMContainerEventType.KILL);
|
||||
|
||||
// Release all the allocated, acquired, running containers
|
||||
for (RMContainer rmContainer : attempt.getLiveContainers()) {
|
||||
if (keepContainers
|
||||
&& rmContainer.getState().equals(RMContainerState.RUNNING)) {
|
||||
// do not kill the running container in the case of work-preserving AM
|
||||
// restart.
|
||||
LOG.info("Skip killing " + rmContainer.getContainerId());
|
||||
continue;
|
||||
}
|
||||
completedContainer(
|
||||
rmContainer,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
rmContainer.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
|
||||
RMContainerEventType.KILL);
|
||||
}
|
||||
|
||||
// Release all reserved containers
|
||||
for (RMContainer rmContainer : application.getReservedContainers()) {
|
||||
completedContainer(rmContainer,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
rmContainer.getContainerId(),
|
||||
"Application Complete"),
|
||||
RMContainerEventType.KILL);
|
||||
|
||||
// Release all reserved containers
|
||||
for (RMContainer rmContainer : attempt.getReservedContainers()) {
|
||||
completedContainer(
|
||||
rmContainer,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
rmContainer.getContainerId(), "Application Complete"),
|
||||
RMContainerEventType.KILL);
|
||||
}
|
||||
|
||||
|
||||
// Clean up pending requests, metrics etc.
|
||||
application.stop(rmAppAttemptFinalState);
|
||||
|
||||
attempt.stop(rmAppAttemptFinalState);
|
||||
|
||||
// Inform the queue
|
||||
String queueName = application.getQueue().getQueueName();
|
||||
String queueName = attempt.getQueue().getQueueName();
|
||||
CSQueue queue = queues.get(queueName);
|
||||
if (!(queue instanceof LeafQueue)) {
|
||||
LOG.error("Cannot finish application " + "from non-leaf queue: "
|
||||
+ queueName);
|
||||
} else {
|
||||
queue.finishApplicationAttempt(application, queue.getQueueName());
|
||||
queue.finishApplicationAttempt(attempt, queue.getQueueName());
|
||||
}
|
||||
|
||||
// Remove from our data-structure
|
||||
appAttempts.remove(applicationAttemptId);
|
||||
}
|
||||
|
||||
private static final Allocation EMPTY_ALLOCATION =
|
||||
|
@ -558,7 +565,7 @@ public class CapacityScheduler
|
|||
List<ResourceRequest> ask, List<ContainerId> release,
|
||||
List<String> blacklistAdditions, List<String> blacklistRemovals) {
|
||||
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
|
||||
if (application == null) {
|
||||
LOG.info("Calling allocate on removed " +
|
||||
"or non existant application " + applicationAttemptId);
|
||||
|
@ -700,8 +707,8 @@ public class CapacityScheduler
|
|||
|
||||
RMContainer reservedContainer = node.getReservedContainer();
|
||||
if (reservedContainer != null) {
|
||||
FiCaSchedulerApp reservedApplication =
|
||||
getApplication(reservedContainer.getApplicationAttemptId());
|
||||
FiCaSchedulerApp reservedApplication =
|
||||
getCurrentAttemptForContainer(reservedContainer.getContainerId());
|
||||
|
||||
// Try to fulfill the reservation
|
||||
LOG.info("Trying to fulfill reservation for application " +
|
||||
|
@ -738,12 +745,11 @@ public class CapacityScheduler
|
|||
|
||||
private void containerLaunchedOnNode(ContainerId containerId, FiCaSchedulerNode node) {
|
||||
// Get the application for the launched container
|
||||
ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId();
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp application = getCurrentAttemptForContainer(containerId);
|
||||
if (application == null) {
|
||||
LOG.info("Unknown application: " + applicationAttemptId +
|
||||
" launched container " + containerId +
|
||||
" on node: " + node);
|
||||
LOG.info("Unknown application "
|
||||
+ containerId.getApplicationAttemptId().getApplicationId()
|
||||
+ " launched container " + containerId + " on node: " + node);
|
||||
this.rmContext.getDispatcher().getEventHandler()
|
||||
.handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId));
|
||||
return;
|
||||
|
@ -791,7 +797,8 @@ public class CapacityScheduler
|
|||
{
|
||||
AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
|
||||
(AppAttemptAddedSchedulerEvent) event;
|
||||
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId());
|
||||
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
|
||||
appAttemptAddedEvent.getTransferStateFromPreviousAttempt());
|
||||
}
|
||||
break;
|
||||
case APP_ATTEMPT_REMOVED:
|
||||
|
@ -799,7 +806,8 @@ public class CapacityScheduler
|
|||
AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
|
||||
(AppAttemptRemovedSchedulerEvent) event;
|
||||
doneApplicationAttempt(appAttemptRemovedEvent.getApplicationAttemptID(),
|
||||
appAttemptRemovedEvent.getFinalAttemptState());
|
||||
appAttemptRemovedEvent.getFinalAttemptState(),
|
||||
appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
|
||||
}
|
||||
break;
|
||||
case CONTAINER_EXPIRED:
|
||||
|
@ -874,13 +882,13 @@ public class CapacityScheduler
|
|||
Container container = rmContainer.getContainer();
|
||||
|
||||
// Get the application for the finished container
|
||||
ApplicationAttemptId applicationAttemptId =
|
||||
container.getId().getApplicationAttemptId();
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp application =
|
||||
getCurrentAttemptForContainer(container.getId());
|
||||
ApplicationId appId =
|
||||
container.getId().getApplicationAttemptId().getApplicationId();
|
||||
if (application == null) {
|
||||
LOG.info("Container " + container + " of" +
|
||||
" unknown application " + applicationAttemptId +
|
||||
" completed with event " + event);
|
||||
LOG.info("Container " + container + " of" + " unknown application "
|
||||
+ appId + " completed with event " + event);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -892,28 +900,33 @@ public class CapacityScheduler
|
|||
queue.completedContainer(clusterResource, application, node,
|
||||
rmContainer, containerStatus, event, null);
|
||||
|
||||
LOG.info("Application " + applicationAttemptId +
|
||||
" released container " + container.getId() +
|
||||
" on node: " + node +
|
||||
" with event: " + event);
|
||||
LOG.info("Application attempt " + application.getApplicationAttemptId()
|
||||
+ " released container " + container.getId() + " on node: " + node
|
||||
+ " with event: " + event);
|
||||
}
|
||||
|
||||
@Lock(Lock.NoLock.class)
|
||||
FiCaSchedulerApp getApplication(ApplicationAttemptId applicationAttemptId) {
|
||||
return appAttempts.get(applicationAttemptId);
|
||||
FiCaSchedulerApp getApplicationAttempt(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
SchedulerApplication app =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
if (app != null) {
|
||||
return (FiCaSchedulerApp) app.getCurrentAppAttempt();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SchedulerAppReport getSchedulerAppInfo(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
FiCaSchedulerApp app = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp app = getApplicationAttempt(applicationAttemptId);
|
||||
return app == null ? null : new SchedulerAppReport(app);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApplicationResourceUsageReport getAppResourceUsageReport(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
FiCaSchedulerApp app = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp app = getApplicationAttempt(applicationAttemptId);
|
||||
return app == null ? null : app.getResourceUsageReport();
|
||||
}
|
||||
|
||||
|
@ -922,10 +935,22 @@ public class CapacityScheduler
|
|||
return nodes.get(nodeId);
|
||||
}
|
||||
|
||||
private RMContainer getRMContainer(ContainerId containerId) {
|
||||
FiCaSchedulerApp application =
|
||||
getApplication(containerId.getApplicationAttemptId());
|
||||
return (application == null) ? null : application.getRMContainer(containerId);
|
||||
@Override
|
||||
public RMContainer getRMContainer(ContainerId containerId) {
|
||||
FiCaSchedulerApp attempt = getCurrentAttemptForContainer(containerId);
|
||||
return (attempt == null) ? null : attempt.getRMContainer(containerId);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public FiCaSchedulerApp getCurrentAttemptForContainer(
|
||||
ContainerId containerId) {
|
||||
SchedulerApplication app =
|
||||
applications.get(containerId.getApplicationAttemptId()
|
||||
.getApplicationId());
|
||||
if (app != null) {
|
||||
return (FiCaSchedulerApp) app.getCurrentAppAttempt();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -958,7 +983,7 @@ public class CapacityScheduler
|
|||
LOG.debug("PREEMPT_CONTAINER: application:" + aid.toString() +
|
||||
" container: " + cont.toString());
|
||||
}
|
||||
FiCaSchedulerApp app = appAttempts.get(aid);
|
||||
FiCaSchedulerApp app = getApplicationAttempt(aid);
|
||||
if (app != null) {
|
||||
app.addPreemptContainer(cont.getContainerId());
|
||||
}
|
||||
|
|
|
@ -644,8 +644,7 @@ public class LeafQueue implements CSQueue {
|
|||
addApplicationAttempt(application, user);
|
||||
}
|
||||
|
||||
int attemptId = application.getApplicationAttemptId().getAttemptId();
|
||||
metrics.submitApp(userName, attemptId);
|
||||
metrics.submitAppAttempt(userName);
|
||||
getParent().submitApplicationAttempt(application, userName);
|
||||
}
|
||||
|
||||
|
@ -702,6 +701,8 @@ public class LeafQueue implements CSQueue {
|
|||
getParent().getQueuePath(), ace);
|
||||
throw ace;
|
||||
}
|
||||
|
||||
metrics.submitApp(userName);
|
||||
}
|
||||
|
||||
private synchronized void activateApplications() {
|
||||
|
|
|
@ -219,7 +219,8 @@ public class FiCaSchedulerNode extends SchedulerNode {
|
|||
" on node " + this.reservedContainer.getReservedNode());
|
||||
}
|
||||
|
||||
// Cannot reserve more than one application on a given node!
|
||||
// Cannot reserve more than one application attempt on a given node!
|
||||
// Reservation is still against attempt.
|
||||
if (!this.reservedContainer.getContainer().getId().getApplicationAttemptId().equals(
|
||||
reservedContainer.getContainer().getId().getApplicationAttemptId())) {
|
||||
throw new IllegalStateException("Trying to reserve" +
|
||||
|
|
|
@ -23,14 +23,21 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|||
public class AppAttemptAddedSchedulerEvent extends SchedulerEvent {
|
||||
|
||||
private final ApplicationAttemptId applicationAttemptId;
|
||||
private final boolean transferStateFromPreviousAttempt;
|
||||
|
||||
public AppAttemptAddedSchedulerEvent(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
boolean transferStateFromPreviousAttempt) {
|
||||
super(SchedulerEventType.APP_ATTEMPT_ADDED);
|
||||
this.applicationAttemptId = applicationAttemptId;
|
||||
this.transferStateFromPreviousAttempt = transferStateFromPreviousAttempt;
|
||||
}
|
||||
|
||||
public ApplicationAttemptId getApplicationAttemptId() {
|
||||
return applicationAttemptId;
|
||||
}
|
||||
|
||||
public boolean getTransferStateFromPreviousAttempt() {
|
||||
return transferStateFromPreviousAttempt;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,13 +25,15 @@ public class AppAttemptRemovedSchedulerEvent extends SchedulerEvent {
|
|||
|
||||
private final ApplicationAttemptId applicationAttemptId;
|
||||
private final RMAppAttemptState finalAttemptState;
|
||||
private final boolean keepContainersAcrossAppAttempts;
|
||||
|
||||
public AppAttemptRemovedSchedulerEvent(
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
RMAppAttemptState finalAttemptState) {
|
||||
RMAppAttemptState finalAttemptState, boolean keepContainers) {
|
||||
super(SchedulerEventType.APP_ATTEMPT_REMOVED);
|
||||
this.applicationAttemptId = applicationAttemptId;
|
||||
this.finalAttemptState = finalAttemptState;
|
||||
this.keepContainersAcrossAppAttempts = keepContainers;
|
||||
}
|
||||
|
||||
public ApplicationAttemptId getApplicationAttemptID() {
|
||||
|
@ -41,4 +43,8 @@ public class AppAttemptRemovedSchedulerEvent extends SchedulerEvent {
|
|||
public RMAppAttemptState getFinalAttemptState() {
|
||||
return this.finalAttemptState;
|
||||
}
|
||||
|
||||
public boolean getKeepContainersAcrossAppAttempts() {
|
||||
return this.keepContainersAcrossAppAttempts;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,7 +76,8 @@ public class AllocationConfiguration {
|
|||
@VisibleForTesting
|
||||
QueuePlacementPolicy placementPolicy;
|
||||
|
||||
private final Set<String> queueNames;
|
||||
@VisibleForTesting
|
||||
Set<String> queueNames;
|
||||
|
||||
public AllocationConfiguration(Map<String, Resource> minQueueResources,
|
||||
Map<String, Resource> maxQueueResources,
|
||||
|
|
|
@ -214,7 +214,7 @@ public class FSLeafQueue extends FSQueue {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Collection<FSQueue> getChildQueues() {
|
||||
public List<FSQueue> getChildQueues() {
|
||||
return new ArrayList<FSQueue>(1);
|
||||
}
|
||||
|
||||
|
|
|
@ -157,7 +157,7 @@ public class FSParentQueue extends FSQueue {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Collection<FSQueue> getChildQueues() {
|
||||
public List<FSQueue> getChildQueues() {
|
||||
return childQueues;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
|
@ -158,7 +159,7 @@ public abstract class FSQueue extends Schedulable implements Queue {
|
|||
/**
|
||||
* Gets the children of this queue, if any.
|
||||
*/
|
||||
public abstract Collection<FSQueue> getChildQueues();
|
||||
public abstract List<FSQueue> getChildQueues();
|
||||
|
||||
/**
|
||||
* Adds all applications in the queue and its subqueues to the given collection.
|
||||
|
|
|
@ -71,6 +71,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEven
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
|
@ -120,10 +121,10 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
@LimitedPrivate("yarn")
|
||||
@Unstable
|
||||
@SuppressWarnings("unchecked")
|
||||
public class FairScheduler implements ResourceScheduler {
|
||||
public class FairScheduler extends AbstractYarnScheduler implements
|
||||
ResourceScheduler {
|
||||
private boolean initialized;
|
||||
private FairSchedulerConfiguration conf;
|
||||
private RMContext rmContext;
|
||||
private Resource minimumAllocation;
|
||||
private Resource maximumAllocation;
|
||||
private Resource incrAllocation;
|
||||
|
@ -157,17 +158,6 @@ public class FairScheduler implements ResourceScheduler {
|
|||
// Time we last ran preemptTasksIfNecessary
|
||||
private long lastPreemptCheckTime;
|
||||
|
||||
// This stores per-application scheduling information,
|
||||
@VisibleForTesting
|
||||
protected Map<ApplicationId, SchedulerApplication> applications =
|
||||
new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
|
||||
|
||||
// This stores per-application-attempt scheduling information, indexed by
|
||||
// attempt ID's for fast lookup.
|
||||
@VisibleForTesting
|
||||
protected Map<ApplicationAttemptId, FSSchedulerApp> appAttempts =
|
||||
new ConcurrentHashMap<ApplicationAttemptId, FSSchedulerApp>();
|
||||
|
||||
// Nodes in the cluster, indexed by NodeId
|
||||
private Map<NodeId, FSSchedulerNode> nodes =
|
||||
new ConcurrentHashMap<NodeId, FSSchedulerNode>();
|
||||
|
@ -262,10 +252,21 @@ public class FairScheduler implements ResourceScheduler {
|
|||
return queueMgr;
|
||||
}
|
||||
|
||||
private RMContainer getRMContainer(ContainerId containerId) {
|
||||
FSSchedulerApp application =
|
||||
appAttempts.get(containerId.getApplicationAttemptId());
|
||||
return (application == null) ? null : application.getRMContainer(containerId);
|
||||
@Override
|
||||
public RMContainer getRMContainer(ContainerId containerId) {
|
||||
FSSchedulerApp attempt = getCurrentAttemptForContainer(containerId);
|
||||
return (attempt == null) ? null : attempt.getRMContainer(containerId);
|
||||
}
|
||||
|
||||
private FSSchedulerApp getCurrentAttemptForContainer(
|
||||
ContainerId containerId) {
|
||||
SchedulerApplication app =
|
||||
applications.get(containerId.getApplicationAttemptId()
|
||||
.getApplicationId());
|
||||
if (app != null) {
|
||||
return (FSSchedulerApp) app.getCurrentAppAttempt();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -638,9 +639,11 @@ public class FairScheduler implements ResourceScheduler {
|
|||
SchedulerApplication application =
|
||||
new SchedulerApplication(queue, user);
|
||||
applications.put(applicationId, application);
|
||||
queue.getMetrics().submitApp(user);
|
||||
|
||||
LOG.info("Accepted application " + applicationId + " from user: " + user
|
||||
+ ", in queue: " + queueName);
|
||||
+ ", in queue: " + queueName + ", currently num of applications: "
|
||||
+ applications.size());
|
||||
rmContext.getDispatcher().getEventHandler()
|
||||
.handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
|
||||
}
|
||||
|
@ -649,31 +652,35 @@ public class FairScheduler implements ResourceScheduler {
|
|||
* Add a new application attempt to the scheduler.
|
||||
*/
|
||||
protected synchronized void addApplicationAttempt(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
boolean transferStateFromPreviousAttempt) {
|
||||
SchedulerApplication application =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
String user = application.getUser();
|
||||
FSLeafQueue queue = (FSLeafQueue) application.getQueue();
|
||||
|
||||
FSSchedulerApp schedulerApp =
|
||||
FSSchedulerApp attempt =
|
||||
new FSSchedulerApp(applicationAttemptId, user,
|
||||
queue, new ActiveUsersManager(getRootQueueMetrics()),
|
||||
rmContext);
|
||||
if (transferStateFromPreviousAttempt) {
|
||||
attempt.transferStateFromPreviousAttempt(application
|
||||
.getCurrentAppAttempt());
|
||||
}
|
||||
application.setCurrentAppAttempt(attempt);
|
||||
|
||||
boolean runnable = maxRunningEnforcer.canAppBeRunnable(queue, user);
|
||||
queue.addApp(schedulerApp, runnable);
|
||||
queue.addApp(attempt, runnable);
|
||||
if (runnable) {
|
||||
maxRunningEnforcer.trackRunnableApp(schedulerApp);
|
||||
maxRunningEnforcer.trackRunnableApp(attempt);
|
||||
} else {
|
||||
maxRunningEnforcer.trackNonRunnableApp(schedulerApp);
|
||||
maxRunningEnforcer.trackNonRunnableApp(attempt);
|
||||
}
|
||||
|
||||
queue.getMetrics().submitApp(user, applicationAttemptId.getAttemptId());
|
||||
appAttempts.put(applicationAttemptId, schedulerApp);
|
||||
queue.getMetrics().submitAppAttempt(user);
|
||||
|
||||
LOG.info("Added Application Attempt " + applicationAttemptId
|
||||
+ " to scheduler from user: " + user + ", currently active: "
|
||||
+ appAttempts.size());
|
||||
+ " to scheduler from user: " + user);
|
||||
rmContext.getDispatcher().getEventHandler().handle(
|
||||
new RMAppAttemptEvent(applicationAttemptId,
|
||||
RMAppAttemptEventType.ATTEMPT_ADDED));
|
||||
|
@ -704,24 +711,38 @@ public class FairScheduler implements ResourceScheduler {
|
|||
|
||||
private synchronized void removeApplication(ApplicationId applicationId,
|
||||
RMAppState finalState) {
|
||||
SchedulerApplication application = applications.get(applicationId);
|
||||
if (application == null){
|
||||
LOG.warn("Couldn't find application " + applicationId);
|
||||
return;
|
||||
}
|
||||
application.stop(finalState);
|
||||
applications.remove(applicationId);
|
||||
}
|
||||
|
||||
private synchronized void removeApplicationAttempt(
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
RMAppAttemptState rmAppAttemptFinalState) {
|
||||
RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) {
|
||||
LOG.info("Application " + applicationAttemptId + " is done." +
|
||||
" finalState=" + rmAppAttemptFinalState);
|
||||
SchedulerApplication application =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
FSSchedulerApp attempt = getSchedulerApp(applicationAttemptId);
|
||||
|
||||
FSSchedulerApp application = appAttempts.get(applicationAttemptId);
|
||||
|
||||
if (application == null) {
|
||||
if (attempt == null || application == null) {
|
||||
LOG.info("Unknown application " + applicationAttemptId + " has completed!");
|
||||
return;
|
||||
}
|
||||
|
||||
// Release all the running containers
|
||||
for (RMContainer rmContainer : application.getLiveContainers()) {
|
||||
for (RMContainer rmContainer : attempt.getLiveContainers()) {
|
||||
if (keepContainers
|
||||
&& rmContainer.getState().equals(RMContainerState.RUNNING)) {
|
||||
// do not kill the running container in the case of work-preserving AM
|
||||
// restart.
|
||||
LOG.info("Skip killing " + rmContainer.getContainerId());
|
||||
continue;
|
||||
}
|
||||
completedContainer(rmContainer,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
rmContainer.getContainerId(),
|
||||
|
@ -730,30 +751,26 @@ public class FairScheduler implements ResourceScheduler {
|
|||
}
|
||||
|
||||
// Release all reserved containers
|
||||
for (RMContainer rmContainer : application.getReservedContainers()) {
|
||||
for (RMContainer rmContainer : attempt.getReservedContainers()) {
|
||||
completedContainer(rmContainer,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
rmContainer.getContainerId(),
|
||||
"Application Complete"),
|
||||
RMContainerEventType.KILL);
|
||||
RMContainerEventType.KILL);
|
||||
}
|
||||
|
||||
// Clean up pending requests, metrics etc.
|
||||
application.stop(rmAppAttemptFinalState);
|
||||
attempt.stop(rmAppAttemptFinalState);
|
||||
|
||||
// Inform the queue
|
||||
FSLeafQueue queue = queueMgr.getLeafQueue(application.getQueue()
|
||||
FSLeafQueue queue = queueMgr.getLeafQueue(attempt.getQueue()
|
||||
.getQueueName(), false);
|
||||
boolean wasRunnable = queue.removeApp(application);
|
||||
boolean wasRunnable = queue.removeApp(attempt);
|
||||
|
||||
if (wasRunnable) {
|
||||
maxRunningEnforcer.updateRunnabilityOnAppRemoval(application);
|
||||
maxRunningEnforcer.updateRunnabilityOnAppRemoval(attempt);
|
||||
} else {
|
||||
maxRunningEnforcer.untrackNonRunnableApp(application);
|
||||
maxRunningEnforcer.untrackNonRunnableApp(attempt);
|
||||
}
|
||||
|
||||
// Remove from our data-structure
|
||||
appAttempts.remove(applicationAttemptId);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -769,11 +786,13 @@ public class FairScheduler implements ResourceScheduler {
|
|||
Container container = rmContainer.getContainer();
|
||||
|
||||
// Get the application for the finished container
|
||||
ApplicationAttemptId applicationAttemptId = container.getId().getApplicationAttemptId();
|
||||
FSSchedulerApp application = appAttempts.get(applicationAttemptId);
|
||||
FSSchedulerApp application =
|
||||
getCurrentAttemptForContainer(container.getId());
|
||||
ApplicationId appId =
|
||||
container.getId().getApplicationAttemptId().getApplicationId();
|
||||
if (application == null) {
|
||||
LOG.info("Container " + container + " of" +
|
||||
" unknown application " + applicationAttemptId +
|
||||
" unknown application attempt " + appId +
|
||||
" completed with event " + event);
|
||||
return;
|
||||
}
|
||||
|
@ -790,10 +809,9 @@ public class FairScheduler implements ResourceScheduler {
|
|||
updateRootQueueMetrics();
|
||||
}
|
||||
|
||||
LOG.info("Application " + applicationAttemptId +
|
||||
" released container " + container.getId() +
|
||||
" on node: " + node +
|
||||
" with event: " + event);
|
||||
LOG.info("Application attempt " + application.getApplicationAttemptId()
|
||||
+ " released container " + container.getId() + " on node: " + node
|
||||
+ " with event: " + event);
|
||||
}
|
||||
|
||||
private synchronized void addNode(RMNode node) {
|
||||
|
@ -844,7 +862,7 @@ public class FairScheduler implements ResourceScheduler {
|
|||
List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
|
||||
|
||||
// Make sure this application exists
|
||||
FSSchedulerApp application = appAttempts.get(appAttemptId);
|
||||
FSSchedulerApp application = getSchedulerApp(appAttemptId);
|
||||
if (application == null) {
|
||||
LOG.info("Calling allocate on removed " +
|
||||
"or non existant application " + appAttemptId);
|
||||
|
@ -914,12 +932,11 @@ public class FairScheduler implements ResourceScheduler {
|
|||
*/
|
||||
private void containerLaunchedOnNode(ContainerId containerId, FSSchedulerNode node) {
|
||||
// Get the application for the launched container
|
||||
ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId();
|
||||
FSSchedulerApp application = appAttempts.get(applicationAttemptId);
|
||||
FSSchedulerApp application = getCurrentAttemptForContainer(containerId);
|
||||
if (application == null) {
|
||||
LOG.info("Unknown application: " + applicationAttemptId +
|
||||
" launched container " + containerId +
|
||||
" on node: " + node);
|
||||
LOG.info("Unknown application "
|
||||
+ containerId.getApplicationAttemptId().getApplicationId()
|
||||
+ " launched container " + containerId + " on node: " + node);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1058,28 +1075,34 @@ public class FairScheduler implements ResourceScheduler {
|
|||
}
|
||||
|
||||
public FSSchedulerApp getSchedulerApp(ApplicationAttemptId appAttemptId) {
|
||||
return appAttempts.get(appAttemptId);
|
||||
SchedulerApplication app =
|
||||
applications.get(appAttemptId.getApplicationId());
|
||||
if (app != null) {
|
||||
return (FSSchedulerApp) app.getCurrentAppAttempt();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SchedulerAppReport getSchedulerAppInfo(
|
||||
ApplicationAttemptId appAttemptId) {
|
||||
if (!appAttempts.containsKey(appAttemptId)) {
|
||||
FSSchedulerApp attempt = getSchedulerApp(appAttemptId);
|
||||
if (attempt == null) {
|
||||
LOG.error("Request for appInfo of unknown attempt" + appAttemptId);
|
||||
return null;
|
||||
}
|
||||
return new SchedulerAppReport(appAttempts.get(appAttemptId));
|
||||
return new SchedulerAppReport(attempt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApplicationResourceUsageReport getAppResourceUsageReport(
|
||||
ApplicationAttemptId appAttemptId) {
|
||||
FSSchedulerApp app = appAttempts.get(appAttemptId);
|
||||
if (app == null) {
|
||||
FSSchedulerApp attempt = getSchedulerApp(appAttemptId);
|
||||
if (attempt == null) {
|
||||
LOG.error("Request for appInfo of unknown attempt" + appAttemptId);
|
||||
return null;
|
||||
}
|
||||
return app.getResourceUsageReport();
|
||||
return attempt.getResourceUsageReport();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1145,7 +1168,8 @@ public class FairScheduler implements ResourceScheduler {
|
|||
}
|
||||
AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
|
||||
(AppAttemptAddedSchedulerEvent) event;
|
||||
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId());
|
||||
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
|
||||
appAttemptAddedEvent.getTransferStateFromPreviousAttempt());
|
||||
break;
|
||||
case APP_ATTEMPT_REMOVED:
|
||||
if (!(event instanceof AppAttemptRemovedSchedulerEvent)) {
|
||||
|
@ -1153,8 +1177,10 @@ public class FairScheduler implements ResourceScheduler {
|
|||
}
|
||||
AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
|
||||
(AppAttemptRemovedSchedulerEvent) event;
|
||||
removeApplicationAttempt(appAttemptRemovedEvent.getApplicationAttemptID(),
|
||||
appAttemptRemovedEvent.getFinalAttemptState());
|
||||
removeApplicationAttempt(
|
||||
appAttemptRemovedEvent.getApplicationAttemptID(),
|
||||
appAttemptRemovedEvent.getFinalAttemptState(),
|
||||
appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
|
||||
break;
|
||||
case CONTAINER_EXPIRED:
|
||||
if (!(event instanceof ContainerExpiredSchedulerEvent)) {
|
||||
|
@ -1205,6 +1231,9 @@ public class FairScheduler implements ResourceScheduler {
|
|||
|
||||
rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf);
|
||||
this.rmContext = rmContext;
|
||||
// This stores per-application scheduling information
|
||||
this.applications =
|
||||
new ConcurrentHashMap<ApplicationId, SchedulerApplication>();
|
||||
this.eventLog = new FairSchedulerEventLog();
|
||||
eventLog.init(this.conf);
|
||||
|
||||
|
@ -1327,5 +1356,4 @@ public class FairScheduler implements ResourceScheduler {
|
|||
queue.collectSchedulerApplications(apps);
|
||||
return apps;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -85,9 +85,7 @@ public class QueueManager {
|
|||
* could be referred to as just "parent1.queue2".
|
||||
*/
|
||||
public FSLeafQueue getLeafQueue(String name, boolean create) {
|
||||
if (!name.startsWith(ROOT_QUEUE + ".")) {
|
||||
name = ROOT_QUEUE + "." + name;
|
||||
}
|
||||
name = ensureRootPrefix(name);
|
||||
synchronized (queues) {
|
||||
FSQueue queue = queues.get(name);
|
||||
if (queue == null && create) {
|
||||
|
@ -174,13 +172,107 @@ public class QueueManager {
|
|||
return leafQueue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make way for the given leaf queue if possible, by removing incompatible
|
||||
* queues with no apps in them. Incompatibility could be due to
|
||||
* (1) leafToCreate currently being a parent, or (2) an existing leaf queue in
|
||||
* the ancestry of leafToCreate.
|
||||
*
|
||||
* We will never remove the root queue or the default queue in this way.
|
||||
*
|
||||
* @return true if we can create leafToCreate or it already exists.
|
||||
*/
|
||||
private boolean removeEmptyIncompatibleQueues(String leafToCreate) {
|
||||
leafToCreate = ensureRootPrefix(leafToCreate);
|
||||
|
||||
// Ensure leafToCreate is not root and doesn't have the default queue in its
|
||||
// ancestry.
|
||||
if (leafToCreate.equals(ROOT_QUEUE) ||
|
||||
leafToCreate.startsWith(
|
||||
ROOT_QUEUE + "." + YarnConfiguration.DEFAULT_QUEUE_NAME + ".")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FSQueue queue = queues.get(leafToCreate);
|
||||
// Queue exists already.
|
||||
if (queue != null) {
|
||||
if (queue instanceof FSLeafQueue) {
|
||||
// If it's an already existing leaf, we're ok.
|
||||
return true;
|
||||
} else {
|
||||
// If it's an existing parent queue, remove it if it's empty.
|
||||
return removeQueueIfEmpty(queue);
|
||||
}
|
||||
}
|
||||
|
||||
// Queue doesn't exist already. Check if the new queue would be created
|
||||
// under an existing leaf queue. If so, try removing that leaf queue.
|
||||
int sepIndex = leafToCreate.length();
|
||||
sepIndex = leafToCreate.lastIndexOf('.', sepIndex-1);
|
||||
while (sepIndex != -1) {
|
||||
String prefixString = leafToCreate.substring(0, sepIndex);
|
||||
FSQueue prefixQueue = queues.get(prefixString);
|
||||
if (prefixQueue != null && prefixQueue instanceof FSLeafQueue) {
|
||||
return removeQueueIfEmpty(prefixQueue);
|
||||
}
|
||||
sepIndex = leafToCreate.lastIndexOf('.', sepIndex-1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove the queue if it and its descendants are all empty.
|
||||
* @param queue
|
||||
* @return true if removed, false otherwise
|
||||
*/
|
||||
private boolean removeQueueIfEmpty(FSQueue queue) {
|
||||
if (isEmpty(queue)) {
|
||||
removeQueue(queue);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove a queue and all its descendants.
|
||||
*/
|
||||
private void removeQueue(FSQueue queue) {
|
||||
if (queue instanceof FSLeafQueue) {
|
||||
leafQueues.remove(queue);
|
||||
} else {
|
||||
List<FSQueue> childQueues = queue.getChildQueues();
|
||||
while (!childQueues.isEmpty()) {
|
||||
removeQueue(childQueues.get(0));
|
||||
}
|
||||
}
|
||||
queues.remove(queue.getName());
|
||||
queue.getParent().getChildQueues().remove(queue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there are no applications, running or not, in the given
|
||||
* queue or any of its descendents.
|
||||
*/
|
||||
protected boolean isEmpty(FSQueue queue) {
|
||||
if (queue instanceof FSLeafQueue) {
|
||||
FSLeafQueue leafQueue = (FSLeafQueue)queue;
|
||||
return queue.getNumRunnableApps() == 0 &&
|
||||
leafQueue.getNonRunnableAppSchedulables().isEmpty();
|
||||
} else {
|
||||
for (FSQueue child : queue.getChildQueues()) {
|
||||
if (!isEmpty(child)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a queue by name.
|
||||
*/
|
||||
public FSQueue getQueue(String name) {
|
||||
if (!name.startsWith(ROOT_QUEUE + ".") && !name.equals(ROOT_QUEUE)) {
|
||||
name = ROOT_QUEUE + "." + name;
|
||||
}
|
||||
name = ensureRootPrefix(name);
|
||||
synchronized (queues) {
|
||||
return queues.get(name);
|
||||
}
|
||||
|
@ -190,9 +282,7 @@ public class QueueManager {
|
|||
* Return whether a queue exists already.
|
||||
*/
|
||||
public boolean exists(String name) {
|
||||
if (!name.startsWith(ROOT_QUEUE + ".") && !name.equals(ROOT_QUEUE)) {
|
||||
name = ROOT_QUEUE + "." + name;
|
||||
}
|
||||
name = ensureRootPrefix(name);
|
||||
synchronized (queues) {
|
||||
return queues.containsKey(name);
|
||||
}
|
||||
|
@ -214,10 +304,19 @@ public class QueueManager {
|
|||
return queues.values();
|
||||
}
|
||||
|
||||
private String ensureRootPrefix(String name) {
|
||||
if (!name.startsWith(ROOT_QUEUE + ".") && !name.equals(ROOT_QUEUE)) {
|
||||
name = ROOT_QUEUE + "." + name;
|
||||
}
|
||||
return name;
|
||||
}
|
||||
|
||||
public void updateAllocationConfiguration(AllocationConfiguration queueConf) {
|
||||
// Make sure all queues exist
|
||||
for (String name : queueConf.getQueueNames()) {
|
||||
getLeafQueue(name, true);
|
||||
if (removeEmptyIncompatibleQueues(name)) {
|
||||
getLeafQueue(name, true);
|
||||
}
|
||||
}
|
||||
|
||||
for (FSQueue queue : queues.values()) {
|
||||
|
|
|
@ -67,9 +67,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptE
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
|
||||
|
@ -103,7 +105,8 @@ import com.google.common.annotations.VisibleForTesting;
|
|||
@LimitedPrivate("yarn")
|
||||
@Evolving
|
||||
@SuppressWarnings("unchecked")
|
||||
public class FifoScheduler implements ResourceScheduler, Configurable {
|
||||
public class FifoScheduler extends AbstractYarnScheduler implements
|
||||
ResourceScheduler, Configurable {
|
||||
|
||||
private static final Log LOG = LogFactory.getLog(FifoScheduler.class);
|
||||
|
||||
|
@ -114,7 +117,6 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
|
||||
private final static Container[] EMPTY_CONTAINER_ARRAY = new Container[] {};
|
||||
private final static List<Container> EMPTY_CONTAINER_LIST = Arrays.asList(EMPTY_CONTAINER_ARRAY);
|
||||
private RMContext rmContext;
|
||||
|
||||
protected Map<NodeId, FiCaSchedulerNode> nodes = new ConcurrentHashMap<NodeId, FiCaSchedulerNode>();
|
||||
|
||||
|
@ -123,15 +125,6 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
private Resource maximumAllocation;
|
||||
private boolean usePortForNodeName;
|
||||
|
||||
@VisibleForTesting
|
||||
protected Map<ApplicationId, SchedulerApplication> applications =
|
||||
new ConcurrentSkipListMap<ApplicationId, SchedulerApplication>();
|
||||
|
||||
// Use ConcurrentSkipListMap because applications need to be ordered
|
||||
@VisibleForTesting
|
||||
protected Map<ApplicationAttemptId, FiCaSchedulerApp> appAttempts
|
||||
= new ConcurrentSkipListMap<ApplicationAttemptId, FiCaSchedulerApp>();
|
||||
|
||||
private ActiveUsersManager activeUsersManager;
|
||||
|
||||
private static final String DEFAULT_QUEUE_NAME = "default";
|
||||
|
@ -246,6 +239,9 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
if (!this.initialized) {
|
||||
validateConf(conf);
|
||||
this.rmContext = rmContext;
|
||||
//Use ConcurrentSkipListMap because applications need to be ordered
|
||||
this.applications =
|
||||
new ConcurrentSkipListMap<ApplicationId, SchedulerApplication>();
|
||||
this.minimumAllocation =
|
||||
Resources.createResource(conf.getInt(
|
||||
YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
|
||||
|
@ -270,7 +266,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
public Allocation allocate(
|
||||
ApplicationAttemptId applicationAttemptId, List<ResourceRequest> ask,
|
||||
List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
|
||||
if (application == null) {
|
||||
LOG.error("Calling allocate on removed " +
|
||||
"or non existant application " + applicationAttemptId);
|
||||
|
@ -336,22 +332,26 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
}
|
||||
|
||||
@VisibleForTesting
|
||||
FiCaSchedulerApp getApplication(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
return appAttempts.get(applicationAttemptId);
|
||||
FiCaSchedulerApp getApplicationAttempt(ApplicationAttemptId applicationAttemptId) {
|
||||
SchedulerApplication app =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
if (app != null) {
|
||||
return (FiCaSchedulerApp) app.getCurrentAppAttempt();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SchedulerAppReport getSchedulerAppInfo(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
FiCaSchedulerApp app = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp app = getApplicationAttempt(applicationAttemptId);
|
||||
return app == null ? null : new SchedulerAppReport(app);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ApplicationResourceUsageReport getAppResourceUsageReport(
|
||||
ApplicationAttemptId applicationAttemptId) {
|
||||
FiCaSchedulerApp app = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp app = getApplicationAttempt(applicationAttemptId);
|
||||
return app == null ? null : app.getResourceUsageReport();
|
||||
}
|
||||
|
||||
|
@ -362,15 +362,18 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
private synchronized void addApplication(ApplicationId applicationId,
|
||||
String queue, String user) {
|
||||
SchedulerApplication application =
|
||||
new SchedulerApplication(null, user);
|
||||
new SchedulerApplication(DEFAULT_QUEUE, user);
|
||||
applications.put(applicationId, application);
|
||||
LOG.info("Accepted application " + applicationId + " from user: " + user);
|
||||
metrics.submitApp(user);
|
||||
LOG.info("Accepted application " + applicationId + " from user: " + user
|
||||
+ ", currently num of applications: " + applications.size());
|
||||
rmContext.getDispatcher().getEventHandler()
|
||||
.handle(new RMAppEvent(applicationId, RMAppEventType.APP_ACCEPTED));
|
||||
}
|
||||
|
||||
private synchronized void addApplicationAttempt(
|
||||
ApplicationAttemptId appAttemptId) {
|
||||
private synchronized void
|
||||
addApplicationAttempt(ApplicationAttemptId appAttemptId,
|
||||
boolean transferStateFromPreviousAttempt) {
|
||||
SchedulerApplication application =
|
||||
applications.get(appAttemptId.getApplicationId());
|
||||
String user = application.getUser();
|
||||
|
@ -378,11 +381,16 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
FiCaSchedulerApp schedulerApp =
|
||||
new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE,
|
||||
activeUsersManager, this.rmContext);
|
||||
appAttempts.put(appAttemptId, schedulerApp);
|
||||
metrics.submitApp(user, appAttemptId.getAttemptId());
|
||||
|
||||
if (transferStateFromPreviousAttempt) {
|
||||
schedulerApp.transferStateFromPreviousAttempt(application
|
||||
.getCurrentAppAttempt());
|
||||
}
|
||||
application.setCurrentAppAttempt(schedulerApp);
|
||||
|
||||
metrics.submitAppAttempt(user);
|
||||
LOG.info("Added Application Attempt " + appAttemptId
|
||||
+ " to scheduler from user " + application.getUser()
|
||||
+ ", currently active: " + appAttempts.size());
|
||||
+ " to scheduler from user " + application.getUser());
|
||||
rmContext.getDispatcher().getEventHandler().handle(
|
||||
new RMAppAttemptEvent(appAttemptId,
|
||||
RMAppAttemptEventType.ATTEMPT_ADDED));
|
||||
|
@ -391,37 +399,47 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
private synchronized void doneApplication(ApplicationId applicationId,
|
||||
RMAppState finalState) {
|
||||
SchedulerApplication application = applications.get(applicationId);
|
||||
if (application == null){
|
||||
LOG.warn("Couldn't find application " + applicationId);
|
||||
return;
|
||||
}
|
||||
|
||||
// Inform the activeUsersManager
|
||||
activeUsersManager.deactivateApplication(application.getUser(),
|
||||
applicationId);
|
||||
application.stop(finalState);
|
||||
applications.remove(applicationId);
|
||||
}
|
||||
|
||||
private synchronized void doneApplicationAttempt(
|
||||
ApplicationAttemptId applicationAttemptId,
|
||||
RMAppAttemptState rmAppAttemptFinalState)
|
||||
RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers)
|
||||
throws IOException {
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
if (application == null) {
|
||||
FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
|
||||
SchedulerApplication application =
|
||||
applications.get(applicationAttemptId.getApplicationId());
|
||||
if (application == null || attempt == null) {
|
||||
throw new IOException("Unknown application " + applicationAttemptId +
|
||||
" has completed!");
|
||||
}
|
||||
|
||||
// Kill all 'live' containers
|
||||
for (RMContainer container : application.getLiveContainers()) {
|
||||
containerCompleted(container,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
container.getContainerId(),
|
||||
SchedulerUtils.COMPLETED_APPLICATION),
|
||||
RMContainerEventType.KILL);
|
||||
for (RMContainer container : attempt.getLiveContainers()) {
|
||||
if (keepContainers
|
||||
&& container.getState().equals(RMContainerState.RUNNING)) {
|
||||
// do not kill the running container in the case of work-preserving AM
|
||||
// restart.
|
||||
LOG.info("Skip killing " + container.getContainerId());
|
||||
continue;
|
||||
}
|
||||
containerCompleted(container,
|
||||
SchedulerUtils.createAbnormalContainerStatus(
|
||||
container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
|
||||
RMContainerEventType.KILL);
|
||||
}
|
||||
|
||||
// Clean up pending requests, metrics etc.
|
||||
application.stop(rmAppAttemptFinalState);
|
||||
|
||||
// Remove the application
|
||||
appAttempts.remove(applicationAttemptId);
|
||||
attempt.stop(rmAppAttemptFinalState);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -432,12 +450,13 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
private void assignContainers(FiCaSchedulerNode node) {
|
||||
LOG.debug("assignContainers:" +
|
||||
" node=" + node.getRMNode().getNodeAddress() +
|
||||
" #applications=" + appAttempts.size());
|
||||
" #applications=" + applications.size());
|
||||
|
||||
// Try to assign containers to applications in fifo order
|
||||
for (Map.Entry<ApplicationAttemptId, FiCaSchedulerApp> e : appAttempts
|
||||
for (Map.Entry<ApplicationId, SchedulerApplication> e : applications
|
||||
.entrySet()) {
|
||||
FiCaSchedulerApp application = e.getValue();
|
||||
FiCaSchedulerApp application =
|
||||
(FiCaSchedulerApp) e.getValue().getCurrentAppAttempt();
|
||||
LOG.debug("pre-assignContainers");
|
||||
application.showRequests();
|
||||
synchronized (application) {
|
||||
|
@ -474,8 +493,10 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
|
||||
// Update the applications' headroom to correctly take into
|
||||
// account the containers assigned in this update.
|
||||
for (FiCaSchedulerApp application : appAttempts.values()) {
|
||||
application.setHeadroom(Resources.subtract(clusterResource, usedResource));
|
||||
for (SchedulerApplication application : applications.values()) {
|
||||
FiCaSchedulerApp attempt =
|
||||
(FiCaSchedulerApp) application.getCurrentAppAttempt();
|
||||
attempt.setHeadroom(Resources.subtract(clusterResource, usedResource));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -744,7 +765,8 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
{
|
||||
AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
|
||||
(AppAttemptAddedSchedulerEvent) event;
|
||||
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId());
|
||||
addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
|
||||
appAttemptAddedEvent.getTransferStateFromPreviousAttempt());
|
||||
}
|
||||
break;
|
||||
case APP_ATTEMPT_REMOVED:
|
||||
|
@ -754,7 +776,8 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
try {
|
||||
doneApplicationAttempt(
|
||||
appAttemptRemovedEvent.getApplicationAttemptID(),
|
||||
appAttemptRemovedEvent.getFinalAttemptState());
|
||||
appAttemptRemovedEvent.getFinalAttemptState(),
|
||||
appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
|
||||
} catch(IOException ie) {
|
||||
LOG.error("Unable to remove application "
|
||||
+ appAttemptRemovedEvent.getApplicationAttemptID(), ie);
|
||||
|
@ -780,12 +803,11 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
|
||||
private void containerLaunchedOnNode(ContainerId containerId, FiCaSchedulerNode node) {
|
||||
// Get the application for the finished container
|
||||
ApplicationAttemptId applicationAttemptId = containerId.getApplicationAttemptId();
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp application = getCurrentAttemptForContainer(containerId);
|
||||
if (application == null) {
|
||||
LOG.info("Unknown application: " + applicationAttemptId +
|
||||
" launched container " + containerId +
|
||||
" on node: " + node);
|
||||
LOG.info("Unknown application "
|
||||
+ containerId.getApplicationAttemptId().getApplicationId()
|
||||
+ " launched container " + containerId + " on node: " + node);
|
||||
// Some unknown container sneaked into the system. Kill it.
|
||||
this.rmContext.getDispatcher().getEventHandler()
|
||||
.handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId));
|
||||
|
@ -806,14 +828,16 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
|
||||
// Get the application for the finished container
|
||||
Container container = rmContainer.getContainer();
|
||||
ApplicationAttemptId applicationAttemptId = container.getId().getApplicationAttemptId();
|
||||
FiCaSchedulerApp application = getApplication(applicationAttemptId);
|
||||
FiCaSchedulerApp application =
|
||||
getCurrentAttemptForContainer(container.getId());
|
||||
ApplicationId appId =
|
||||
container.getId().getApplicationAttemptId().getApplicationId();
|
||||
|
||||
// Get the node on which the container was allocated
|
||||
FiCaSchedulerNode node = getNode(container.getNodeId());
|
||||
|
||||
if (application == null) {
|
||||
LOG.info("Unknown application: " + applicationAttemptId +
|
||||
LOG.info("Unknown application: " + appId +
|
||||
" released container " + container.getId() +
|
||||
" on node: " + node +
|
||||
" with event: " + event);
|
||||
|
@ -829,7 +853,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
// Update total usage
|
||||
Resources.subtractFrom(usedResource, container.getResource());
|
||||
|
||||
LOG.info("Application " + applicationAttemptId +
|
||||
LOG.info("Application attempt " + application.getApplicationAttemptId() +
|
||||
" released container " + container.getId() +
|
||||
" on node: " + node +
|
||||
" with event: " + event);
|
||||
|
@ -887,11 +911,22 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
FiCaSchedulerNode node = getNode(nodeId);
|
||||
return node == null ? null : new SchedulerNodeReport(node);
|
||||
}
|
||||
|
||||
private RMContainer getRMContainer(ContainerId containerId) {
|
||||
FiCaSchedulerApp application =
|
||||
getApplication(containerId.getApplicationAttemptId());
|
||||
return (application == null) ? null : application.getRMContainer(containerId);
|
||||
|
||||
@Override
|
||||
public RMContainer getRMContainer(ContainerId containerId) {
|
||||
FiCaSchedulerApp attempt = getCurrentAttemptForContainer(containerId);
|
||||
return (attempt == null) ? null : attempt.getRMContainer(containerId);
|
||||
}
|
||||
|
||||
private FiCaSchedulerApp getCurrentAttemptForContainer(
|
||||
ContainerId containerId) {
|
||||
SchedulerApplication app =
|
||||
applications.get(containerId.getApplicationAttemptId()
|
||||
.getApplicationId());
|
||||
if (app != null) {
|
||||
return (FiCaSchedulerApp) app.getCurrentAppAttempt();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -908,12 +943,12 @@ public class FifoScheduler implements ResourceScheduler, Configurable {
|
|||
@Override
|
||||
public synchronized List<ApplicationAttemptId> getAppsInQueue(String queueName) {
|
||||
if (queueName.equals(DEFAULT_QUEUE.getQueueName())) {
|
||||
List<ApplicationAttemptId> apps = new ArrayList<ApplicationAttemptId>(
|
||||
appAttempts.size());
|
||||
for (FiCaSchedulerApp app : appAttempts.values()) {
|
||||
apps.add(app.getApplicationAttemptId());
|
||||
List<ApplicationAttemptId> attempts = new ArrayList<ApplicationAttemptId>(
|
||||
applications.size());
|
||||
for (SchedulerApplication app : applications.values()) {
|
||||
attempts.add(app.getCurrentAppAttempt().getApplicationAttemptId());
|
||||
}
|
||||
return apps;
|
||||
return attempts;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.security.authorize;
|
|||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||
import org.apache.hadoop.security.authorize.PolicyProvider;
|
||||
import org.apache.hadoop.security.authorize.Service;
|
||||
|
@ -53,6 +54,9 @@ public class RMPolicyProvider extends PolicyProvider {
|
|||
new Service(
|
||||
YarnConfiguration.YARN_SECURITY_SERVICE_AUTHORIZATION_CONTAINER_MANAGEMENT_PROTOCOL,
|
||||
ContainerManagementProtocolPB.class),
|
||||
new Service(
|
||||
CommonConfigurationKeys.SECURITY_HA_SERVICE_PROTOCOL_ACL,
|
||||
HAServiceProtocol.class),
|
||||
};
|
||||
|
||||
@Override
|
||||
|
|
|
@ -43,6 +43,7 @@ public class AboutBlock extends HtmlBlock {
|
|||
info("Cluster overview").
|
||||
_("Cluster ID:", cinfo.getClusterId()).
|
||||
_("ResourceManager state:", cinfo.getState()).
|
||||
_("ResourceManager HA state:", cinfo.getHAState()).
|
||||
_("ResourceManager started on:", Times.format(cinfo.getStartedOn())).
|
||||
_("ResourceManager version:", cinfo.getRMBuildVersion() +
|
||||
" on " + cinfo.getRMVersionBuiltOn()).
|
||||
|
|
|
@ -21,6 +21,7 @@ import javax.xml.bind.annotation.XmlAccessType;
|
|||
import javax.xml.bind.annotation.XmlAccessorType;
|
||||
import javax.xml.bind.annotation.XmlRootElement;
|
||||
|
||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||
import org.apache.hadoop.service.Service.STATE;
|
||||
import org.apache.hadoop.util.VersionInfo;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
|
@ -33,6 +34,7 @@ public class ClusterInfo {
|
|||
protected long id;
|
||||
protected long startedOn;
|
||||
protected STATE state;
|
||||
protected HAServiceProtocol.HAServiceState haState;
|
||||
protected String resourceManagerVersion;
|
||||
protected String resourceManagerBuildVersion;
|
||||
protected String resourceManagerVersionBuiltOn;
|
||||
|
@ -48,6 +50,7 @@ public class ClusterInfo {
|
|||
|
||||
this.id = ts;
|
||||
this.state = rm.getServiceState();
|
||||
this.haState = rm.getRMContext().getHAServiceState();
|
||||
this.startedOn = ts;
|
||||
this.resourceManagerVersion = YarnVersionInfo.getVersion();
|
||||
this.resourceManagerBuildVersion = YarnVersionInfo.getBuildVersion();
|
||||
|
@ -61,6 +64,10 @@ public class ClusterInfo {
|
|||
return this.state.toString();
|
||||
}
|
||||
|
||||
public String getHAState() {
|
||||
return this.haState.toString();
|
||||
}
|
||||
|
||||
public String getRMVersion() {
|
||||
return this.resourceManagerVersion;
|
||||
}
|
||||
|
|
|
@ -171,7 +171,7 @@ public class Application {
|
|||
new AppAddedSchedulerEvent(this.applicationId, this.queue, "user");
|
||||
scheduler.handle(addAppEvent);
|
||||
AppAttemptAddedSchedulerEvent addAttemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(this.applicationAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(this.applicationAttemptId, false);
|
||||
scheduler.handle(addAttemptEvent);
|
||||
}
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.server.api.records.NodeStatus;
|
|||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
import org.apache.hadoop.yarn.util.Records;
|
||||
import org.apache.hadoop.yarn.util.YarnVersionInfo;
|
||||
import org.mortbay.log.Log;
|
||||
|
||||
public class MockNM {
|
||||
|
||||
|
@ -130,12 +131,13 @@ public class MockNM {
|
|||
int containerId, ContainerState containerState) throws Exception {
|
||||
HashMap<ApplicationId, List<ContainerStatus>> nodeUpdate =
|
||||
new HashMap<ApplicationId, List<ContainerStatus>>(1);
|
||||
ContainerStatus amContainerStatus = BuilderUtils.newContainerStatus(
|
||||
BuilderUtils.newContainerId(attemptId, 1),
|
||||
ContainerState.COMPLETE, "Success", 0);
|
||||
ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
|
||||
BuilderUtils.newContainerId(attemptId, containerId), containerState,
|
||||
"Success", 0);
|
||||
ArrayList<ContainerStatus> containerStatusList =
|
||||
new ArrayList<ContainerStatus>(1);
|
||||
containerStatusList.add(amContainerStatus);
|
||||
containerStatusList.add(containerStatus);
|
||||
Log.info("ContainerStatus: " + containerStatus);
|
||||
nodeUpdate.put(attemptId.getApplicationId(), containerStatusList);
|
||||
return nodeHeartbeat(nodeUpdate, true);
|
||||
}
|
||||
|
@ -152,6 +154,7 @@ public class MockNM {
|
|||
status.setResponseId(resId);
|
||||
status.setNodeId(nodeId);
|
||||
for (Map.Entry<ApplicationId, List<ContainerStatus>> entry : conts.entrySet()) {
|
||||
Log.info("entry.getValue() " + entry.getValue());
|
||||
status.setContainersStatuses(entry.getValue());
|
||||
}
|
||||
NodeHealthStatus healthStatus = Records.newRecord(NodeHealthStatus.class);
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.hadoop.io.DataOutputBuffer;
|
|||
import org.apache.hadoop.security.Credentials;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
||||
|
@ -40,7 +41,10 @@ import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
|
@ -56,6 +60,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptE
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
||||
|
@ -122,6 +128,33 @@ public class MockRM extends ResourceManager {
|
|||
attempt.getAppAttemptState());
|
||||
}
|
||||
|
||||
public void waitForContainerAllocated(MockNM nm, ContainerId containerId)
|
||||
throws Exception {
|
||||
int timeoutSecs = 0;
|
||||
while (getResourceScheduler().getRMContainer(containerId) == null
|
||||
&& timeoutSecs++ < 40) {
|
||||
System.out.println("Waiting for" + containerId + " to be allocated.");
|
||||
nm.nodeHeartbeat(true);
|
||||
Thread.sleep(200);
|
||||
}
|
||||
}
|
||||
|
||||
public void waitForState(MockNM nm, ContainerId containerId,
|
||||
RMContainerState containerState) throws Exception {
|
||||
RMContainer container = getResourceScheduler().getRMContainer(containerId);
|
||||
Assert.assertNotNull("Container shouldn't be null", container);
|
||||
int timeoutSecs = 0;
|
||||
while (!containerState.equals(container.getState()) && timeoutSecs++ < 40) {
|
||||
System.out.println("Container : " + containerId + " State is : "
|
||||
+ container.getState() + " Waiting for state : " + containerState);
|
||||
nm.nodeHeartbeat(true);
|
||||
Thread.sleep(300);
|
||||
}
|
||||
System.out.println("Container State is : " + container.getState());
|
||||
Assert.assertEquals("Container state is not correct (timedout)",
|
||||
containerState, container.getState());
|
||||
}
|
||||
|
||||
// get new application id
|
||||
public GetNewApplicationResponse getNewAppId() throws Exception {
|
||||
ApplicationClientProtocol client = getClientRMService();
|
||||
|
@ -172,7 +205,17 @@ public class MockRM extends ResourceManager {
|
|||
public RMApp submitApp(int masterMemory, String name, String user,
|
||||
Map<ApplicationAccessType, String> acls, boolean unmanaged, String queue,
|
||||
int maxAppAttempts, Credentials ts, String appType,
|
||||
boolean waitForAccepted) throws Exception {
|
||||
boolean waitForAccepted)
|
||||
throws Exception {
|
||||
return submitApp(masterMemory, name, user, acls, unmanaged, queue,
|
||||
maxAppAttempts, ts, appType, waitForAccepted, false);
|
||||
}
|
||||
|
||||
public RMApp submitApp(int masterMemory, String name, String user,
|
||||
Map<ApplicationAccessType, String> acls, boolean unmanaged, String queue,
|
||||
int maxAppAttempts, Credentials ts, String appType,
|
||||
boolean waitForAccepted, boolean keepContainers)
|
||||
throws Exception {
|
||||
ApplicationClientProtocol client = getClientRMService();
|
||||
GetNewApplicationResponse resp = client.getNewApplication(Records
|
||||
.newRecord(GetNewApplicationRequest.class));
|
||||
|
@ -182,6 +225,7 @@ public class MockRM extends ResourceManager {
|
|||
.newRecord(SubmitApplicationRequest.class);
|
||||
ApplicationSubmissionContext sub = Records
|
||||
.newRecord(ApplicationSubmissionContext.class);
|
||||
sub.setKeepContainersAcrossApplicationAttempts(keepContainers);
|
||||
sub.setApplicationId(appId);
|
||||
sub.setApplicationName(name);
|
||||
sub.setMaxAppAttempts(maxAppAttempts);
|
||||
|
@ -421,4 +465,26 @@ public class MockRM extends ResourceManager {
|
|||
// override to disable webapp
|
||||
}
|
||||
|
||||
public static void finishApplicationMaster(RMApp rmApp, MockRM rm, MockNM nm,
|
||||
MockAM am) throws Exception {
|
||||
FinishApplicationMasterRequest req =
|
||||
FinishApplicationMasterRequest.newInstance(
|
||||
FinalApplicationStatus.SUCCEEDED, "", "");
|
||||
am.unregisterAppAttempt(req);
|
||||
am.waitForState(RMAppAttemptState.FINISHING);
|
||||
nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
|
||||
am.waitForState(RMAppAttemptState.FINISHED);
|
||||
rm.waitForState(rmApp.getApplicationId(), RMAppState.FINISHED);
|
||||
}
|
||||
|
||||
public static MockAM launchAM(RMApp app, MockRM rm, MockNM nm)
|
||||
throws Exception {
|
||||
RMAppAttempt attempt = app.getCurrentAppAttempt();
|
||||
nm.nodeHeartbeat(true);
|
||||
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
|
||||
am.registerAppAttempt();
|
||||
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
|
||||
return am;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -649,7 +649,7 @@ public class TestClientRMService {
|
|||
.currentTimeMillis(), "YARN"));
|
||||
ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(applicationId3, 1);
|
||||
RMAppAttemptImpl rmAppAttemptImpl = new RMAppAttemptImpl(attemptId,
|
||||
rmContext, yarnScheduler, null, asContext, config);
|
||||
rmContext, yarnScheduler, null, asContext, config, false);
|
||||
when(app.getCurrentAppAttempt()).thenReturn(rmAppAttemptImpl);
|
||||
return app;
|
||||
}
|
||||
|
|
|
@ -302,7 +302,7 @@ public class TestFifoScheduler {
|
|||
new AppAddedSchedulerEvent(appId1, "queue", "user");
|
||||
fs.handle(appEvent);
|
||||
SchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId1);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
|
||||
fs.handle(attemptEvent);
|
||||
|
||||
List<ContainerId> emptyId = new ArrayList<ContainerId>();
|
||||
|
@ -396,7 +396,7 @@ public class TestFifoScheduler {
|
|||
new AppAddedSchedulerEvent(appId1, "queue", "user");
|
||||
fs.handle(appEvent);
|
||||
SchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId1);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId1, false);
|
||||
fs.handle(attemptEvent);
|
||||
|
||||
ApplicationId appId2 = BuilderUtils.newApplicationId(200, 2);
|
||||
|
@ -406,7 +406,7 @@ public class TestFifoScheduler {
|
|||
new AppAddedSchedulerEvent(appId2, "queue", "user");
|
||||
fs.handle(appEvent2);
|
||||
SchedulerEvent attemptEvent2 =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId2);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId2, false);
|
||||
fs.handle(attemptEvent2);
|
||||
|
||||
List<ContainerId> emptyId = new ArrayList<ContainerId>();
|
||||
|
|
|
@ -28,7 +28,6 @@ import junit.framework.Assert;
|
|||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
|
||||
|
@ -38,7 +37,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
|||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NMToken;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.api.records.Token;
|
||||
|
@ -295,6 +293,8 @@ public class TestRM {
|
|||
nm2.nodeHeartbeat(attempt.getAppAttemptId(), container.getId().getId(),
|
||||
ContainerState.COMPLETE);
|
||||
}
|
||||
nm1.nodeHeartbeat(am.getApplicationAttemptId(), 1,
|
||||
ContainerState.COMPLETE);
|
||||
am.waitForState(RMAppAttemptState.FINISHED);
|
||||
Assert.assertFalse(nmTokenSecretManager
|
||||
.isApplicationAttemptRegistered(attempt.getAppAttemptId()));
|
||||
|
@ -389,19 +389,19 @@ public class TestRM {
|
|||
MockNM nm1 =
|
||||
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
|
||||
nm1.registerNode();
|
||||
MockAM am1 = launchAM(app1, rm1, nm1);
|
||||
finishApplicationMaster(app1, rm1, nm1, am1);
|
||||
MockAM am1 = MockRM.launchAM(app1, rm1, nm1);
|
||||
MockRM.finishApplicationMaster(app1, rm1, nm1, am1);
|
||||
|
||||
// a failed app
|
||||
RMApp app2 = rm1.submitApp(200);
|
||||
MockAM am2 = launchAM(app2, rm1, nm1);
|
||||
MockAM am2 = MockRM.launchAM(app2, rm1, nm1);
|
||||
nm1.nodeHeartbeat(am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
|
||||
am2.waitForState(RMAppAttemptState.FAILED);
|
||||
rm1.waitForState(app2.getApplicationId(), RMAppState.FAILED);
|
||||
|
||||
// a killed app
|
||||
RMApp app3 = rm1.submitApp(200);
|
||||
MockAM am3 = launchAM(app3, rm1, nm1);
|
||||
MockAM am3 = MockRM.launchAM(app3, rm1, nm1);
|
||||
rm1.killApp(app3.getApplicationId());
|
||||
rm1.waitForState(app3.getApplicationId(), RMAppState.KILLED);
|
||||
rm1.waitForState(am3.getApplicationAttemptId(), RMAppAttemptState.KILLED);
|
||||
|
@ -441,7 +441,7 @@ public class TestRM {
|
|||
|
||||
// a failed app
|
||||
RMApp app2 = rm1.submitApp(200);
|
||||
MockAM am2 = launchAM(app2, rm1, nm1);
|
||||
MockAM am2 = MockRM.launchAM(app2, rm1, nm1);
|
||||
nm1
|
||||
.nodeHeartbeat(am2.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
|
||||
am2.waitForState(RMAppAttemptState.FAILED);
|
||||
|
@ -458,28 +458,6 @@ public class TestRM {
|
|||
Assert.assertEquals(-1, report1.getRpcPort());
|
||||
}
|
||||
|
||||
private MockAM launchAM(RMApp app, MockRM rm, MockNM nm)
|
||||
throws Exception {
|
||||
RMAppAttempt attempt = app.getCurrentAppAttempt();
|
||||
nm.nodeHeartbeat(true);
|
||||
MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
|
||||
am.registerAppAttempt();
|
||||
rm.waitForState(app.getApplicationId(), RMAppState.RUNNING);
|
||||
return am;
|
||||
}
|
||||
|
||||
private void finishApplicationMaster(RMApp rmApp, MockRM rm, MockNM nm,
|
||||
MockAM am) throws Exception {
|
||||
FinishApplicationMasterRequest req =
|
||||
FinishApplicationMasterRequest.newInstance(
|
||||
FinalApplicationStatus.SUCCEEDED, "", "");
|
||||
am.unregisterAppAttempt(req);
|
||||
am.waitForState(RMAppAttemptState.FINISHING);
|
||||
nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
|
||||
am.waitForState(RMAppAttemptState.FINISHED);
|
||||
rm.waitForState(rmApp.getApplicationId(), RMAppState.FINISHED);
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
TestRM t = new TestRM();
|
||||
t.testGetNewAppId();
|
||||
|
|
|
@ -26,8 +26,11 @@ import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
|||
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
||||
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||
import org.apache.hadoop.security.AccessControlException;
|
||||
import org.apache.hadoop.service.AbstractService;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.conf.HAUtil;
|
||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -222,4 +225,81 @@ public class TestRMHA {
|
|||
checkMonitorHealth();
|
||||
checkActiveRMFunctionality();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRMDispatcherForHA() throws IOException {
|
||||
String errorMessageForEventHandler =
|
||||
"Expect to get the same number of handlers";
|
||||
String errorMessageForService = "Expect to get the same number of services";
|
||||
Configuration conf = new YarnConfiguration(configuration);
|
||||
rm = new MockRM(conf) {
|
||||
@Override
|
||||
protected Dispatcher createDispatcher() {
|
||||
return new MyCountingDispatcher();
|
||||
}
|
||||
};
|
||||
rm.init(conf);
|
||||
int expectedEventHandlerCount =
|
||||
((MyCountingDispatcher) rm.getRMContext().getDispatcher())
|
||||
.getEventHandlerCount();
|
||||
int expectedServiceCount = rm.getServices().size();
|
||||
assertTrue(expectedEventHandlerCount != 0);
|
||||
|
||||
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
|
||||
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
|
||||
|
||||
assertEquals(STATE_ERR, HAServiceState.INITIALIZING,
|
||||
rm.adminService.getServiceStatus().getState());
|
||||
assertFalse("RM is ready to become active before being started",
|
||||
rm.adminService.getServiceStatus().isReadyToBecomeActive());
|
||||
rm.start();
|
||||
|
||||
//call transitions to standby and active a couple of times
|
||||
rm.adminService.transitionToStandby(requestInfo);
|
||||
rm.adminService.transitionToActive(requestInfo);
|
||||
rm.adminService.transitionToStandby(requestInfo);
|
||||
rm.adminService.transitionToActive(requestInfo);
|
||||
rm.adminService.transitionToStandby(requestInfo);
|
||||
|
||||
rm.adminService.transitionToActive(requestInfo);
|
||||
assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
|
||||
((MyCountingDispatcher) rm.getRMContext().getDispatcher())
|
||||
.getEventHandlerCount());
|
||||
assertEquals(errorMessageForService, expectedServiceCount,
|
||||
rm.getServices().size());
|
||||
|
||||
rm.adminService.transitionToStandby(requestInfo);
|
||||
assertEquals(errorMessageForEventHandler, expectedEventHandlerCount,
|
||||
((MyCountingDispatcher) rm.getRMContext().getDispatcher())
|
||||
.getEventHandlerCount());
|
||||
assertEquals(errorMessageForService, expectedServiceCount,
|
||||
rm.getServices().size());
|
||||
|
||||
rm.stop();
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
class MyCountingDispatcher extends AbstractService implements Dispatcher {
|
||||
|
||||
private int eventHandlerCount;
|
||||
|
||||
public MyCountingDispatcher() {
|
||||
super("MyCountingDispatcher");
|
||||
this.eventHandlerCount = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EventHandler getEventHandler() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void register(Class<? extends Enum> eventType, EventHandler handler) {
|
||||
this.eventHandlerCount ++;
|
||||
}
|
||||
|
||||
public int getEventHandlerCount() {
|
||||
return this.eventHandlerCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,6 +84,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||
import org.apache.log4j.Level;
|
||||
|
@ -179,7 +180,7 @@ public class TestRMRestart {
|
|||
am1.registerAppAttempt();
|
||||
|
||||
// AM request for containers
|
||||
am1.allocate("127.0.0.1" , 1000, 1, new ArrayList<ContainerId>());
|
||||
am1.allocate("127.0.0.1" , 1000, 1, new ArrayList<ContainerId>());
|
||||
// kick the scheduler
|
||||
nm1.nodeHeartbeat(true);
|
||||
List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
|
@ -1543,6 +1544,128 @@ public class TestRMRestart {
|
|||
Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp);
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
@Test
|
||||
public void testQueueMetricsOnRMRestart() throws Exception {
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||
MemoryRMStateStore memStore = new MemoryRMStateStore();
|
||||
memStore.init(conf);
|
||||
|
||||
// PHASE 1: create state in an RM
|
||||
// start RM
|
||||
MockRM rm1 = new MockRM(conf, memStore);
|
||||
rm1.start();
|
||||
MockNM nm1 =
|
||||
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
|
||||
nm1.registerNode();
|
||||
QueueMetrics qm1 = rm1.getResourceScheduler().getRootQueueMetrics();
|
||||
resetQueueMetrics(qm1);
|
||||
assertQueueMetrics(qm1, 0, 0, 0, 0);
|
||||
|
||||
// create app that gets launched and does allocate before RM restart
|
||||
RMApp app1 = rm1.submitApp(200);
|
||||
assertQueueMetrics(qm1, 1, 1, 0, 0);
|
||||
nm1.nodeHeartbeat(true);
|
||||
RMAppAttempt attempt1 = app1.getCurrentAppAttempt();
|
||||
ApplicationAttemptId attemptId1 = attempt1.getAppAttemptId();
|
||||
rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
|
||||
MockAM am1 = rm1.sendAMLaunched(attempt1.getAppAttemptId());
|
||||
am1.registerAppAttempt();
|
||||
am1.allocate("127.0.0.1" , 1000, 1, new ArrayList<ContainerId>());
|
||||
nm1.nodeHeartbeat(true);
|
||||
List<Container> conts = am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
new ArrayList<ContainerId>()).getAllocatedContainers();
|
||||
while (conts.size() == 0) {
|
||||
nm1.nodeHeartbeat(true);
|
||||
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
new ArrayList<ContainerId>()).getAllocatedContainers());
|
||||
Thread.sleep(500);
|
||||
}
|
||||
assertQueueMetrics(qm1, 1, 0, 1, 0);
|
||||
|
||||
// PHASE 2: create new RM and start from old state
|
||||
// create new RM to represent restart and recover state
|
||||
MockRM rm2 = new MockRM(conf, memStore);
|
||||
rm2.start();
|
||||
nm1.setResourceTrackerService(rm2.getResourceTrackerService());
|
||||
QueueMetrics qm2 = rm2.getResourceScheduler().getRootQueueMetrics();
|
||||
resetQueueMetrics(qm2);
|
||||
assertQueueMetrics(qm2, 0, 0, 0, 0);
|
||||
// recover app
|
||||
RMApp loadedApp1 = rm2.getRMContext().getRMApps().get(app1.getApplicationId());
|
||||
am1.setAMRMProtocol(rm2.getApplicationMasterService());
|
||||
am1.allocate(new ArrayList<ResourceRequest>(), new ArrayList<ContainerId>());
|
||||
nm1.nodeHeartbeat(true);
|
||||
nm1 = new MockNM("127.0.0.1:1234", 15120, rm2.getResourceTrackerService());
|
||||
List<ContainerStatus> containerStatuses = new ArrayList<ContainerStatus>();
|
||||
ContainerStatus containerStatus =
|
||||
BuilderUtils.newContainerStatus(BuilderUtils.newContainerId(loadedApp1
|
||||
.getCurrentAppAttempt().getAppAttemptId(), 1),
|
||||
ContainerState.COMPLETE, "Killed AM container", 143);
|
||||
containerStatuses.add(containerStatus);
|
||||
nm1.registerNode(containerStatuses);
|
||||
int timeoutSecs = 0;
|
||||
while (loadedApp1.getAppAttempts().size() != 2 && timeoutSecs++ < 40) {;
|
||||
Thread.sleep(200);
|
||||
}
|
||||
|
||||
assertQueueMetrics(qm2, 1, 1, 0, 0);
|
||||
nm1.nodeHeartbeat(true);
|
||||
attempt1 = loadedApp1.getCurrentAppAttempt();
|
||||
attemptId1 = attempt1.getAppAttemptId();
|
||||
rm2.waitForState(attemptId1, RMAppAttemptState.ALLOCATED);
|
||||
assertQueueMetrics(qm2, 1, 0, 1, 0);
|
||||
am1 = rm2.sendAMLaunched(attempt1.getAppAttemptId());
|
||||
am1.registerAppAttempt();
|
||||
am1.allocate("127.0.0.1" , 1000, 3, new ArrayList<ContainerId>());
|
||||
nm1.nodeHeartbeat(true);
|
||||
conts = am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
new ArrayList<ContainerId>()).getAllocatedContainers();
|
||||
while (conts.size() == 0) {
|
||||
nm1.nodeHeartbeat(true);
|
||||
conts.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
new ArrayList<ContainerId>()).getAllocatedContainers());
|
||||
Thread.sleep(500);
|
||||
}
|
||||
|
||||
// finish the AMs
|
||||
finishApplicationMaster(loadedApp1, rm2, nm1, am1);
|
||||
assertQueueMetrics(qm2, 1, 0, 0, 1);
|
||||
|
||||
// stop RM's
|
||||
rm2.stop();
|
||||
rm1.stop();
|
||||
}
|
||||
|
||||
|
||||
// The metrics have some carry-on values from the previous RM, because the
|
||||
// test case is in-memory, for the same queue name (e.g. root), there's
|
||||
// always a singleton QueueMetrics object.
|
||||
private int appsSubmittedCarryOn = 0;
|
||||
private int appsPendingCarryOn = 0;
|
||||
private int appsRunningCarryOn = 0;
|
||||
private int appsCompletedCarryOn = 0;
|
||||
|
||||
private void resetQueueMetrics(QueueMetrics qm) {
|
||||
appsSubmittedCarryOn = qm.getAppsSubmitted();
|
||||
appsPendingCarryOn = qm.getAppsPending();
|
||||
appsRunningCarryOn = qm.getAppsRunning();
|
||||
appsCompletedCarryOn = qm.getAppsCompleted();
|
||||
}
|
||||
|
||||
private void assertQueueMetrics(QueueMetrics qm, int appsSubmitted,
|
||||
int appsPending, int appsRunning, int appsCompleted) {
|
||||
Assert.assertEquals(qm.getAppsSubmitted(),
|
||||
appsSubmitted + appsSubmittedCarryOn);
|
||||
Assert.assertEquals(qm.getAppsPending(),
|
||||
appsPending + appsPendingCarryOn);
|
||||
Assert.assertEquals(qm.getAppsRunning(),
|
||||
appsRunning + appsRunningCarryOn);
|
||||
Assert.assertEquals(qm.getAppsCompleted(),
|
||||
appsCompleted + appsCompletedCarryOn);
|
||||
}
|
||||
|
||||
public class TestMemoryRMStateStore extends MemoryRMStateStore {
|
||||
int count = 0;
|
||||
public int updateApp = 0;
|
||||
|
|
|
@ -164,7 +164,7 @@ public class TestResourceManager {
|
|||
// Notify scheduler application is finished.
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
|
||||
new AppAttemptRemovedSchedulerEvent(
|
||||
application.getApplicationAttemptId(), RMAppAttemptState.FINISHED);
|
||||
application.getApplicationAttemptId(), RMAppAttemptState.FINISHED, false);
|
||||
resourceManager.getResourceScheduler().handle(appRemovedEvent1);
|
||||
|
||||
checkResourceUsage(nm1, nm2);
|
||||
|
|
|
@ -18,49 +18,31 @@
|
|||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ClientRMService;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
|
||||
import org.apache.hadoop.yarn.server.security.BaseContainerTokenSecretManager;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.MockAM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
|
@ -68,238 +50,186 @@ import org.junit.Test;
|
|||
*
|
||||
*/
|
||||
public class TestAMRestart {
|
||||
// private static final Log LOG = LogFactory.getLog(TestAMRestart.class);
|
||||
// ApplicationsManagerImpl appImpl;
|
||||
// RMContext asmContext = new RMContextImpl(new MemStore());
|
||||
// ApplicationTokenSecretManager appTokenSecretManager =
|
||||
// new ApplicationTokenSecretManager();
|
||||
// DummyResourceScheduler scheduler;
|
||||
// private ClientRMService clientRMService;
|
||||
// int count = 0;
|
||||
// ApplicationId appID;
|
||||
// final int maxFailures = 3;
|
||||
// AtomicInteger launchNotify = new AtomicInteger();
|
||||
// AtomicInteger schedulerNotify = new AtomicInteger();
|
||||
// volatile boolean stop = false;
|
||||
// int schedulerAddApplication = 0;
|
||||
// int schedulerRemoveApplication = 0;
|
||||
// int launcherLaunchCalled = 0;
|
||||
// int launcherCleanupCalled = 0;
|
||||
// private final static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
|
||||
//
|
||||
// private class ExtApplicationsManagerImpl extends ApplicationsManagerImpl {
|
||||
// public ExtApplicationsManagerImpl(
|
||||
// ApplicationTokenSecretManager applicationTokenSecretManager,
|
||||
// YarnScheduler scheduler, RMContext asmContext) {
|
||||
// super(applicationTokenSecretManager, scheduler, asmContext);
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public EventHandler<ASMEvent<AMLauncherEventType>> createNewApplicationMasterLauncher(
|
||||
// ApplicationTokenSecretManager tokenSecretManager) {
|
||||
// return new DummyAMLauncher();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private class DummyAMLauncher implements EventHandler<ASMEvent<AMLauncherEventType>> {
|
||||
//
|
||||
// public DummyAMLauncher() {
|
||||
// asmContext.getDispatcher().register(AMLauncherEventType.class, this);
|
||||
// new Thread() {
|
||||
// public void run() {
|
||||
// while (!stop) {
|
||||
// LOG.info("DEBUG -- waiting for launch");
|
||||
// synchronized(launchNotify) {
|
||||
// while (launchNotify.get() == 0) {
|
||||
// try {
|
||||
// launchNotify.wait();
|
||||
// } catch (InterruptedException e) {
|
||||
// }
|
||||
// }
|
||||
// asmContext.getDispatcher().getEventHandler().handle(
|
||||
// new ApplicationEvent(
|
||||
// ApplicationEventType.LAUNCHED, appID));
|
||||
// launchNotify.addAndGet(-1);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }.start();
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public void handle(ASMEvent<AMLauncherEventType> event) {
|
||||
// switch (event.getType()) {
|
||||
// case CLEANUP:
|
||||
// launcherCleanupCalled++;
|
||||
// break;
|
||||
// case LAUNCH:
|
||||
// LOG.info("DEBUG -- launching");
|
||||
// launcherLaunchCalled++;
|
||||
// synchronized (launchNotify) {
|
||||
// launchNotify.addAndGet(1);
|
||||
// launchNotify.notify();
|
||||
// }
|
||||
// break;
|
||||
// default:
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// private class DummyResourceScheduler implements ResourceScheduler {
|
||||
//
|
||||
// @Override
|
||||
// public void removeNode(RMNode node) {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public Allocation allocate(ApplicationId applicationId,
|
||||
// List<ResourceRequest> ask, List<Container> release) throws IOException {
|
||||
// Container container = recordFactory.newRecordInstance(Container.class);
|
||||
// container.setContainerToken(recordFactory.newRecordInstance(ContainerToken.class));
|
||||
// container.setNodeId(recordFactory.newRecordInstance(NodeId.class));
|
||||
// container.setContainerManagerAddress("localhost");
|
||||
// container.setNodeHttpAddress("localhost:8042");
|
||||
// container.setId(recordFactory.newRecordInstance(ContainerId.class));
|
||||
// container.getId().setAppId(appID);
|
||||
// container.getId().setId(count);
|
||||
// count++;
|
||||
// return new Allocation(Arrays.asList(container), Resources.none());
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public void handle(ASMEvent<ApplicationTrackerEventType> event) {
|
||||
// switch (event.getType()) {
|
||||
// case ADD:
|
||||
// schedulerAddApplication++;
|
||||
// break;
|
||||
// case EXPIRE:
|
||||
// schedulerRemoveApplication++;
|
||||
// LOG.info("REMOVING app : " + schedulerRemoveApplication);
|
||||
// if (schedulerRemoveApplication == maxFailures) {
|
||||
// synchronized (schedulerNotify) {
|
||||
// schedulerNotify.addAndGet(1);
|
||||
// schedulerNotify.notify();
|
||||
// }
|
||||
// }
|
||||
// break;
|
||||
// default:
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public QueueInfo getQueueInfo(String queueName,
|
||||
// boolean includeChildQueues,
|
||||
// boolean recursive) throws IOException {
|
||||
// return null;
|
||||
// }
|
||||
// @Override
|
||||
// public List<QueueUserACLInfo> getQueueUserAclInfo() {
|
||||
// return null;
|
||||
// }
|
||||
// @Override
|
||||
// public void addApplication(ApplicationId applicationId,
|
||||
// ApplicationMaster master, String user, String queue, Priority priority,
|
||||
// ApplicationStore store)
|
||||
// throws IOException {
|
||||
// }
|
||||
// @Override
|
||||
// public void addNode(RMNode nodeInfo) {
|
||||
// }
|
||||
// @Override
|
||||
// public void recover(RMState state) throws Exception {
|
||||
// }
|
||||
// @Override
|
||||
// public void reinitialize(Configuration conf,
|
||||
// ContainerTokenSecretManager secretManager, RMContext rmContext)
|
||||
// throws IOException {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public void nodeUpdate(RMNode nodeInfo,
|
||||
// Map<String, List<Container>> containers) {
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public Resource getMaximumResourceCapability() {
|
||||
// // TODO Auto-generated method stub
|
||||
// return null;
|
||||
// }
|
||||
//
|
||||
// @Override
|
||||
// public Resource getMinimumResourceCapability() {
|
||||
// // TODO Auto-generated method stub
|
||||
// return null;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Before
|
||||
// public void setUp() {
|
||||
//
|
||||
// asmContext.getDispatcher().register(ApplicationEventType.class,
|
||||
// new ResourceManager.ApplicationEventDispatcher(asmContext));
|
||||
//
|
||||
// appID = recordFactory.newRecordInstance(ApplicationId.class);
|
||||
// appID.setClusterTimestamp(System.currentTimeMillis());
|
||||
// appID.setId(1);
|
||||
// Configuration conf = new Configuration();
|
||||
// scheduler = new DummyResourceScheduler();
|
||||
// asmContext.getDispatcher().init(conf);
|
||||
// asmContext.getDispatcher().start();
|
||||
// asmContext.getDispatcher().register(ApplicationTrackerEventType.class, scheduler);
|
||||
// appImpl = new ExtApplicationsManagerImpl(appTokenSecretManager, scheduler, asmContext);
|
||||
//
|
||||
// conf.setLong(YarnConfiguration.AM_EXPIRY_INTERVAL, 1000L);
|
||||
// conf.setInt(RMConfig.AM_MAX_RETRIES, maxFailures);
|
||||
// appImpl.init(conf);
|
||||
// appImpl.start();
|
||||
//
|
||||
// this.clientRMService = new ClientRMService(asmContext, appImpl
|
||||
// .getAmLivelinessMonitor(), appImpl.getClientToAMSecretManager(),
|
||||
// scheduler);
|
||||
// this.clientRMService.init(conf);
|
||||
// }
|
||||
//
|
||||
// @After
|
||||
// public void tearDown() {
|
||||
// }
|
||||
//
|
||||
// private void waitForFailed(AppAttempt application, ApplicationState
|
||||
// finalState) throws Exception {
|
||||
// int count = 0;
|
||||
// while(application.getState() != finalState && count < 10) {
|
||||
// Thread.sleep(500);
|
||||
// count++;
|
||||
// }
|
||||
// Assert.assertEquals(finalState, application.getState());
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testAMRestart() throws Exception {
|
||||
// ApplicationSubmissionContext subContext = recordFactory.newRecordInstance(ApplicationSubmissionContext.class);
|
||||
// subContext.setApplicationId(appID);
|
||||
// subContext.setApplicationName("dummyApp");
|
||||
//// subContext.command = new ArrayList<String>();
|
||||
//// subContext.environment = new HashMap<String, String>();
|
||||
//// subContext.fsTokens = new ArrayList<String>();
|
||||
// subContext.setFsTokensTodo(ByteBuffer.wrap(new byte[0]));
|
||||
// SubmitApplicationRequest request = recordFactory
|
||||
// .newRecordInstance(SubmitApplicationRequest.class);
|
||||
// request.setApplicationSubmissionContext(subContext);
|
||||
// clientRMService.submitApplication(request);
|
||||
// AppAttempt application = asmContext.getApplications().get(appID);
|
||||
// synchronized (schedulerNotify) {
|
||||
// while(schedulerNotify.get() == 0) {
|
||||
// schedulerNotify.wait();
|
||||
// }
|
||||
// }
|
||||
// Assert.assertEquals(maxFailures, launcherCleanupCalled);
|
||||
// Assert.assertEquals(maxFailures, launcherLaunchCalled);
|
||||
// Assert.assertEquals(maxFailures, schedulerAddApplication);
|
||||
// Assert.assertEquals(maxFailures, schedulerRemoveApplication);
|
||||
// Assert.assertEquals(maxFailures, application.getFailedCount());
|
||||
// waitForFailed(application, ApplicationState.FAILED);
|
||||
// stop = true;
|
||||
// }
|
||||
|
||||
@Test
|
||||
public void testAMRestartWithExistingContainers() throws Exception {
|
||||
YarnConfiguration conf = new YarnConfiguration();
|
||||
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
|
||||
|
||||
MockRM rm1 = new MockRM(conf);
|
||||
rm1.start();
|
||||
RMApp app1 =
|
||||
rm1.submitApp(200, "name", "user",
|
||||
new HashMap<ApplicationAccessType, String>(), false, "default", -1,
|
||||
null, "MAPREDUCE", false, true);
|
||||
MockNM nm1 =
|
||||
new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService());
|
||||
nm1.registerNode();
|
||||
MockNM nm2 =
|
||||
new MockNM("127.0.0.1:2351", 4089, rm1.getResourceTrackerService());
|
||||
nm2.registerNode();
|
||||
|
||||
MockAM am1 = MockRM.launchAM(app1, rm1, nm1);
|
||||
int NUM_CONTAINERS = 3;
|
||||
// allocate NUM_CONTAINERS containers
|
||||
am1.allocate("127.0.0.1", 1024, NUM_CONTAINERS,
|
||||
new ArrayList<ContainerId>());
|
||||
nm1.nodeHeartbeat(true);
|
||||
|
||||
// wait for containers to be allocated.
|
||||
List<Container> containers =
|
||||
am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
new ArrayList<ContainerId>()).getAllocatedContainers();
|
||||
while (containers.size() != NUM_CONTAINERS) {
|
||||
nm1.nodeHeartbeat(true);
|
||||
containers.addAll(am1.allocate(new ArrayList<ResourceRequest>(),
|
||||
new ArrayList<ContainerId>()).getAllocatedContainers());
|
||||
Thread.sleep(200);
|
||||
}
|
||||
|
||||
// launch the 2nd container, for testing running container transferred.
|
||||
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 2, ContainerState.RUNNING);
|
||||
ContainerId containerId2 =
|
||||
ContainerId.newInstance(am1.getApplicationAttemptId(), 2);
|
||||
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
|
||||
|
||||
// launch the 3rd container, for testing container allocated by previous
|
||||
// attempt is completed by the next new attempt.
|
||||
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.RUNNING);
|
||||
ContainerId containerId3 =
|
||||
ContainerId.newInstance(am1.getApplicationAttemptId(), 3);
|
||||
rm1.waitForState(nm1, containerId3, RMContainerState.RUNNING);
|
||||
|
||||
// 4th container still in ACQUIRED state. for testing Acquired container is
|
||||
// always killed.
|
||||
ContainerId containerId4 =
|
||||
ContainerId.newInstance(am1.getApplicationAttemptId(), 4);
|
||||
rm1.waitForState(nm1, containerId4, RMContainerState.ACQUIRED);
|
||||
|
||||
// 5th container is in Allocated state. for testing allocated container is
|
||||
// always killed.
|
||||
am1.allocate("127.0.0.1", 1024, 1, new ArrayList<ContainerId>());
|
||||
nm1.nodeHeartbeat(true);
|
||||
ContainerId containerId5 =
|
||||
ContainerId.newInstance(am1.getApplicationAttemptId(), 5);
|
||||
rm1.waitForContainerAllocated(nm1, containerId5);
|
||||
rm1.waitForState(nm1, containerId5, RMContainerState.ALLOCATED);
|
||||
|
||||
// 6th container is in Reserved state.
|
||||
am1.allocate("127.0.0.1", 6000, 1, new ArrayList<ContainerId>());
|
||||
ContainerId containerId6 =
|
||||
ContainerId.newInstance(am1.getApplicationAttemptId(), 6);
|
||||
nm1.nodeHeartbeat(true);
|
||||
SchedulerApplicationAttempt schedulerAttempt =
|
||||
((CapacityScheduler) rm1.getResourceScheduler())
|
||||
.getCurrentAttemptForContainer(containerId6);
|
||||
while (schedulerAttempt.getReservedContainers().size() == 0) {
|
||||
System.out.println("Waiting for container " + containerId6
|
||||
+ " to be reserved.");
|
||||
nm1.nodeHeartbeat(true);
|
||||
Thread.sleep(200);
|
||||
}
|
||||
// assert containerId6 is reserved.
|
||||
Assert.assertEquals(containerId6, schedulerAttempt.getReservedContainers()
|
||||
.get(0).getContainerId());
|
||||
|
||||
// fail the AM by sending CONTAINER_FINISHED event without registering.
|
||||
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
|
||||
am1.waitForState(RMAppAttemptState.FAILED);
|
||||
|
||||
// wait for some time. previous AM's running containers should still remain
|
||||
// in scheduler even though am failed
|
||||
Thread.sleep(3000);
|
||||
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
|
||||
// acquired/allocated containers are cleaned up.
|
||||
Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId4));
|
||||
Assert.assertNull(rm1.getResourceScheduler().getRMContainer(containerId5));
|
||||
|
||||
// wait for app to start a new attempt.
|
||||
rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
|
||||
// assert this is a new AM.
|
||||
ApplicationAttemptId newAttemptId =
|
||||
app1.getCurrentAppAttempt().getAppAttemptId();
|
||||
Assert.assertFalse(newAttemptId.equals(am1.getApplicationAttemptId()));
|
||||
|
||||
// launch the new AM
|
||||
RMAppAttempt attempt2 = app1.getCurrentAppAttempt();
|
||||
nm1.nodeHeartbeat(true);
|
||||
MockAM am2 = rm1.sendAMLaunched(attempt2.getAppAttemptId());
|
||||
RegisterApplicationMasterResponse registerResponse =
|
||||
am2.registerAppAttempt();
|
||||
|
||||
// Assert two containers are running: container2 and container3;
|
||||
Assert.assertEquals(2, registerResponse.getContainersFromPreviousAttempt()
|
||||
.size());
|
||||
boolean containerId2Exists = false, containerId3Exists = false;
|
||||
for (Container container : registerResponse
|
||||
.getContainersFromPreviousAttempt()) {
|
||||
if (container.getId().equals(containerId2)) {
|
||||
containerId2Exists = true;
|
||||
}
|
||||
if (container.getId().equals(containerId3)) {
|
||||
containerId3Exists = true;
|
||||
}
|
||||
}
|
||||
Assert.assertTrue(containerId2Exists && containerId3Exists);
|
||||
rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING);
|
||||
|
||||
// complete container by sending the container complete event which has earlier
|
||||
// attempt's attemptId
|
||||
nm1.nodeHeartbeat(am1.getApplicationAttemptId(), 3, ContainerState.COMPLETE);
|
||||
rm1.waitForState(nm1, containerId3, RMContainerState.COMPLETED);
|
||||
|
||||
// Even though the completed container containerId3 event was sent to the
|
||||
// earlier failed attempt, new RMAppAttempt can also capture this container
|
||||
// info.
|
||||
// completed containerId4 is also transferred to the new attempt.
|
||||
RMAppAttempt newAttempt =
|
||||
app1.getRMAppAttempt(am2.getApplicationAttemptId());
|
||||
// 4 containers finished, acquired/allocated/reserved/completed.
|
||||
Assert.assertEquals(4, newAttempt.getJustFinishedContainers().size());
|
||||
boolean container3Exists = false, container4Exists = false, container5Exists =
|
||||
false, container6Exists = false;
|
||||
for(ContainerStatus status : newAttempt.getJustFinishedContainers()) {
|
||||
if(status.getContainerId().equals(containerId3)) {
|
||||
// containerId3 is the container run by the previous attempt but finished by the
|
||||
// new attempt.
|
||||
container3Exists = true;
|
||||
}
|
||||
if (status.getContainerId().equals(containerId4)) {
|
||||
// containerId4 is the Acquired Container killed by the previous attempt,
|
||||
// it's now inside new attempt's finished container list.
|
||||
container4Exists = true;
|
||||
}
|
||||
if (status.getContainerId().equals(containerId5)) {
|
||||
// containerId5 is the Allocated container killed by previous failed attempt.
|
||||
container5Exists = true;
|
||||
}
|
||||
if (status.getContainerId().equals(containerId6)) {
|
||||
// containerId6 is the reserved container killed by previous failed attempt.
|
||||
container6Exists = true;
|
||||
}
|
||||
}
|
||||
Assert.assertTrue(container3Exists && container4Exists && container5Exists
|
||||
&& container6Exists);
|
||||
|
||||
// New SchedulerApplicationAttempt also has the containers info.
|
||||
rm1.waitForState(nm1, containerId2, RMContainerState.RUNNING);
|
||||
|
||||
// record the scheduler attempt for testing.
|
||||
SchedulerApplicationAttempt schedulerNewAttempt =
|
||||
((CapacityScheduler) rm1.getResourceScheduler())
|
||||
.getCurrentAttemptForContainer(containerId2);
|
||||
// finish this application
|
||||
MockRM.finishApplicationMaster(app1, rm1, nm1, am2);
|
||||
|
||||
// the 2nd attempt released the 1st attempt's running container, when the
|
||||
// 2nd attempt finishes.
|
||||
Assert.assertFalse(schedulerNewAttempt.getLiveContainers().contains(
|
||||
containerId2));
|
||||
// all 5 normal containers (containerId2 through containerId6) finished.
|
||||
Assert.assertEquals(5, newAttempt.getJustFinishedContainers().size());
|
||||
|
||||
rm1.stop();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -460,7 +460,7 @@ public class TestRMAppTransitions {
|
|||
LOG.info("--- START: testUnmanagedAppFailPath ---");
|
||||
application = testCreateAppRunning(subContext);
|
||||
RMAppEvent event = new RMAppFailedAttemptEvent(
|
||||
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "");
|
||||
application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED, "", false);
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
RMAppAttempt appAttempt = application.getCurrentAppAttempt();
|
||||
|
@ -582,7 +582,7 @@ public class TestRMAppTransitions {
|
|||
for (int i=1; i < maxAppAttempts; i++) {
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FAILED, "");
|
||||
RMAppEventType.ATTEMPT_FAILED, "", false);
|
||||
application.handle(event);
|
||||
assertAppState(RMAppState.ACCEPTED, application);
|
||||
event =
|
||||
|
@ -598,7 +598,7 @@ public class TestRMAppTransitions {
|
|||
String message = "Test fail";
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FAILED, message);
|
||||
RMAppEventType.ATTEMPT_FAILED, message, false);
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
sendAppUpdateSavedEvent(application);
|
||||
|
@ -655,7 +655,7 @@ public class TestRMAppTransitions {
|
|||
for (int i=1; i<maxAppAttempts; i++) {
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FAILED, "");
|
||||
RMAppEventType.ATTEMPT_FAILED, "", false);
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
assertAppState(RMAppState.ACCEPTED, application);
|
||||
|
@ -680,7 +680,7 @@ public class TestRMAppTransitions {
|
|||
// after max application attempts
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FAILED, "");
|
||||
RMAppEventType.ATTEMPT_FAILED, "", false);
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
sendAppUpdateSavedEvent(application);
|
||||
|
@ -804,7 +804,7 @@ public class TestRMAppTransitions {
|
|||
// KILLED => KILLED event RMAppEventType.ATTEMPT_FAILED
|
||||
event =
|
||||
new RMAppFailedAttemptEvent(application.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FAILED, "");
|
||||
RMAppEventType.ATTEMPT_FAILED, "", false);
|
||||
application.handle(event);
|
||||
rmDispatcher.await();
|
||||
assertTimesAtFinish(application);
|
||||
|
|
|
@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
|
@ -68,10 +69,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventT
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent;
|
||||
|
@ -120,14 +121,15 @@ public class TestRMAppAttemptTransitions {
|
|||
private AMLivelinessMonitor amFinishingMonitor;
|
||||
private RMStateStore store;
|
||||
|
||||
private RMApp application;
|
||||
private RMAppImpl application;
|
||||
private RMAppAttempt applicationAttempt;
|
||||
|
||||
private Configuration conf = new Configuration();
|
||||
private AMRMTokenSecretManager amRMTokenManager = spy(new AMRMTokenSecretManager(conf));
|
||||
private ClientToAMTokenSecretManagerInRM clientToAMTokenManager =
|
||||
spy(new ClientToAMTokenSecretManagerInRM());
|
||||
|
||||
private boolean transferStateFromPreviousAttempt = false;
|
||||
|
||||
private final class TestApplicationAttemptEventDispatcher implements
|
||||
EventHandler<RMAppAttemptEvent> {
|
||||
|
||||
|
@ -150,6 +152,11 @@ public class TestRMAppAttemptTransitions {
|
|||
@Override
|
||||
public void handle(RMAppEvent event) {
|
||||
assertEquals(application.getApplicationId(), event.getApplicationId());
|
||||
if (event instanceof RMAppFailedAttemptEvent) {
|
||||
transferStateFromPreviousAttempt =
|
||||
((RMAppFailedAttemptEvent) event)
|
||||
.getTransferStateFromPreviousAttempt();
|
||||
}
|
||||
try {
|
||||
application.handle(event);
|
||||
} catch (Throwable t) {
|
||||
|
@ -254,10 +261,10 @@ public class TestRMAppAttemptTransitions {
|
|||
|
||||
unmanagedAM = false;
|
||||
|
||||
application = mock(RMApp.class);
|
||||
application = mock(RMAppImpl.class);
|
||||
applicationAttempt =
|
||||
new RMAppAttemptImpl(applicationAttemptId, rmContext, scheduler,
|
||||
masterService, submissionContext, new Configuration());
|
||||
masterService, submissionContext, new Configuration(), false);
|
||||
when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt);
|
||||
when(application.getApplicationId()).thenReturn(applicationId);
|
||||
|
||||
|
@ -371,6 +378,7 @@ public class TestRMAppAttemptTransitions {
|
|||
assertNull(applicationAttempt.getFinalApplicationStatus());
|
||||
verifyTokenCount(applicationAttempt.getAppAttemptId(), 1);
|
||||
verifyAttemptFinalStateSaved();
|
||||
assertFalse(transferStateFromPreviousAttempt);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -525,6 +533,7 @@ public class TestRMAppAttemptTransitions {
|
|||
assertEquals(container, applicationAttempt.getMasterContainer());
|
||||
assertEquals(finalStatus, applicationAttempt.getFinalApplicationStatus());
|
||||
verifyTokenCount(applicationAttempt.getAppAttemptId(), 1);
|
||||
assertFalse(transferStateFromPreviousAttempt);
|
||||
}
|
||||
|
||||
|
||||
|
@ -654,6 +663,7 @@ public class TestRMAppAttemptTransitions {
|
|||
diagnostics));
|
||||
testAppAttemptFinishedState(null, finalStatus, url, diagnostics, 1,
|
||||
true);
|
||||
assertFalse(transferStateFromPreviousAttempt);
|
||||
}
|
||||
|
||||
private void sendAttemptUpdateSavedEvent(RMAppAttempt applicationAttempt) {
|
||||
|
@ -681,6 +691,21 @@ public class TestRMAppAttemptTransitions {
|
|||
"Unmanaged AM must register after AM attempt reaches LAUNCHED state.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnmanagedAMContainersCleanup() {
|
||||
unmanagedAM = true;
|
||||
when(submissionContext.getUnmanagedAM()).thenReturn(true);
|
||||
when(submissionContext.getKeepContainersAcrossApplicationAttempts())
|
||||
.thenReturn(true);
|
||||
// submit AM and check it goes to SUBMITTED state
|
||||
submitApplicationAttempt();
|
||||
// launch AM and verify attempt failed
|
||||
applicationAttempt.handle(new RMAppAttemptRegistrationEvent(
|
||||
applicationAttempt.getAppAttemptId(), "host", 8042, "oldtrackingurl"));
|
||||
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||
assertFalse(transferStateFromPreviousAttempt);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNewToKilled() {
|
||||
applicationAttempt.handle(
|
||||
|
@ -1092,6 +1117,64 @@ public class TestRMAppAttemptTransitions {
|
|||
Assert.assertNull(token);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailedToFailed() {
|
||||
// create a failed attempt.
|
||||
when(submissionContext.getKeepContainersAcrossApplicationAttempts())
|
||||
.thenReturn(true);
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
launchApplicationAttempt(amContainer);
|
||||
runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false);
|
||||
ContainerStatus cs1 =
|
||||
ContainerStatus.newInstance(amContainer.getId(),
|
||||
ContainerState.COMPLETE, "some error", 123);
|
||||
ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId();
|
||||
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||
appAttemptId, cs1));
|
||||
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
// should not kill containers when attempt fails.
|
||||
assertTrue(transferStateFromPreviousAttempt);
|
||||
|
||||
// failed attempt captured the container finished event.
|
||||
assertEquals(0, applicationAttempt.getJustFinishedContainers().size());
|
||||
ContainerStatus cs2 =
|
||||
ContainerStatus.newInstance(ContainerId.newInstance(appAttemptId, 2),
|
||||
ContainerState.COMPLETE, "", 0);
|
||||
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||
appAttemptId, cs2));
|
||||
assertEquals(1, applicationAttempt.getJustFinishedContainers().size());
|
||||
assertEquals(cs2.getContainerId(), applicationAttempt
|
||||
.getJustFinishedContainers().get(0).getContainerId());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testContainersCleanupForLastAttempt() {
|
||||
// create a failed attempt.
|
||||
applicationAttempt =
|
||||
new RMAppAttemptImpl(applicationAttempt.getAppAttemptId(), rmContext,
|
||||
scheduler, masterService, submissionContext, new Configuration(),
|
||||
true);
|
||||
when(submissionContext.getKeepContainersAcrossApplicationAttempts())
|
||||
.thenReturn(true);
|
||||
when(submissionContext.getMaxAppAttempts()).thenReturn(1);
|
||||
Container amContainer = allocateApplicationAttempt();
|
||||
launchApplicationAttempt(amContainer);
|
||||
runApplicationAttempt(amContainer, "host", 8042, "oldtrackingurl", false);
|
||||
ContainerStatus cs1 =
|
||||
ContainerStatus.newInstance(amContainer.getId(),
|
||||
ContainerState.COMPLETE, "some error", 123);
|
||||
ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId();
|
||||
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
|
||||
appAttemptId, cs1));
|
||||
sendAttemptUpdateSavedEvent(applicationAttempt);
|
||||
assertEquals(RMAppAttemptState.FAILED,
|
||||
applicationAttempt.getAppAttemptState());
|
||||
assertFalse(transferStateFromPreviousAttempt);
|
||||
}
|
||||
|
||||
private void verifyTokenCount(ApplicationAttemptId appAttemptId, int count) {
|
||||
verify(amRMTokenManager, times(count)).applicationMasterFinished(appAttemptId);
|
||||
if (UserGroupInformation.isSecurityEnabled()) {
|
||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
|
@ -66,8 +66,10 @@ public class TestQueueMetrics {
|
|||
MetricsSource queueSource= queueSource(ms, queueName);
|
||||
AppSchedulingInfo app = mockApp(user);
|
||||
|
||||
metrics.submitApp(user, 1);
|
||||
metrics.submitApp(user);
|
||||
MetricsSource userSource = userSource(ms, queueName, user);
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
metrics.submitAppAttempt(user);
|
||||
checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.setAvailableResourcesToQueue(Resources.createResource(100*GB, 100));
|
||||
|
@ -76,7 +78,7 @@ public class TestQueueMetrics {
|
|||
// configurable cluster/queue resources
|
||||
checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
|
||||
|
||||
metrics.incrAppsRunning(app, user);
|
||||
metrics.runAppAttempt(app.getApplicationId(), user);
|
||||
checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
|
||||
|
||||
metrics.allocateResources(user, 3, Resources.createResource(2*GB, 2));
|
||||
|
@ -85,7 +87,10 @@ public class TestQueueMetrics {
|
|||
metrics.releaseResources(user, 1, Resources.createResource(2*GB, 2));
|
||||
checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
|
||||
|
||||
metrics.finishApp(app, RMAppAttemptState.FINISHED);
|
||||
metrics.finishAppAttempt(
|
||||
app.getApplicationId(), app.isPending(), app.getUser());
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
metrics.finishApp(user, RMAppState.FINISHED);
|
||||
checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
|
||||
assertNull(userSource);
|
||||
}
|
||||
|
@ -100,39 +105,47 @@ public class TestQueueMetrics {
|
|||
MetricsSource queueSource = queueSource(ms, queueName);
|
||||
AppSchedulingInfo app = mockApp(user);
|
||||
|
||||
metrics.submitApp(user, 1);
|
||||
metrics.submitApp(user);
|
||||
MetricsSource userSource = userSource(ms, queueName, user);
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
metrics.submitAppAttempt(user);
|
||||
checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.incrAppsRunning(app, user);
|
||||
metrics.runAppAttempt(app.getApplicationId(), user);
|
||||
checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
|
||||
|
||||
metrics.finishApp(app, RMAppAttemptState.FAILED);
|
||||
checkApps(queueSource, 1, 0, 0, 0, 1, 0, true);
|
||||
metrics.finishAppAttempt(
|
||||
app.getApplicationId(), app.isPending(), app.getUser());
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
|
||||
// As the application has failed, framework retries the same application
|
||||
// based on configuration
|
||||
metrics.submitApp(user, 2);
|
||||
metrics.submitAppAttempt(user);
|
||||
checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.incrAppsRunning(app, user);
|
||||
metrics.runAppAttempt(app.getApplicationId(), user);
|
||||
checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
|
||||
|
||||
// Suppose the application has failed this time as well.
|
||||
metrics.finishApp(app, RMAppAttemptState.FAILED);
|
||||
checkApps(queueSource, 1, 0, 0, 0, 1, 0, true);
|
||||
metrics.finishAppAttempt(
|
||||
app.getApplicationId(), app.isPending(), app.getUser());
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
|
||||
// As the application has failed, framework retries the same application
|
||||
// based on configuration
|
||||
metrics.submitApp(user, 3);
|
||||
metrics.submitAppAttempt(user);
|
||||
checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.incrAppsRunning(app, user);
|
||||
metrics.runAppAttempt(app.getApplicationId(), user);
|
||||
checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
|
||||
|
||||
// Suppose say application has finished.
|
||||
metrics.finishApp(app, RMAppAttemptState.FINISHED);
|
||||
checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
|
||||
// Suppose the application has failed, and there are no more retries.
|
||||
metrics.finishAppAttempt(
|
||||
app.getApplicationId(), app.isPending(), app.getUser());
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.finishApp(user, RMAppState.FAILED);
|
||||
checkApps(queueSource, 1, 0, 0, 0, 1, 0, true);
|
||||
|
||||
assertNull(userSource);
|
||||
}
|
||||
|
@ -146,9 +159,13 @@ public class TestQueueMetrics {
|
|||
MetricsSource queueSource = queueSource(ms, queueName);
|
||||
AppSchedulingInfo app = mockApp(user);
|
||||
|
||||
metrics.submitApp(user, 1);
|
||||
metrics.submitApp(user);
|
||||
MetricsSource userSource = userSource(ms, queueName, user);
|
||||
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.submitAppAttempt(user);
|
||||
checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 1, 0, 0, 0, 0, true);
|
||||
|
||||
|
@ -160,7 +177,7 @@ public class TestQueueMetrics {
|
|||
checkResources(queueSource, 0, 0, 0, 0, 0, 100*GB, 100, 15*GB, 15, 5, 0, 0, 0);
|
||||
checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
|
||||
|
||||
metrics.incrAppsRunning(app, user);
|
||||
metrics.runAppAttempt(app.getApplicationId(), user);
|
||||
checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 1, 0, 0, 0, true);
|
||||
|
||||
|
@ -172,7 +189,11 @@ public class TestQueueMetrics {
|
|||
checkResources(queueSource, 4*GB, 4, 2, 3, 1, 100*GB, 100, 9*GB, 9, 2, 0, 0, 0);
|
||||
checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
|
||||
|
||||
metrics.finishApp(app, RMAppAttemptState.FINISHED);
|
||||
metrics.finishAppAttempt(
|
||||
app.getApplicationId(), app.isPending(), app.getUser());
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
|
||||
metrics.finishApp(user, RMAppState.FINISHED);
|
||||
checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 0, 1, 0, 0, true);
|
||||
}
|
||||
|
@ -192,10 +213,16 @@ public class TestQueueMetrics {
|
|||
MetricsSource queueSource = queueSource(ms, leafQueueName);
|
||||
AppSchedulingInfo app = mockApp(user);
|
||||
|
||||
metrics.submitApp(user, 1);
|
||||
metrics.submitApp(user);
|
||||
MetricsSource userSource = userSource(ms, leafQueueName, user);
|
||||
MetricsSource parentUserSource = userSource(ms, parentQueueName, user);
|
||||
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(parentQueueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(parentUserSource, 1, 0, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.submitAppAttempt(user);
|
||||
checkApps(queueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
checkApps(parentQueueSource, 1, 1, 0, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 1, 0, 0, 0, 0, true);
|
||||
|
@ -211,7 +238,7 @@ public class TestQueueMetrics {
|
|||
checkResources(userSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
|
||||
checkResources(parentUserSource, 0, 0, 0, 0, 0, 10*GB, 10, 15*GB, 15, 5, 0, 0, 0);
|
||||
|
||||
metrics.incrAppsRunning(app, user);
|
||||
metrics.runAppAttempt(app.getApplicationId(), user);
|
||||
checkApps(queueSource, 1, 0, 1, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 1, 0, 0, 0, true);
|
||||
|
||||
|
@ -231,7 +258,14 @@ public class TestQueueMetrics {
|
|||
checkResources(userSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
|
||||
checkResources(parentUserSource, 4*GB, 4, 2, 3, 1, 10*GB, 10, 9*GB, 9, 2, 0, 0, 0);
|
||||
|
||||
metrics.finishApp(app, RMAppAttemptState.FINISHED);
|
||||
metrics.finishAppAttempt(
|
||||
app.getApplicationId(), app.isPending(), app.getUser());
|
||||
checkApps(queueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(parentQueueSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 0, 0, 0, 0, true);
|
||||
checkApps(parentUserSource, 1, 0, 0, 0, 0, 0, true);
|
||||
|
||||
metrics.finishApp(user, RMAppState.FINISHED);
|
||||
checkApps(queueSource, 1, 0, 0, 1, 0, 0, true);
|
||||
checkApps(parentQueueSource, 1, 0, 0, 1, 0, 0, true);
|
||||
checkApps(userSource, 1, 0, 0, 1, 0, 0, true);
|
||||
|
@ -308,7 +342,7 @@ public class TestQueueMetrics {
|
|||
assertGauge("AppsPending", pending, rb);
|
||||
assertGauge("AppsRunning", running, rb);
|
||||
assertCounter("AppsCompleted", completed, rb);
|
||||
assertGauge("AppsFailed", failed, rb);
|
||||
assertCounter("AppsFailed", failed, rb);
|
||||
assertCounter("AppsKilled", killed, rb);
|
||||
}
|
||||
|
||||
|
|
|
@ -562,18 +562,18 @@ public class TestCapacityScheduler {
|
|||
new AppAddedSchedulerEvent(appId, "default", "user");
|
||||
cs.handle(addAppEvent);
|
||||
SchedulerEvent addAttemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
cs.handle(addAttemptEvent);
|
||||
|
||||
// Verify the blacklist can be updated independent of requesting containers
|
||||
cs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(),
|
||||
Collections.singletonList(host), null);
|
||||
Assert.assertTrue(cs.getApplication(appAttemptId).isBlacklisted(host));
|
||||
Assert.assertTrue(cs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
|
||||
cs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(), null,
|
||||
Collections.singletonList(host));
|
||||
Assert.assertFalse(cs.getApplication(appAttemptId).isBlacklisted(host));
|
||||
Assert.assertFalse(cs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
|
||||
rm.stop();
|
||||
}
|
||||
|
||||
|
@ -597,66 +597,6 @@ public class TestCapacityScheduler {
|
|||
assertTrue(appComparator.compare(app1, app3) < 0);
|
||||
assertTrue(appComparator.compare(app2, app3) < 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcurrentAccessOnApplications() throws Exception {
|
||||
CapacityScheduler cs = new CapacityScheduler();
|
||||
verifyConcurrentAccessOnApplications(
|
||||
cs.appAttempts, FiCaSchedulerApp.class, Queue.class);
|
||||
}
|
||||
|
||||
public static <T extends SchedulerApplicationAttempt, Q extends Queue>
|
||||
void verifyConcurrentAccessOnApplications(
|
||||
final Map<ApplicationAttemptId, T> applications, Class<T> appClazz,
|
||||
final Class<Q> queueClazz)
|
||||
throws Exception {
|
||||
final int size = 10000;
|
||||
final ApplicationId appId = ApplicationId.newInstance(0, 0);
|
||||
final Constructor<T> ctor = appClazz.getDeclaredConstructor(
|
||||
ApplicationAttemptId.class, String.class, queueClazz,
|
||||
ActiveUsersManager.class, RMContext.class);
|
||||
|
||||
ApplicationAttemptId appAttemptId0
|
||||
= ApplicationAttemptId.newInstance(appId, 0);
|
||||
applications.put(appAttemptId0, ctor.newInstance(
|
||||
appAttemptId0, null, mock(queueClazz), null, null));
|
||||
assertNotNull(applications.get(appAttemptId0));
|
||||
|
||||
// Imitating the thread of scheduler that will add and remove apps
|
||||
final AtomicBoolean finished = new AtomicBoolean(false);
|
||||
final AtomicBoolean failed = new AtomicBoolean(false);
|
||||
Thread t = new Thread() {
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
for (int i = 1; i <= size; ++i) {
|
||||
ApplicationAttemptId appAttemptId
|
||||
= ApplicationAttemptId.newInstance(appId, i);
|
||||
try {
|
||||
applications.put(appAttemptId, ctor.newInstance(
|
||||
appAttemptId, null, mock(queueClazz), null, null));
|
||||
} catch (Exception e) {
|
||||
failed.set(true);
|
||||
finished.set(true);
|
||||
return;
|
||||
}
|
||||
}
|
||||
for (int i = 1; i <= size; ++i) {
|
||||
ApplicationAttemptId appAttemptId
|
||||
= ApplicationAttemptId.newInstance(appId, i);
|
||||
applications.remove(appAttemptId);
|
||||
}
|
||||
finished.set(true);
|
||||
}
|
||||
};
|
||||
t.start();
|
||||
|
||||
// Imitating the thread of rmappattempt that will get the app
|
||||
while (!finished.get()) {
|
||||
assertNotNull(applications.get(appAttemptId0));
|
||||
}
|
||||
assertFalse(failed.get());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAppsInQueue() throws Exception {
|
||||
|
@ -702,7 +642,7 @@ public class TestCapacityScheduler {
|
|||
|
||||
SchedulerApplication app =
|
||||
TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
|
||||
cs.applications, cs, "a1");
|
||||
cs.getSchedulerApplications(), cs, "a1");
|
||||
Assert.assertEquals("a1", app.getQueue().getQueueName());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
|||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
|
||||
|
@ -63,7 +64,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
|
||||
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
|
||||
import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
|
||||
|
@ -345,15 +349,20 @@ public class TestLeafQueue {
|
|||
.getMockApplicationAttemptId(0, 1);
|
||||
FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, null,
|
||||
rmContext);
|
||||
a.submitApplicationAttempt(app_0, user_0);
|
||||
|
||||
when(cs.getApplication(appAttemptId_0)).thenReturn(app_0);
|
||||
AppAddedSchedulerEvent addAppEvent =
|
||||
new AppAddedSchedulerEvent(appAttemptId_0.getApplicationId(),
|
||||
a.getQueueName(), user_0);
|
||||
cs.handle(addAppEvent);
|
||||
AppAttemptAddedSchedulerEvent addAttemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId_0, false);
|
||||
cs.handle(addAttemptEvent);
|
||||
|
||||
AppAttemptRemovedSchedulerEvent event = new AppAttemptRemovedSchedulerEvent(
|
||||
appAttemptId_0, RMAppAttemptState.FAILED);
|
||||
appAttemptId_0, RMAppAttemptState.FAILED, false);
|
||||
cs.handle(event);
|
||||
|
||||
assertEquals(0, a.getMetrics().getAppsPending());
|
||||
assertEquals(1, a.getMetrics().getAppsFailed());
|
||||
assertEquals(0, a.getMetrics().getAppsFailed());
|
||||
|
||||
// Attempt the same application again
|
||||
final ApplicationAttemptId appAttemptId_1 = TestUtils
|
||||
|
@ -365,10 +374,12 @@ public class TestLeafQueue {
|
|||
assertEquals(1, a.getMetrics().getAppsSubmitted());
|
||||
assertEquals(1, a.getMetrics().getAppsPending());
|
||||
|
||||
when(cs.getApplication(appAttemptId_1)).thenReturn(app_0);
|
||||
event = new AppAttemptRemovedSchedulerEvent(appAttemptId_0,
|
||||
RMAppAttemptState.FINISHED);
|
||||
RMAppAttemptState.FINISHED, false);
|
||||
cs.handle(event);
|
||||
AppRemovedSchedulerEvent rEvent = new AppRemovedSchedulerEvent(
|
||||
appAttemptId_0.getApplicationId(), RMAppState.FINISHED);
|
||||
cs.handle(rEvent);
|
||||
|
||||
assertEquals(1, a.getMetrics().getAppsSubmitted());
|
||||
assertEquals(0, a.getMetrics().getAppsPending());
|
||||
|
|
|
@ -79,7 +79,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
|
||||
|
@ -260,8 +259,8 @@ public class TestFairScheduler {
|
|||
scheduler.addApplication(id.getApplicationId(), queueId, userId);
|
||||
// This conditional is for testAclSubmitApplication where app is rejected
|
||||
// and no app is added.
|
||||
if (scheduler.applications.containsKey(id.getApplicationId())) {
|
||||
scheduler.addApplicationAttempt(id);
|
||||
if (scheduler.getSchedulerApplications().containsKey(id.getApplicationId())) {
|
||||
scheduler.addApplicationAttempt(id, false);
|
||||
}
|
||||
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||
ResourceRequest request = createResourceRequest(memory, vcores, ResourceRequest.ANY,
|
||||
|
@ -590,7 +589,7 @@ public class TestFairScheduler {
|
|||
// Make sure queue 2 is waiting with a reservation
|
||||
assertEquals(0, scheduler.getQueueManager().getQueue("queue2").
|
||||
getResourceUsage().getMemory());
|
||||
assertEquals(1024, scheduler.appAttempts.get(attId).getCurrentReservation().getMemory());
|
||||
assertEquals(1024, scheduler.getSchedulerApp(attId).getCurrentReservation().getMemory());
|
||||
|
||||
// Now another node checks in with capacity
|
||||
RMNode node2 =
|
||||
|
@ -606,10 +605,10 @@ public class TestFairScheduler {
|
|||
getResourceUsage().getMemory());
|
||||
|
||||
// The old reservation should still be there...
|
||||
assertEquals(1024, scheduler.appAttempts.get(attId).getCurrentReservation().getMemory());
|
||||
assertEquals(1024, scheduler.getSchedulerApp(attId).getCurrentReservation().getMemory());
|
||||
// ... but it should disappear when we update the first node.
|
||||
scheduler.handle(updateEvent);
|
||||
assertEquals(0, scheduler.appAttempts.get(attId).getCurrentReservation().getMemory());
|
||||
assertEquals(0, scheduler.getSchedulerApp(attId).getCurrentReservation().getMemory());
|
||||
|
||||
}
|
||||
|
||||
|
@ -630,7 +629,7 @@ public class TestFairScheduler {
|
|||
"user1");
|
||||
scheduler.handle(appAddedEvent);
|
||||
AppAttemptAddedSchedulerEvent attempAddedEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
scheduler.handle(attempAddedEvent);
|
||||
assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true)
|
||||
.getRunnableAppSchedulables().size());
|
||||
|
@ -656,7 +655,7 @@ public class TestFairScheduler {
|
|||
"user2");
|
||||
scheduler.handle(appAddedEvent);
|
||||
AppAttemptAddedSchedulerEvent attempAddedEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
scheduler.handle(attempAddedEvent);
|
||||
assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1", true)
|
||||
.getRunnableAppSchedulables().size());
|
||||
|
@ -710,7 +709,6 @@ public class TestFairScheduler {
|
|||
scheduler.reinitialize(conf, resourceManager.getRMContext());
|
||||
|
||||
ApplicationAttemptId appId;
|
||||
Map<ApplicationAttemptId, FSSchedulerApp> apps = scheduler.appAttempts;
|
||||
|
||||
List<QueuePlacementRule> rules = new ArrayList<QueuePlacementRule>();
|
||||
rules.add(new QueuePlacementRule.Specified().initialize(true, null));
|
||||
|
@ -723,17 +721,17 @@ public class TestFairScheduler {
|
|||
scheduler.getAllocationConfiguration().placementPolicy =
|
||||
new QueuePlacementPolicy(rules, queues, conf);
|
||||
appId = createSchedulingRequest(1024, "somequeue", "user1");
|
||||
assertEquals("root.somequeue", apps.get(appId).getQueueName());
|
||||
assertEquals("root.somequeue", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "default", "user1");
|
||||
assertEquals("root.user1", apps.get(appId).getQueueName());
|
||||
assertEquals("root.user1", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "default", "user3");
|
||||
assertEquals("root.user3group", apps.get(appId).getQueueName());
|
||||
assertEquals("root.user3group", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "default", "user4");
|
||||
assertEquals("root.user4subgroup1", apps.get(appId).getQueueName());
|
||||
assertEquals("root.user4subgroup1", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "default", "user5");
|
||||
assertEquals("root.user5subgroup2", apps.get(appId).getQueueName());
|
||||
assertEquals("root.user5subgroup2", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "default", "otheruser");
|
||||
assertEquals("root.default", apps.get(appId).getQueueName());
|
||||
assertEquals("root.default", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
|
||||
// test without specified as first rule
|
||||
rules = new ArrayList<QueuePlacementRule>();
|
||||
|
@ -743,11 +741,11 @@ public class TestFairScheduler {
|
|||
scheduler.getAllocationConfiguration().placementPolicy =
|
||||
new QueuePlacementPolicy(rules, queues, conf);
|
||||
appId = createSchedulingRequest(1024, "somequeue", "user1");
|
||||
assertEquals("root.user1", apps.get(appId).getQueueName());
|
||||
assertEquals("root.user1", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "somequeue", "otheruser");
|
||||
assertEquals("root.somequeue", apps.get(appId).getQueueName());
|
||||
assertEquals("root.somequeue", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
appId = createSchedulingRequest(1024, "default", "otheruser");
|
||||
assertEquals("root.default", apps.get(appId).getQueueName());
|
||||
assertEquals("root.default", scheduler.getSchedulerApp(appId).getQueueName());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -802,13 +800,13 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId id11 = createAppAttemptId(1, 1);
|
||||
scheduler.addApplication(id11.getApplicationId(), "root.queue1", "user1");
|
||||
scheduler.addApplicationAttempt(id11);
|
||||
scheduler.addApplicationAttempt(id11, false);
|
||||
ApplicationAttemptId id21 = createAppAttemptId(2, 1);
|
||||
scheduler.addApplication(id21.getApplicationId(), "root.queue2", "user1");
|
||||
scheduler.addApplicationAttempt(id21);
|
||||
scheduler.addApplicationAttempt(id21, false);
|
||||
ApplicationAttemptId id22 = createAppAttemptId(2, 2);
|
||||
scheduler.addApplication(id22.getApplicationId(), "root.queue2", "user1");
|
||||
scheduler.addApplicationAttempt(id22);
|
||||
scheduler.addApplicationAttempt(id22, false);
|
||||
|
||||
int minReqSize =
|
||||
FairSchedulerConfiguration.DEFAULT_RM_SCHEDULER_INCREMENT_ALLOCATION_MB;
|
||||
|
@ -854,7 +852,7 @@ public class TestFairScheduler {
|
|||
"user1");
|
||||
scheduler.handle(appAddedEvent);
|
||||
AppAttemptAddedSchedulerEvent attemptAddedEvent =
|
||||
new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1));
|
||||
new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1), false);
|
||||
scheduler.handle(attemptAddedEvent);
|
||||
|
||||
// Scheduler should have two queues (the default and the one created for user1)
|
||||
|
@ -865,7 +863,7 @@ public class TestFairScheduler {
|
|||
.getRunnableAppSchedulables().size());
|
||||
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(
|
||||
createAppAttemptId(1, 1), RMAppAttemptState.FINISHED);
|
||||
createAppAttemptId(1, 1), RMAppAttemptState.FINISHED, false);
|
||||
|
||||
// Now remove app
|
||||
scheduler.handle(appRemovedEvent1);
|
||||
|
@ -1138,12 +1136,12 @@ public class TestFairScheduler {
|
|||
scheduler.handle(nodeUpdate3);
|
||||
}
|
||||
|
||||
assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app2).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app3).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app5).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app6).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app1).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app2).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app3).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app4).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app5).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app6).getLiveContainers().size());
|
||||
|
||||
// Now new requests arrive from queues C and D
|
||||
ApplicationAttemptId app7 =
|
||||
|
@ -1166,16 +1164,16 @@ public class TestFairScheduler {
|
|||
// Make sure it is lowest priority container.
|
||||
scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
|
||||
Resources.createResource(2 * 1024));
|
||||
assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app2).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app5).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app1).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app2).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app4).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app5).getLiveContainers().size());
|
||||
|
||||
// First verify we are adding containers to preemption list for the application
|
||||
assertTrue(!Collections.disjoint(scheduler.appAttempts.get(app3).getLiveContainers(),
|
||||
scheduler.appAttempts.get(app3).getPreemptionContainers()));
|
||||
assertTrue(!Collections.disjoint(scheduler.appAttempts.get(app6).getLiveContainers(),
|
||||
scheduler.appAttempts.get(app6).getPreemptionContainers()));
|
||||
assertTrue(!Collections.disjoint(scheduler.getSchedulerApp(app3).getLiveContainers(),
|
||||
scheduler.getSchedulerApp(app3).getPreemptionContainers()));
|
||||
assertTrue(!Collections.disjoint(scheduler.getSchedulerApp(app6).getLiveContainers(),
|
||||
scheduler.getSchedulerApp(app6).getPreemptionContainers()));
|
||||
|
||||
// Pretend 15 seconds have passed
|
||||
clock.tick(15);
|
||||
|
@ -1185,8 +1183,8 @@ public class TestFairScheduler {
|
|||
Resources.createResource(2 * 1024));
|
||||
|
||||
// At this point the containers should have been killed (since we are not simulating AM)
|
||||
assertEquals(0, scheduler.appAttempts.get(app6).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app3).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app6).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app3).getLiveContainers().size());
|
||||
|
||||
// Trigger a kill by insisting we want containers back
|
||||
scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
|
||||
|
@ -1200,22 +1198,22 @@ public class TestFairScheduler {
|
|||
scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
|
||||
Resources.createResource(2 * 1024));
|
||||
|
||||
assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app2).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app3).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app5).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app6).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app1).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app2).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app3).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app4).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app5).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app6).getLiveContainers().size());
|
||||
|
||||
// Now A and B are below fair share, so preemption shouldn't do anything
|
||||
scheduler.preemptResources(scheduler.getQueueManager().getLeafQueues(),
|
||||
Resources.createResource(2 * 1024));
|
||||
assertEquals(1, scheduler.appAttempts.get(app1).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app2).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app3).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.appAttempts.get(app4).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app5).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.appAttempts.get(app6).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app1).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app2).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app3).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(app4).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app5).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(app6).getLiveContainers().size());
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
|
@ -1374,9 +1372,9 @@ public class TestFairScheduler {
|
|||
|
||||
// One container should get reservation and the other should get nothing
|
||||
assertEquals(1024,
|
||||
scheduler.appAttempts.get(attId1).getCurrentReservation().getMemory());
|
||||
scheduler.getSchedulerApp(attId1).getCurrentReservation().getMemory());
|
||||
assertEquals(0,
|
||||
scheduler.appAttempts.get(attId2).getCurrentReservation().getMemory());
|
||||
scheduler.getSchedulerApp(attId2).getCurrentReservation().getMemory());
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
|
@ -1411,7 +1409,7 @@ public class TestFairScheduler {
|
|||
scheduler.handle(updateEvent);
|
||||
|
||||
// App 1 should be running
|
||||
assertEquals(1, scheduler.appAttempts.get(attId1).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(attId1).getLiveContainers().size());
|
||||
|
||||
ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1",
|
||||
"user1", 1);
|
||||
|
@ -1420,7 +1418,7 @@ public class TestFairScheduler {
|
|||
scheduler.handle(updateEvent);
|
||||
|
||||
// App 2 should not be running
|
||||
assertEquals(0, scheduler.appAttempts.get(attId2).getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getSchedulerApp(attId2).getLiveContainers().size());
|
||||
|
||||
// Request another container for app 1
|
||||
createSchedulingRequestExistingApplication(1024, 1, attId1);
|
||||
|
@ -1429,7 +1427,7 @@ public class TestFairScheduler {
|
|||
scheduler.handle(updateEvent);
|
||||
|
||||
// Request should be fulfilled
|
||||
assertEquals(2, scheduler.appAttempts.get(attId1).getLiveContainers().size());
|
||||
assertEquals(2, scheduler.getSchedulerApp(attId1).getLiveContainers().size());
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
|
@ -1449,10 +1447,10 @@ public class TestFairScheduler {
|
|||
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
|
||||
scheduler.handle(updateEvent);
|
||||
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId);
|
||||
assertEquals(1, app.getLiveContainers().size());
|
||||
|
||||
ContainerId containerId = scheduler.appAttempts.get(attId)
|
||||
ContainerId containerId = scheduler.getSchedulerApp(attId)
|
||||
.getLiveContainers().iterator().next().getContainerId();
|
||||
|
||||
// Cause reservation to be created
|
||||
|
@ -1521,9 +1519,9 @@ public class TestFairScheduler {
|
|||
ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1",
|
||||
"norealuserhasthisname2", 1);
|
||||
|
||||
FSSchedulerApp app1 = scheduler.appAttempts.get(attId1);
|
||||
FSSchedulerApp app1 = scheduler.getSchedulerApp(attId1);
|
||||
assertNotNull("The application was not allowed", app1);
|
||||
FSSchedulerApp app2 = scheduler.appAttempts.get(attId2);
|
||||
FSSchedulerApp app2 = scheduler.getSchedulerApp(attId2);
|
||||
assertNull("The application was allowed", app2);
|
||||
}
|
||||
|
||||
|
@ -1547,7 +1545,7 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId appId = createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++);
|
||||
scheduler.addApplication(appId.getApplicationId(), "queue1", "user1");
|
||||
scheduler.addApplicationAttempt(appId);
|
||||
scheduler.addApplicationAttempt(appId, false);
|
||||
|
||||
// 1 request with 2 nodes on the same rack. another request with 1 node on
|
||||
// a different rack
|
||||
|
@ -1566,14 +1564,14 @@ public class TestFairScheduler {
|
|||
NodeUpdateSchedulerEvent updateEvent1 = new NodeUpdateSchedulerEvent(node1);
|
||||
scheduler.handle(updateEvent1);
|
||||
// should assign node local
|
||||
assertEquals(1, scheduler.appAttempts.get(appId).getLiveContainers().size());
|
||||
assertEquals(1, scheduler.getSchedulerApp(appId).getLiveContainers().size());
|
||||
|
||||
// node 2 checks in
|
||||
scheduler.update();
|
||||
NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2);
|
||||
scheduler.handle(updateEvent2);
|
||||
// should assign rack local
|
||||
assertEquals(2, scheduler.appAttempts.get(appId).getLiveContainers().size());
|
||||
assertEquals(2, scheduler.getSchedulerApp(appId).getLiveContainers().size());
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
|
@ -1592,8 +1590,8 @@ public class TestFairScheduler {
|
|||
"user1", 2);
|
||||
ApplicationAttemptId attId2 = createSchedulingRequest(1024, "queue1",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app1 = scheduler.appAttempts.get(attId1);
|
||||
FSSchedulerApp app2 = scheduler.appAttempts.get(attId2);
|
||||
FSSchedulerApp app1 = scheduler.getSchedulerApp(attId1);
|
||||
FSSchedulerApp app2 = scheduler.getSchedulerApp(attId2);
|
||||
|
||||
FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1", true);
|
||||
queue1.setPolicy(new FifoPolicy());
|
||||
|
@ -1633,7 +1631,7 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId attId =
|
||||
createSchedulingRequest(1024, "root.default", "user", 8);
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId);
|
||||
|
||||
// set maxAssign to 2: only 2 containers should be allocated
|
||||
scheduler.maxAssign = 2;
|
||||
|
@ -1695,10 +1693,10 @@ public class TestFairScheduler {
|
|||
ApplicationAttemptId attId4 =
|
||||
createSchedulingRequest(1024, fifoQueue, user, 4);
|
||||
|
||||
FSSchedulerApp app1 = scheduler.appAttempts.get(attId1);
|
||||
FSSchedulerApp app2 = scheduler.appAttempts.get(attId2);
|
||||
FSSchedulerApp app3 = scheduler.appAttempts.get(attId3);
|
||||
FSSchedulerApp app4 = scheduler.appAttempts.get(attId4);
|
||||
FSSchedulerApp app1 = scheduler.getSchedulerApp(attId1);
|
||||
FSSchedulerApp app2 = scheduler.getSchedulerApp(attId2);
|
||||
FSSchedulerApp app3 = scheduler.getSchedulerApp(attId3);
|
||||
FSSchedulerApp app4 = scheduler.getSchedulerApp(attId4);
|
||||
|
||||
scheduler.getQueueManager().getLeafQueue(fifoQueue, true)
|
||||
.setPolicy(SchedulingPolicy.parse("fifo"));
|
||||
|
@ -1813,7 +1811,7 @@ public class TestFairScheduler {
|
|||
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
|
||||
scheduler.handle(updateEvent);
|
||||
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId);
|
||||
assertEquals(0, app.getLiveContainers().size());
|
||||
assertEquals(0, app.getReservedContainers().size());
|
||||
|
||||
|
@ -1882,7 +1880,7 @@ public class TestFairScheduler {
|
|||
NodeUpdateSchedulerEvent node2UpdateEvent = new NodeUpdateSchedulerEvent(node2);
|
||||
|
||||
// no matter how many heartbeats, node2 should never get a container
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId1);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId1);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
scheduler.handle(node2UpdateEvent);
|
||||
assertEquals(0, app.getLiveContainers().size());
|
||||
|
@ -1921,7 +1919,7 @@ public class TestFairScheduler {
|
|||
NodeUpdateSchedulerEvent node2UpdateEvent = new NodeUpdateSchedulerEvent(node2);
|
||||
|
||||
// no matter how many heartbeats, node2 should never get a container
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId1);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId1);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
scheduler.handle(node2UpdateEvent);
|
||||
assertEquals(0, app.getLiveContainers().size());
|
||||
|
@ -1954,7 +1952,7 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId attId = createSchedulingRequest(1024, "queue1",
|
||||
"user1", 0);
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId);
|
||||
|
||||
ResourceRequest nodeRequest = createResourceRequest(1024, node2.getHostName(), 1, 2, true);
|
||||
ResourceRequest rackRequest = createResourceRequest(1024, "rack1", 1, 2, true);
|
||||
|
@ -1994,7 +1992,7 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId attId = createSchedulingRequest(1024, 1, "default",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId);
|
||||
scheduler.update();
|
||||
|
||||
NodeUpdateSchedulerEvent updateEvent = new NodeUpdateSchedulerEvent(node1);
|
||||
|
@ -2014,10 +2012,10 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId appAttId1 = createSchedulingRequest(2048, 1, "queue1",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app1 = scheduler.appAttempts.get(appAttId1);
|
||||
FSSchedulerApp app1 = scheduler.getSchedulerApp(appAttId1);
|
||||
ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 2, "queue1",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app2 = scheduler.appAttempts.get(appAttId2);
|
||||
FSSchedulerApp app2 = scheduler.getSchedulerApp(appAttId2);
|
||||
|
||||
DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy();
|
||||
drfPolicy.initialize(scheduler.getClusterCapacity());
|
||||
|
@ -2055,13 +2053,13 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId appAttId1 = createSchedulingRequest(3072, 1, "queue1",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app1 = scheduler.appAttempts.get(appAttId1);
|
||||
FSSchedulerApp app1 = scheduler.getSchedulerApp(appAttId1);
|
||||
ApplicationAttemptId appAttId2 = createSchedulingRequest(2048, 2, "queue1",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app2 = scheduler.appAttempts.get(appAttId2);
|
||||
FSSchedulerApp app2 = scheduler.getSchedulerApp(appAttId2);
|
||||
ApplicationAttemptId appAttId3 = createSchedulingRequest(1024, 2, "queue2",
|
||||
"user1", 2);
|
||||
FSSchedulerApp app3 = scheduler.appAttempts.get(appAttId3);
|
||||
FSSchedulerApp app3 = scheduler.getSchedulerApp(appAttId3);
|
||||
|
||||
DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy();
|
||||
drfPolicy.initialize(scheduler.getClusterCapacity());
|
||||
|
@ -2092,19 +2090,19 @@ public class TestFairScheduler {
|
|||
ApplicationAttemptId appAttId1 = createSchedulingRequest(3074, 1, "queue1.subqueue1",
|
||||
"user1", 2);
|
||||
Thread.sleep(3); // so that start times will be different
|
||||
FSSchedulerApp app1 = scheduler.appAttempts.get(appAttId1);
|
||||
FSSchedulerApp app1 = scheduler.getSchedulerApp(appAttId1);
|
||||
ApplicationAttemptId appAttId2 = createSchedulingRequest(1024, 3, "queue1.subqueue1",
|
||||
"user1", 2);
|
||||
Thread.sleep(3); // so that start times will be different
|
||||
FSSchedulerApp app2 = scheduler.appAttempts.get(appAttId2);
|
||||
FSSchedulerApp app2 = scheduler.getSchedulerApp(appAttId2);
|
||||
ApplicationAttemptId appAttId3 = createSchedulingRequest(2048, 2, "queue1.subqueue2",
|
||||
"user1", 2);
|
||||
Thread.sleep(3); // so that start times will be different
|
||||
FSSchedulerApp app3 = scheduler.appAttempts.get(appAttId3);
|
||||
FSSchedulerApp app3 = scheduler.getSchedulerApp(appAttId3);
|
||||
ApplicationAttemptId appAttId4 = createSchedulingRequest(1024, 2, "queue2",
|
||||
"user1", 2);
|
||||
Thread.sleep(3); // so that start times will be different
|
||||
FSSchedulerApp app4 = scheduler.appAttempts.get(appAttId4);
|
||||
FSSchedulerApp app4 = scheduler.getSchedulerApp(appAttId4);
|
||||
|
||||
DominantResourceFairnessPolicy drfPolicy = new DominantResourceFairnessPolicy();
|
||||
drfPolicy.initialize(scheduler.getClusterCapacity());
|
||||
|
@ -2184,7 +2182,7 @@ public class TestFairScheduler {
|
|||
NodeUpdateSchedulerEvent(node2);
|
||||
|
||||
// no matter how many heartbeats, node2 should never get a container
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId1);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId1);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
scheduler.handle(node2UpdateEvent);
|
||||
assertEquals(0, app.getLiveContainers().size());
|
||||
|
@ -2195,16 +2193,8 @@ public class TestFairScheduler {
|
|||
assertEquals(1, app.getLiveContainers().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcurrentAccessOnApplications() throws Exception {
|
||||
FairScheduler fs = new FairScheduler();
|
||||
TestCapacityScheduler.verifyConcurrentAccessOnApplications(
|
||||
fs.appAttempts, FSSchedulerApp.class, FSLeafQueue.class);
|
||||
}
|
||||
|
||||
|
||||
private void verifyAppRunnable(ApplicationAttemptId attId, boolean runnable) {
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(attId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(attId);
|
||||
FSLeafQueue queue = app.getQueue();
|
||||
Collection<AppSchedulable> runnableApps =
|
||||
queue.getRunnableAppSchedulables();
|
||||
|
@ -2260,7 +2250,7 @@ public class TestFairScheduler {
|
|||
|
||||
// Remove app 1 and both app 2 and app 4 should become runnable in its place
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId1, RMAppAttemptState.FINISHED);
|
||||
new AppAttemptRemovedSchedulerEvent(attId1, RMAppAttemptState.FINISHED, false);
|
||||
scheduler.handle(appRemovedEvent1);
|
||||
verifyAppRunnable(attId2, true);
|
||||
verifyQueueNumRunnable("queue2", 1, 0);
|
||||
|
@ -2324,7 +2314,7 @@ public class TestFairScheduler {
|
|||
// Even though the app was removed from sub3, the app from sub2 gets to go
|
||||
// because it came in first
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED);
|
||||
new AppAttemptRemovedSchedulerEvent(attId2, RMAppAttemptState.FINISHED, false);
|
||||
scheduler.handle(appRemovedEvent1);
|
||||
verifyAppRunnable(attId4, true);
|
||||
verifyQueueNumRunnable("queue1.sub2", 2, 0);
|
||||
|
@ -2333,7 +2323,7 @@ public class TestFairScheduler {
|
|||
|
||||
// Now test removal of a non-runnable app
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId5, RMAppAttemptState.KILLED);
|
||||
new AppAttemptRemovedSchedulerEvent(attId5, RMAppAttemptState.KILLED, true);
|
||||
scheduler.handle(appRemovedEvent2);
|
||||
assertEquals(0, scheduler.maxRunningEnforcer.usersNonRunnableApps
|
||||
.get("user1").size());
|
||||
|
@ -2341,7 +2331,7 @@ public class TestFairScheduler {
|
|||
verifyQueueNumRunnable("queue1.sub3", 0, 0);
|
||||
// verify it doesn't become runnable when there would be space for it
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId4, RMAppAttemptState.FINISHED);
|
||||
new AppAttemptRemovedSchedulerEvent(attId4, RMAppAttemptState.FINISHED, true);
|
||||
scheduler.handle(appRemovedEvent3);
|
||||
verifyQueueNumRunnable("queue1.sub2", 1, 0);
|
||||
verifyQueueNumRunnable("queue1.sub3", 0, 0);
|
||||
|
@ -2378,7 +2368,7 @@ public class TestFairScheduler {
|
|||
ApplicationAttemptId appAttemptId =
|
||||
createAppAttemptId(this.APP_ID++, this.ATTEMPT_ID++);
|
||||
fs.addApplication(appAttemptId.getApplicationId(), "queue11", "user11");
|
||||
fs.addApplicationAttempt(appAttemptId);
|
||||
fs.addApplicationAttempt(appAttemptId, false);
|
||||
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||
ResourceRequest request =
|
||||
createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true);
|
||||
|
@ -2389,7 +2379,7 @@ public class TestFairScheduler {
|
|||
// at least one pass
|
||||
Thread.sleep(fs.getConf().getContinuousSchedulingSleepMs() + 500);
|
||||
|
||||
FSSchedulerApp app = fs.appAttempts.get(appAttemptId);
|
||||
FSSchedulerApp app = fs.getSchedulerApp(appAttemptId);
|
||||
// Wait until app gets resources.
|
||||
while (app.getCurrentConsumption().equals(Resources.none())) { }
|
||||
|
||||
|
@ -2477,7 +2467,7 @@ public class TestFairScheduler {
|
|||
|
||||
ApplicationAttemptId appAttemptId =
|
||||
createSchedulingRequest(GB, "root.default", "user", 1);
|
||||
FSSchedulerApp app = scheduler.appAttempts.get(appAttemptId);
|
||||
FSSchedulerApp app = scheduler.getSchedulerApp(appAttemptId);
|
||||
|
||||
// Verify the blacklist can be updated independent of requesting containers
|
||||
scheduler.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
|
@ -2487,7 +2477,7 @@ public class TestFairScheduler {
|
|||
scheduler.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(), null,
|
||||
Collections.singletonList(host));
|
||||
assertFalse(scheduler.appAttempts.get(appAttemptId).isBlacklisted(host));
|
||||
assertFalse(scheduler.getSchedulerApp(appAttemptId).isBlacklisted(host));
|
||||
|
||||
List<ResourceRequest> update = Arrays.asList(
|
||||
createResourceRequest(GB, node.getHostName(), 1, 0, true));
|
||||
|
@ -2555,6 +2545,6 @@ public class TestFairScheduler {
|
|||
FairScheduler scheduler =
|
||||
(FairScheduler) resourceManager.getResourceScheduler();
|
||||
TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
|
||||
scheduler.applications, scheduler, "default");
|
||||
scheduler.getSchedulerApplications(), scheduler, "default");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.yarn.util.SystemClock;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
|
||||
/**
 * Tests how {@code QueueManager} reacts when the set of configured queue names
 * is reloaded. The manager is built on a mocked {@code FairScheduler}, and the
 * test decides which queues count as non-empty through {@code notEmptyQueues}.
 */
public class TestQueueManager {
|
||||
private FairSchedulerConfiguration conf;
|
||||
private QueueManager queueManager;
|
||||
// Queues that the overridden isEmpty() in setUp() reports as NOT empty.
private Set<FSQueue> notEmptyQueues;
|
||||
|
||||
/**
 * Builds a QueueManager around a mocked FairScheduler whose allocation
 * configuration, scheduler configuration and clock are stubbed, then
 * initializes it with {@code conf}.
 */
@Before
|
||||
public void setUp() throws Exception {
|
||||
conf = new FairSchedulerConfiguration();
|
||||
FairScheduler scheduler = mock(FairScheduler.class);
|
||||
AllocationConfiguration allocConf = new AllocationConfiguration(conf);
|
||||
when(scheduler.getAllocationConfiguration()).thenReturn(allocConf);
|
||||
when(scheduler.getConf()).thenReturn(conf);
|
||||
SystemClock clock = new SystemClock();
|
||||
when(scheduler.getClock()).thenReturn(clock);
|
||||
notEmptyQueues = new HashSet<FSQueue>();
|
||||
// Override isEmpty() so a queue is "empty" unless the test has put it
// into notEmptyQueues.
queueManager = new QueueManager(scheduler) {
|
||||
@Override
|
||||
public boolean isEmpty(FSQueue queue) {
|
||||
return !notEmptyQueues.contains(queue);
|
||||
}
|
||||
};
|
||||
FSQueueMetrics.forQueue("root", null, true, conf);
|
||||
queueManager.initialize(conf);
|
||||
}
|
||||
|
||||
/**
 * Reloads the configured queue names several times and checks, via
 * {@code getLeafQueue(name, false)}, which leaf queues exist afterwards,
 * both when the affected queues are empty and when they are marked non-empty.
 */
@Test
|
||||
public void testReloadTurnsLeafQueueIntoParent() throws Exception {
|
||||
updateConfiguredQueues(queueManager, "queue1");
|
||||
|
||||
// When no apps are running in the leaf queue, should be fine turning it
|
||||
// into a parent.
|
||||
updateConfiguredQueues(queueManager, "queue1.queue2");
|
||||
assertNull(queueManager.getLeafQueue("queue1", false));
|
||||
assertNotNull(queueManager.getLeafQueue("queue1.queue2", false));
|
||||
|
||||
// When leaf queues are empty, should be ok deleting them and
|
||||
// turning parent into a leaf.
|
||||
updateConfiguredQueues(queueManager, "queue1");
|
||||
assertNull(queueManager.getLeafQueue("queue1.queue2", false));
|
||||
assertNotNull(queueManager.getLeafQueue("queue1", false));
|
||||
|
||||
// When apps exist in leaf queue, we shouldn't be able to create
|
||||
// children under it, but things should work otherwise.
|
||||
notEmptyQueues.add(queueManager.getLeafQueue("queue1", false));
|
||||
updateConfiguredQueues(queueManager, "queue1.queue2");
|
||||
assertNull(queueManager.getLeafQueue("queue1.queue2", false));
|
||||
assertNotNull(queueManager.getLeafQueue("queue1", false));
|
||||
|
||||
// When apps exist in leaf queues under a parent queue, shouldn't be
|
||||
// able to turn it into a leaf queue, but things should work otherwise.
|
||||
notEmptyQueues.clear();
|
||||
updateConfiguredQueues(queueManager, "queue1.queue2");
|
||||
notEmptyQueues.add(queueManager.getQueue("root.queue1"));
|
||||
updateConfiguredQueues(queueManager, "queue1");
|
||||
assertNotNull(queueManager.getLeafQueue("queue1.queue2", false));
|
||||
assertNull(queueManager.getLeafQueue("queue1", false));
|
||||
|
||||
// Should never be able to create a queue under the default queue
|
||||
updateConfiguredQueues(queueManager, "default.queue3");
|
||||
assertNull(queueManager.getLeafQueue("default.queue3", false));
|
||||
assertNotNull(queueManager.getLeafQueue("default", false));
|
||||
}
|
||||
|
||||
/**
 * Simulates a configuration reload: creates a new AllocationConfiguration
 * from {@code conf}, sets its queue names to {@code confQueues}, and hands it
 * to {@code queueMgr.updateAllocationConfiguration}.
 *
 * @param queueMgr the queue manager to update
 * @param confQueues the queue names the new configuration should contain
 */
private void updateConfiguredQueues(QueueManager queueMgr, String... confQueues) {
|
||||
AllocationConfiguration allocConf = new AllocationConfiguration(conf);
|
||||
allocConf.queueNames = Sets.newHashSet(confQueues);
|
||||
queueMgr.updateAllocationConfiguration(allocConf);
|
||||
}
|
||||
}
|
|
@ -70,7 +70,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptA
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
|
@ -156,17 +155,12 @@ public class TestFifoScheduler {
|
|||
SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId, "queue", "user");
|
||||
schedular.handle(appEvent);
|
||||
SchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
schedular.handle(attemptEvent);
|
||||
|
||||
appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2);
|
||||
|
||||
SchedulerEvent appEvent2 =
|
||||
new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue",
|
||||
"user");
|
||||
schedular.handle(appEvent2);
|
||||
SchedulerEvent attemptEvent2 =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
schedular.handle(attemptEvent2);
|
||||
|
||||
int afterAppsSubmitted = metrics.getAppsSubmitted();
|
||||
|
@ -203,7 +197,7 @@ public class TestFifoScheduler {
|
|||
"user1");
|
||||
scheduler.handle(appEvent);
|
||||
AppAttemptAddedSchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
scheduler.handle(attemptEvent);
|
||||
|
||||
int memory = 64;
|
||||
|
@ -293,7 +287,7 @@ public class TestFifoScheduler {
|
|||
"user1");
|
||||
scheduler.handle(appEvent);
|
||||
AppAttemptAddedSchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
scheduler.handle(attemptEvent);
|
||||
|
||||
int memory = 1024;
|
||||
|
@ -534,13 +528,6 @@ public class TestFifoScheduler {
|
|||
LOG.info("--- END: testFifoScheduler ---");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConcurrentAccessOnApplications() throws Exception {
|
||||
FifoScheduler fs = new FifoScheduler();
|
||||
TestCapacityScheduler.verifyConcurrentAccessOnApplications(
|
||||
fs.appAttempts, FiCaSchedulerApp.class, Queue.class);
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
@Test
|
||||
public void testBlackListNodes() throws Exception {
|
||||
|
@ -564,18 +551,18 @@ public class TestFifoScheduler {
|
|||
"user");
|
||||
fs.handle(appEvent);
|
||||
SchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId);
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
fs.handle(attemptEvent);
|
||||
|
||||
// Verify the blacklist can be updated independent of requesting containers
|
||||
fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(),
|
||||
Collections.singletonList(host), null);
|
||||
Assert.assertTrue(fs.getApplication(appAttemptId).isBlacklisted(host));
|
||||
Assert.assertTrue(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
|
||||
fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(), null,
|
||||
Collections.singletonList(host));
|
||||
Assert.assertFalse(fs.getApplication(appAttemptId).isBlacklisted(host));
|
||||
Assert.assertFalse(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
|
||||
rm.stop();
|
||||
}
|
||||
|
||||
|
@ -604,8 +591,8 @@ public class TestFifoScheduler {
|
|||
ResourceScheduler.class);
|
||||
MockRM rm = new MockRM(conf);
|
||||
FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler();
|
||||
TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications,
|
||||
fs, "queue");
|
||||
TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(
|
||||
fs.getSchedulerApplications(), fs, "queue");
|
||||
}
|
||||
|
||||
private void checkApplicationResourceUsage(int expected,
|
||||
|
|
|
@ -0,0 +1,615 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.net.NetworkTopology;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceOption;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.event.AsyncDispatcher;
|
||||
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.Application;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.MockNodes;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.Task;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.TestSchedulerUtils;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.TestCapacityScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestFifoScheduler {
|
||||
private static final Log LOG = LogFactory.getLog(TestFifoScheduler.class);
|
||||
private final int GB = 1024;
|
||||
|
||||
private ResourceManager resourceManager = null;
|
||||
|
||||
private static final RecordFactory recordFactory =
|
||||
RecordFactoryProvider.getRecordFactory(null);
|
||||
|
||||
/**
 * Creates a new ResourceManager, configures FifoScheduler as the class for
 * {@code YarnConfiguration.RM_SCHEDULER}, and initializes the manager.
 */
@Before
|
||||
public void setUp() throws Exception {
|
||||
resourceManager = new ResourceManager();
|
||||
Configuration conf = new Configuration();
|
||||
conf.setClass(YarnConfiguration.RM_SCHEDULER,
|
||||
FifoScheduler.class, ResourceScheduler.class);
|
||||
resourceManager.init(conf);
|
||||
}
|
||||
|
||||
/** Stops the ResourceManager created in {@code setUp()}. */
@After
|
||||
public void tearDown() throws Exception {
|
||||
resourceManager.stop();
|
||||
}
|
||||
|
||||
/**
 * Creates a test {@code NodeManager} helper bound to this test's
 * ResourceManager, passing all arguments straight to its constructor.
 *
 * @param hostName host name of the node
 * @param containerManagerPort container manager port passed to the helper
 * @param nmHttpPort HTTP port passed to the helper
 * @param rackName rack name of the node
 * @param capability resource capability of the node
 * @return the newly constructed NodeManager helper
 * @throws IOException declared by this method; presumably raised by the
 *         helper's constructor (not visible here)
 * @throws YarnException declared by this method; presumably raised by the
 *         helper's constructor (not visible here)
 */
private org.apache.hadoop.yarn.server.resourcemanager.NodeManager
|
||||
registerNode(String hostName, int containerManagerPort, int nmHttpPort,
|
||||
String rackName, Resource capability) throws IOException,
|
||||
YarnException {
|
||||
return new org.apache.hadoop.yarn.server.resourcemanager.NodeManager(
|
||||
hostName, containerManagerPort, nmHttpPort, rackName, capability,
|
||||
resourceManager);
|
||||
}
|
||||
|
||||
/**
 * Builds an ApplicationAttemptId for the given application and attempt
 * numbers.
 *
 * @param appId numeric id passed as the second argument of
 *        {@code ApplicationId.newInstance}
 * @param attemptId attempt number within that application
 * @return the new attempt id
 */
private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) {
|
||||
// NOTE(review): the literal 0 is presumably the cluster timestamp - confirm
// against ApplicationId.newInstance.
ApplicationId appIdImpl = ApplicationId.newInstance(0, appId);
|
||||
ApplicationAttemptId attId =
|
||||
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
|
||||
return attId;
|
||||
}
|
||||
|
||||
/**
 * Builds a ResourceRequest through the shared record factory.
 *
 * @param memory value passed to {@code Resources.createResource} for the
 *        request's capability
 * @param host resource name of the request; callers in this class pass a
 *        host name, a rack name, or {@code ResourceRequest.ANY}
 * @param priority numeric priority set on the request
 * @param numContainers number of containers requested
 * @return the populated request
 */
private ResourceRequest createResourceRequest(int memory, String host,
|
||||
int priority, int numContainers) {
|
||||
ResourceRequest request = recordFactory
|
||||
.newRecordInstance(ResourceRequest.class);
|
||||
request.setCapability(Resources.createResource(memory));
|
||||
request.setResourceName(host);
|
||||
request.setNumContainers(numContainers);
|
||||
Priority prio = recordFactory.newRecordInstance(Priority.class);
|
||||
prio.setPriority(priority);
|
||||
request.setPriority(prio);
|
||||
return request;
|
||||
}
|
||||
|
||||
/**
 * Checks that a freshly constructed FifoScheduler, with no nodes added,
 * reports a current queue capacity of 0.0.
 */
@Test(timeout=5000)
|
||||
public void testFifoSchedulerCapacityWhenNoNMs() {
|
||||
FifoScheduler scheduler = new FifoScheduler();
|
||||
QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||
Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
|
||||
}
|
||||
|
||||
/**
 * Adds one application followed by two attempts of that application, and
 * checks that the root queue's "apps submitted" metric grows by exactly one.
 */
@Test(timeout=5000)
|
||||
public void testAppAttemptMetrics() throws Exception {
|
||||
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||
RMContext rmContext = new RMContextImpl(dispatcher, null,
|
||||
null, null, null, null, null, null, null);
|
||||
|
||||
FifoScheduler schedular = new FifoScheduler();
|
||||
schedular.reinitialize(new Configuration(), rmContext);
|
||||
QueueMetrics metrics = schedular.getRootQueueMetrics();
|
||||
int beforeAppsSubmitted = metrics.getAppsSubmitted();
|
||||
|
||||
ApplicationId appId = BuilderUtils.newApplicationId(200, 1);
|
||||
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
|
||||
appId, 1);
|
||||
|
||||
SchedulerEvent appEvent = new AppAddedSchedulerEvent(appId, "queue", "user");
|
||||
schedular.handle(appEvent);
|
||||
SchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
schedular.handle(attemptEvent);
|
||||
|
||||
// Second attempt of the same application; no second app-added event is sent.
appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 2);
|
||||
SchedulerEvent attemptEvent2 =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
schedular.handle(attemptEvent2);
|
||||
|
||||
int afterAppsSubmitted = metrics.getAppsSubmitted();
|
||||
Assert.assertEquals(1, afterAppsSubmitted - beforeAppsSubmitted);
|
||||
}
|
||||
|
||||
/**
 * Registers a single node, submits one application attempt asking for three
 * containers at node, rack and ANY level, and checks that one node update
 * drains the node-local request and leaves three live containers.
 */
@Test(timeout=2000)
|
||||
public void testNodeLocalAssignment() throws Exception {
|
||||
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||
Configuration conf = new Configuration();
|
||||
RMContainerTokenSecretManager containerTokenSecretManager =
|
||||
new RMContainerTokenSecretManager(conf);
|
||||
containerTokenSecretManager.rollMasterKey();
|
||||
NMTokenSecretManagerInRM nmTokenSecretManager =
|
||||
new NMTokenSecretManagerInRM(conf);
|
||||
nmTokenSecretManager.rollMasterKey();
|
||||
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
|
||||
null, containerTokenSecretManager, nmTokenSecretManager, null);
|
||||
|
||||
FifoScheduler scheduler = new FifoScheduler();
|
||||
scheduler.reinitialize(new Configuration(), rmContext);
|
||||
|
||||
RMNode node0 = MockNodes.newNodeInfo(1,
|
||||
Resources.createResource(1024 * 64), 1, "127.0.0.1");
|
||||
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
|
||||
scheduler.handle(nodeEvent1);
|
||||
|
||||
int _appId = 1;
|
||||
int _appAttemptId = 1;
|
||||
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
|
||||
_appAttemptId);
|
||||
AppAddedSchedulerEvent appEvent =
|
||||
new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1",
|
||||
"user1");
|
||||
scheduler.handle(appEvent);
|
||||
AppAttemptAddedSchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
scheduler.handle(attemptEvent);
|
||||
|
||||
int memory = 64;
|
||||
int nConts = 3;
|
||||
int priority = 20;
|
||||
|
||||
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||
ResourceRequest nodeLocal = createResourceRequest(memory,
|
||||
node0.getHostName(), priority, nConts);
|
||||
ResourceRequest rackLocal = createResourceRequest(memory,
|
||||
node0.getRackName(), priority, nConts);
|
||||
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
|
||||
nConts);
|
||||
ask.add(nodeLocal);
|
||||
ask.add(rackLocal);
|
||||
ask.add(any);
|
||||
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
|
||||
|
||||
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
|
||||
|
||||
// Before the node update event, there are 3 local requests outstanding
|
||||
Assert.assertEquals(3, nodeLocal.getNumContainers());
|
||||
|
||||
scheduler.handle(node0Update);
|
||||
|
||||
// After the node update event, check that there are no more local requests
|
||||
// outstanding
|
||||
Assert.assertEquals(0, nodeLocal.getNumContainers());
|
||||
//Also check that the containers were scheduled
|
||||
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
|
||||
Assert.assertEquals(3, info.getLiveContainers().size());
|
||||
}
|
||||
|
||||
/**
 * Checks that changing an RMNode's resource option only reaches the
 * scheduler's view of the node after a node update event, and that a
 * container can then be allocated so that the queue reports capacity 1.0.
 *
 * NOTE(review): the static-import assertEquals calls below appear to pass
 * (actual, expected) rather than (expected, actual); this only affects the
 * failure message - confirm and swap if desired.
 */
@Test(timeout=2000)
|
||||
public void testUpdateResourceOnNode() throws Exception {
|
||||
AsyncDispatcher dispatcher = new InlineDispatcher();
|
||||
Configuration conf = new Configuration();
|
||||
RMContainerTokenSecretManager containerTokenSecretManager =
|
||||
new RMContainerTokenSecretManager(conf);
|
||||
containerTokenSecretManager.rollMasterKey();
|
||||
NMTokenSecretManagerInRM nmTokenSecretManager =
|
||||
new NMTokenSecretManagerInRM(conf);
|
||||
nmTokenSecretManager.rollMasterKey();
|
||||
RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null,
|
||||
null, containerTokenSecretManager, nmTokenSecretManager, null);
|
||||
|
||||
// Anonymous subclass adds a getNodes() accessor for the scheduler's
// "nodes" field; it is invoked reflectively further down.
FifoScheduler scheduler = new FifoScheduler(){
|
||||
@SuppressWarnings("unused")
|
||||
public Map<NodeId, FiCaSchedulerNode> getNodes(){
|
||||
return nodes;
|
||||
}
|
||||
};
|
||||
scheduler.reinitialize(new Configuration(), rmContext);
|
||||
RMNode node0 = MockNodes.newNodeInfo(1,
|
||||
Resources.createResource(2048, 4), 1, "127.0.0.1");
|
||||
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0);
|
||||
scheduler.handle(nodeEvent1);
|
||||
|
||||
Method method = scheduler.getClass().getDeclaredMethod("getNodes");
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<NodeId, FiCaSchedulerNode> schedulerNodes =
|
||||
(Map<NodeId, FiCaSchedulerNode>) method.invoke(scheduler);
|
||||
assertEquals(schedulerNodes.values().size(), 1);
|
||||
|
||||
// set resource of RMNode to 1024 and verify it works.
|
||||
node0.setResourceOption(ResourceOption.newInstance(
|
||||
Resources.createResource(1024, 4), RMNode.OVER_COMMIT_TIMEOUT_MILLIS_DEFAULT));
|
||||
assertEquals(node0.getTotalCapability().getMemory(), 1024);
|
||||
// verify that SchedulerNode's resource hasn't been changed.
|
||||
assertEquals(schedulerNodes.get(node0.getNodeID()).
|
||||
getAvailableResource().getMemory(), 2048);
|
||||
// now, NM heartbeat comes.
|
||||
NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0);
|
||||
scheduler.handle(node0Update);
|
||||
// SchedulerNode's available resource is changed.
|
||||
assertEquals(schedulerNodes.get(node0.getNodeID()).
|
||||
getAvailableResource().getMemory(), 1024);
|
||||
QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||
Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity());
|
||||
|
||||
int _appId = 1;
|
||||
int _appAttemptId = 1;
|
||||
ApplicationAttemptId appAttemptId = createAppAttemptId(_appId,
|
||||
_appAttemptId);
|
||||
AppAddedSchedulerEvent appEvent =
|
||||
new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "queue1",
|
||||
"user1");
|
||||
scheduler.handle(appEvent);
|
||||
AppAttemptAddedSchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
scheduler.handle(attemptEvent);
|
||||
|
||||
int memory = 1024;
|
||||
int priority = 1;
|
||||
|
||||
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
|
||||
ResourceRequest nodeLocal = createResourceRequest(memory,
|
||||
node0.getHostName(), priority, 1);
|
||||
ResourceRequest rackLocal = createResourceRequest(memory,
|
||||
node0.getRackName(), priority, 1);
|
||||
ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority,
|
||||
1);
|
||||
ask.add(nodeLocal);
|
||||
ask.add(rackLocal);
|
||||
ask.add(any);
|
||||
scheduler.allocate(appAttemptId, ask, new ArrayList<ContainerId>(), null, null);
|
||||
|
||||
// Before the node update event, there are one local request
|
||||
Assert.assertEquals(1, nodeLocal.getNumContainers());
|
||||
|
||||
// Now schedule.
|
||||
scheduler.handle(node0Update);
|
||||
|
||||
// After the node update event, check no local request
|
||||
Assert.assertEquals(0, nodeLocal.getNumContainers());
|
||||
// Also check that one container was scheduled
|
||||
SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId);
|
||||
Assert.assertEquals(1, info.getLiveContainers().size());
|
||||
// And check the default Queue now is full.
|
||||
queueInfo = scheduler.getQueueInfo(null, false, false);
|
||||
Assert.assertEquals(1.0f, queueInfo.getCurrentCapacity());
|
||||
}
|
||||
|
||||
/**
 * End-to-end scenario with two registered nodes (4 GB and 2 GB) and two
 * applications whose tasks are added, scheduled and finished step by step,
 * checking per-application and per-node resource usage after each step.
 *
 * Not run by JUnit as written: the {@code @Test} annotation below is
 * commented out. The local {@code GB} constant shadows the instance field of
 * the same name.
 */
// @Test
|
||||
public void testFifoScheduler() throws Exception {
|
||||
|
||||
LOG.info("--- START: testFifoScheduler ---");
|
||||
|
||||
final int GB = 1024;
|
||||
|
||||
// Register node1
|
||||
String host_0 = "host_0";
|
||||
org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_0 =
|
||||
registerNode(host_0, 1234, 2345, NetworkTopology.DEFAULT_RACK,
|
||||
Resources.createResource(4 * GB, 1));
|
||||
nm_0.heartbeat();
|
||||
|
||||
// Register node2
|
||||
String host_1 = "host_1";
|
||||
org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm_1 =
|
||||
registerNode(host_1, 1234, 2345, NetworkTopology.DEFAULT_RACK,
|
||||
Resources.createResource(2 * GB, 1));
|
||||
nm_1.heartbeat();
|
||||
|
||||
// ResourceRequest priorities
|
||||
Priority priority_0 =
|
||||
org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(0);
|
||||
Priority priority_1 =
|
||||
org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.create(1);
|
||||
|
||||
// Submit an application
|
||||
Application application_0 = new Application("user_0", resourceManager);
|
||||
application_0.submit();
|
||||
|
||||
application_0.addNodeManager(host_0, 1234, nm_0);
|
||||
application_0.addNodeManager(host_1, 1234, nm_1);
|
||||
|
||||
Resource capability_0_0 = Resources.createResource(GB);
|
||||
application_0.addResourceRequestSpec(priority_1, capability_0_0);
|
||||
|
||||
Resource capability_0_1 = Resources.createResource(2 * GB);
|
||||
application_0.addResourceRequestSpec(priority_0, capability_0_1);
|
||||
|
||||
Task task_0_0 = new Task(application_0, priority_1,
|
||||
new String[] {host_0, host_1});
|
||||
application_0.addTask(task_0_0);
|
||||
|
||||
// Submit another application
|
||||
Application application_1 = new Application("user_1", resourceManager);
|
||||
application_1.submit();
|
||||
|
||||
application_1.addNodeManager(host_0, 1234, nm_0);
|
||||
application_1.addNodeManager(host_1, 1234, nm_1);
|
||||
|
||||
Resource capability_1_0 = Resources.createResource(3 * GB);
|
||||
application_1.addResourceRequestSpec(priority_1, capability_1_0);
|
||||
|
||||
Resource capability_1_1 = Resources.createResource(4 * GB);
|
||||
application_1.addResourceRequestSpec(priority_0, capability_1_1);
|
||||
|
||||
Task task_1_0 = new Task(application_1, priority_1,
|
||||
new String[] {host_0, host_1});
|
||||
application_1.addTask(task_1_0);
|
||||
|
||||
// Send resource requests to the scheduler
|
||||
LOG.info("Send resource requests to the scheduler");
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
|
||||
// Send a heartbeat to kick the tires on the Scheduler
|
||||
LOG.info("Send a heartbeat to kick the tires on the Scheduler... " +
|
||||
"nm0 -> task_0_0 and task_1_0 allocated, used=4G " +
|
||||
"nm1 -> nothing allocated");
|
||||
nm_0.heartbeat(); // task_0_0 and task_1_0 allocated, used=4G
|
||||
nm_1.heartbeat(); // nothing allocated
|
||||
|
||||
// Get allocations from the scheduler
|
||||
application_0.schedule(); // task_0_0
|
||||
checkApplicationResourceUsage(GB, application_0);
|
||||
|
||||
application_1.schedule(); // task_1_0
|
||||
checkApplicationResourceUsage(3 * GB, application_1);
|
||||
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
|
||||
checkNodeResourceUsage(4*GB, nm_0); // task_0_0 (1G) and task_1_0 (3G)
|
||||
checkNodeResourceUsage(0*GB, nm_1); // no tasks, 2G available
|
||||
|
||||
LOG.info("Adding new tasks...");
|
||||
|
||||
Task task_1_1 = new Task(application_1, priority_1,
|
||||
new String[] {ResourceRequest.ANY});
|
||||
application_1.addTask(task_1_1);
|
||||
|
||||
Task task_1_2 = new Task(application_1, priority_1,
|
||||
new String[] {ResourceRequest.ANY});
|
||||
application_1.addTask(task_1_2);
|
||||
|
||||
Task task_1_3 = new Task(application_1, priority_0,
|
||||
new String[] {ResourceRequest.ANY});
|
||||
application_1.addTask(task_1_3);
|
||||
|
||||
application_1.schedule();
|
||||
|
||||
Task task_0_1 = new Task(application_0, priority_1,
|
||||
new String[] {host_0, host_1});
|
||||
application_0.addTask(task_0_1);
|
||||
|
||||
Task task_0_2 = new Task(application_0, priority_1,
|
||||
new String[] {host_0, host_1});
|
||||
application_0.addTask(task_0_2);
|
||||
|
||||
Task task_0_3 = new Task(application_0, priority_0,
|
||||
new String[] {ResourceRequest.ANY});
|
||||
application_0.addTask(task_0_3);
|
||||
|
||||
application_0.schedule();
|
||||
|
||||
// Send a heartbeat to kick the tires on the Scheduler
|
||||
LOG.info("Sending hb from " + nm_0.getHostName());
|
||||
nm_0.heartbeat(); // nothing new, used=4G
|
||||
|
||||
LOG.info("Sending hb from " + nm_1.getHostName());
|
||||
nm_1.heartbeat(); // task_0_3, used=2G
|
||||
|
||||
// Get allocations from the scheduler
|
||||
LOG.info("Trying to allocate...");
|
||||
application_0.schedule();
|
||||
checkApplicationResourceUsage(3 * GB, application_0);
|
||||
application_1.schedule();
|
||||
checkApplicationResourceUsage(3 * GB, application_1);
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkNodeResourceUsage(4*GB, nm_0);
|
||||
checkNodeResourceUsage(2*GB, nm_1);
|
||||
|
||||
// Complete tasks
|
||||
LOG.info("Finishing up task_0_0");
|
||||
application_0.finishTask(task_0_0); // Now task_0_1
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(3 * GB, application_0);
|
||||
checkApplicationResourceUsage(3 * GB, application_1);
|
||||
checkNodeResourceUsage(4*GB, nm_0);
|
||||
checkNodeResourceUsage(2*GB, nm_1);
|
||||
|
||||
LOG.info("Finishing up task_1_0");
|
||||
application_1.finishTask(task_1_0); // Now task_0_2
|
||||
application_0.schedule(); // final overcommit for app0 caused here
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat(); // final overcommit for app0 occurs here
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(4 * GB, application_0);
|
||||
checkApplicationResourceUsage(0 * GB, application_1);
|
||||
//checkNodeResourceUsage(1*GB, nm_0); // final over-commit -> rm.node->1G, test.node=2G
|
||||
checkNodeResourceUsage(2*GB, nm_1);
|
||||
|
||||
LOG.info("Finishing up task_0_3");
|
||||
application_0.finishTask(task_0_3); // No more
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(2 * GB, application_0);
|
||||
checkApplicationResourceUsage(0 * GB, application_1);
|
||||
//checkNodeResourceUsage(2*GB, nm_0); // final over-commit, rm.node->1G, test.node->2G
|
||||
checkNodeResourceUsage(0*GB, nm_1);
|
||||
|
||||
LOG.info("Finishing up task_0_1");
|
||||
application_0.finishTask(task_0_1);
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(1 * GB, application_0);
|
||||
checkApplicationResourceUsage(0 * GB, application_1);
|
||||
|
||||
LOG.info("Finishing up task_0_2");
|
||||
application_0.finishTask(task_0_2); // now task_1_3 can go!
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(0 * GB, application_0);
|
||||
checkApplicationResourceUsage(4 * GB, application_1);
|
||||
|
||||
LOG.info("Finishing up task_1_3");
|
||||
application_1.finishTask(task_1_3); // now task_1_1
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(0 * GB, application_0);
|
||||
checkApplicationResourceUsage(3 * GB, application_1);
|
||||
|
||||
LOG.info("Finishing up task_1_1");
|
||||
application_1.finishTask(task_1_1);
|
||||
application_0.schedule();
|
||||
application_1.schedule();
|
||||
nm_0.heartbeat();
|
||||
nm_1.heartbeat();
|
||||
checkApplicationResourceUsage(0 * GB, application_0);
|
||||
checkApplicationResourceUsage(3 * GB, application_1);
|
||||
|
||||
LOG.info("--- END: testFifoScheduler ---");
|
||||
}
|
||||
|
||||
@SuppressWarnings("resource")
|
||||
@Test
|
||||
public void testBlackListNodes() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
|
||||
ResourceScheduler.class);
|
||||
MockRM rm = new MockRM(conf);
|
||||
rm.start();
|
||||
FifoScheduler fs = (FifoScheduler) rm.getResourceScheduler();
|
||||
|
||||
String host = "127.0.0.1";
|
||||
RMNode node =
|
||||
MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, host);
|
||||
fs.handle(new NodeAddedSchedulerEvent(node));
|
||||
|
||||
ApplicationId appId = BuilderUtils.newApplicationId(100, 1);
|
||||
ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
|
||||
appId, 1);
|
||||
SchedulerEvent appEvent =
|
||||
new AppAddedSchedulerEvent(appId, "default",
|
||||
"user");
|
||||
fs.handle(appEvent);
|
||||
SchedulerEvent attemptEvent =
|
||||
new AppAttemptAddedSchedulerEvent(appAttemptId, false);
|
||||
fs.handle(attemptEvent);
|
||||
|
||||
// Verify the blacklist can be updated independent of requesting containers
|
||||
fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(),
|
||||
Collections.singletonList(host), null);
|
||||
Assert.assertTrue(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
|
||||
fs.allocate(appAttemptId, Collections.<ResourceRequest>emptyList(),
|
||||
Collections.<ContainerId>emptyList(), null,
|
||||
Collections.singletonList(host));
|
||||
Assert.assertFalse(fs.getApplicationAttempt(appAttemptId).isBlacklisted(host));
|
||||
rm.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAppsInQueue() throws Exception {
|
||||
Application application_0 = new Application("user_0", resourceManager);
|
||||
application_0.submit();
|
||||
|
||||
Application application_1 = new Application("user_0", resourceManager);
|
||||
application_1.submit();
|
||||
|
||||
ResourceScheduler scheduler = resourceManager.getResourceScheduler();
|
||||
|
||||
List<ApplicationAttemptId> appsInDefault = scheduler.getAppsInQueue("default");
|
||||
assertTrue(appsInDefault.contains(application_0.getApplicationAttemptId()));
|
||||
assertTrue(appsInDefault.contains(application_1.getApplicationAttemptId()));
|
||||
assertEquals(2, appsInDefault.size());
|
||||
|
||||
Assert.assertNull(scheduler.getAppsInQueue("someotherqueue"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddAndRemoveAppFromFiFoScheduler() throws Exception {
|
||||
Configuration conf = new Configuration();
|
||||
conf.setClass(YarnConfiguration.RM_SCHEDULER, FifoScheduler.class,
|
||||
ResourceScheduler.class);
|
||||
MockRM rm = new MockRM(conf);
|
||||
FifoScheduler fs = (FifoScheduler)rm.getResourceScheduler();
|
||||
TestSchedulerUtils.verifyAppAddedAndRemovedFromScheduler(fs.applications,
|
||||
fs, "queue");
|
||||
}
|
||||
|
||||
private void checkApplicationResourceUsage(int expected,
|
||||
Application application) {
|
||||
Assert.assertEquals(expected, application.getUsedResources().getMemory());
|
||||
}
|
||||
|
||||
private void checkNodeResourceUsage(int expected,
|
||||
org.apache.hadoop.yarn.server.resourcemanager.NodeManager node) {
|
||||
Assert.assertEquals(expected, node.getUsed().getMemory());
|
||||
node.checkResourceUsage();
|
||||
}
|
||||
|
||||
public static void main(String[] arg) throws Exception {
|
||||
TestFifoScheduler t = new TestFifoScheduler();
|
||||
t.setUp();
|
||||
t.testFifoScheduler();
|
||||
t.tearDown();
|
||||
}
|
||||
}
|
|
@ -29,6 +29,7 @@ import javax.xml.parsers.DocumentBuilder;
|
|||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||
import org.apache.hadoop.service.Service.STATE;
|
||||
import org.apache.hadoop.util.VersionInfo;
|
||||
import org.apache.hadoop.yarn.api.records.QueueState;
|
||||
|
@ -267,6 +268,7 @@ public class TestRMWebServices extends JerseyTest {
|
|||
verifyClusterGeneric(WebServicesTestUtils.getXmlLong(element, "id"),
|
||||
WebServicesTestUtils.getXmlLong(element, "startedOn"),
|
||||
WebServicesTestUtils.getXmlString(element, "state"),
|
||||
WebServicesTestUtils.getXmlString(element, "haState"),
|
||||
WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"),
|
||||
WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"),
|
||||
WebServicesTestUtils.getXmlString(element, "hadoopVersion"),
|
||||
|
@ -282,9 +284,10 @@ public class TestRMWebServices extends JerseyTest {
|
|||
Exception {
|
||||
assertEquals("incorrect number of elements", 1, json.length());
|
||||
JSONObject info = json.getJSONObject("clusterInfo");
|
||||
assertEquals("incorrect number of elements", 9, info.length());
|
||||
assertEquals("incorrect number of elements", 10, info.length());
|
||||
verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"),
|
||||
info.getString("state"), info.getString("hadoopVersionBuiltOn"),
|
||||
info.getString("state"), info.getString("haState"),
|
||||
info.getString("hadoopVersionBuiltOn"),
|
||||
info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"),
|
||||
info.getString("resourceManagerVersionBuiltOn"),
|
||||
info.getString("resourceManagerBuildVersion"),
|
||||
|
@ -293,9 +296,10 @@ public class TestRMWebServices extends JerseyTest {
|
|||
}
|
||||
|
||||
public void verifyClusterGeneric(long clusterid, long startedon,
|
||||
String state, String hadoopVersionBuiltOn, String hadoopBuildVersion,
|
||||
String hadoopVersion, String resourceManagerVersionBuiltOn,
|
||||
String resourceManagerBuildVersion, String resourceManagerVersion) {
|
||||
String state, String haState, String hadoopVersionBuiltOn,
|
||||
String hadoopBuildVersion, String hadoopVersion,
|
||||
String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion,
|
||||
String resourceManagerVersion) {
|
||||
|
||||
assertEquals("clusterId doesn't match: ",
|
||||
ResourceManager.getClusterTimeStamp(), clusterid);
|
||||
|
@ -303,6 +307,8 @@ public class TestRMWebServices extends JerseyTest {
|
|||
ResourceManager.getClusterTimeStamp(), startedon);
|
||||
assertTrue("stated doesn't match: " + state,
|
||||
state.matches(STATE.INITED.toString()));
|
||||
assertTrue("HA state doesn't match: " + haState,
|
||||
haState.matches("INITIALIZING"));
|
||||
|
||||
WebServicesTestUtils.checkStringMatch("hadoopVersionBuiltOn",
|
||||
VersionInfo.getDate(), hadoopVersionBuiltOn);
|
||||
|
|
|
@ -41,9 +41,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
|
@ -1387,29 +1384,30 @@ public class TestRMWebServicesApps extends JerseyTest {
|
|||
rm.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test (timeout = 20000)
|
||||
public void testMultipleAppAttempts() throws JSONException, Exception {
|
||||
rm.start();
|
||||
MockNM amNodeManager = rm.registerNode("127.0.0.1:1234", 2048);
|
||||
MockNM amNodeManager = rm.registerNode("127.0.0.1:1234", 8192);
|
||||
RMApp app1 = rm.submitApp(CONTAINER_MB, "testwordcount", "user1");
|
||||
amNodeManager.nodeHeartbeat(true);
|
||||
rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(),
|
||||
RMAppAttemptState.ALLOCATED);
|
||||
MockAM am = MockRM.launchAM(app1, rm, amNodeManager);
|
||||
int maxAppAttempts = rm.getConfig().getInt(
|
||||
YarnConfiguration.RM_AM_MAX_ATTEMPTS,
|
||||
YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS);
|
||||
assertTrue(maxAppAttempts > 1);
|
||||
int retriesLeft = maxAppAttempts;
|
||||
while (--retriesLeft > 0) {
|
||||
RMAppEvent event =
|
||||
new RMAppFailedAttemptEvent(app1.getApplicationId(),
|
||||
RMAppEventType.ATTEMPT_FAILED, "");
|
||||
app1.handle(event);
|
||||
int numAttempt = 1;
|
||||
while (true) {
|
||||
// fail the AM by sending CONTAINER_FINISHED event without registering.
|
||||
amNodeManager.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
|
||||
am.waitForState(RMAppAttemptState.FAILED);
|
||||
if (numAttempt == maxAppAttempts) {
|
||||
rm.waitForState(app1.getApplicationId(), RMAppState.FAILED);
|
||||
break;
|
||||
}
|
||||
// wait for app to start a new attempt.
|
||||
rm.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED);
|
||||
amNodeManager.nodeHeartbeat(true);
|
||||
am = MockRM.launchAM(app1, rm, amNodeManager);
|
||||
numAttempt++;
|
||||
}
|
||||
rm.waitForState(app1.getCurrentAppAttempt().getAppAttemptId(),
|
||||
RMAppAttemptState.ALLOCATED);
|
||||
assertEquals("incorrect number of attempts", maxAppAttempts,
|
||||
app1.getAppAttempts().values().size());
|
||||
testAppAttemptsHelper(app1.getApplicationId().toString(), app1,
|
||||
|
|
|
@ -32,29 +32,101 @@
|
|||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-nodemanager</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-minikdc</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -38,14 +38,94 @@
|
|||
<artifactId>servlet-api</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
</dependency>
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-httpclient</groupId>
|
||||
<artifactId>commons-httpclient</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.guava</groupId>
|
||||
<artifactId>guava</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-logging</groupId>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jetty</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.glassfish.grizzly</groupId>
|
||||
<artifactId>grizzly-http-servlet</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.jersey-test-framework</groupId>
|
||||
<artifactId>jersey-test-framework-grizzly2</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -32,14 +32,7 @@
|
|||
<hadoop.common.build.dir>${basedir}/../../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<!-- Do not add dependencies here, add them to the POM of the leaf module -->
|
||||
|
||||
<modules>
|
||||
<module>hadoop-yarn-server-common</module>
|
||||
|
|
|
@ -26,10 +26,13 @@
|
|||
<artifactId>hadoop-yarn-site</artifactId>
|
||||
<version>3.0.0-SNAPSHOT</version>
|
||||
<name>hadoop-yarn-site</name>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<properties>
|
||||
<!-- Needed for generating FindBugs warnings using parent pom -->
|
||||
<yarn.basedir>${project.parent.parent.basedir}</yarn.basedir>
|
||||
</properties>
|
||||
|
||||
<!-- Do not add dependencies here, this is a documentation only module -->
|
||||
|
||||
</project>
|
||||
|
|
|
@ -62,6 +62,8 @@ ResourceManager REST API's.
|
|||
*---------------+--------------+-------------------------------+
|
||||
| state | string | The ResourceManager state - valid values are: NOTINITED, INITED, STARTED, STOPPED|
|
||||
*---------------+--------------+-------------------------------+
|
||||
| haState | string | The ResourceManager HA state - valid values are: INITIALIZING, ACTIVE, STANDBY, STOPPED|
|
||||
*---------------+--------------+-------------------------------+
|
||||
| resourceManagerVersion | string | Version of the ResourceManager |
|
||||
*---------------+--------------+-------------------------------+
|
||||
| resourceManagerBuildVersion | string | ResourceManager build string with build version, user, and checksum |
|
||||
|
|
|
@ -33,112 +33,7 @@
|
|||
<hadoop.common.build.dir>${basedir}/../../../hadoop-common-project/hadoop-common/target</hadoop.common.build.dir>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-servlet</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>netty</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>cglib</groupId>
|
||||
<artifactId>cglib</artifactId>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.jersey-test-framework</groupId>
|
||||
<artifactId>jersey-test-framework-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.jersey-test-framework</groupId>
|
||||
<artifactId>jersey-test-framework-grizzly2</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-server</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-json</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.contribs</groupId>
|
||||
<artifactId>jersey-guice</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<!-- Do not add dependencies here, add them to the POM of the leaf module -->
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
|
|
@ -41,132 +41,7 @@
|
|||
<module>hadoop-yarn</module>
|
||||
</modules>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.avro</groupId>
|
||||
<artifactId>avro</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jetty</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.ant</groupId>
|
||||
<artifactId>ant</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>netty</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.velocity</groupId>
|
||||
<artifactId>velocity</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<artifactId>paranamer-ant</artifactId>
|
||||
<groupId>com.thoughtworks.paranamer</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>provided</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>commons-el</groupId>
|
||||
<artifactId>commons-el</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-runtime</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>tomcat</groupId>
|
||||
<artifactId>jasper-compiler</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.mortbay.jetty</groupId>
|
||||
<artifactId>jsp-2.1-jetty</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-annotations</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject</groupId>
|
||||
<artifactId>guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>cglib</groupId>
|
||||
<artifactId>cglib</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey</groupId>
|
||||
<artifactId>jersey-server</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.sun.jersey.contribs</groupId>
|
||||
<artifactId>jersey-guice</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.inject.extensions</groupId>
|
||||
<artifactId>guice-servlet</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>netty</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>commons-io</groupId>
|
||||
<artifactId>commons-io</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.hsqldb</groupId>
|
||||
<artifactId>hsqldb</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
<!-- Do not add dependencies here, add them to the POM of the leaf module -->
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
|
Loading…
Reference in New Issue