diff --git a/hadoop-common-project/dev-support/test-patch.properties b/hadoop-common-project/dev-support/test-patch.properties
index 15b54bfcf0d..c33b2a9440b 100644
--- a/hadoop-common-project/dev-support/test-patch.properties
+++ b/hadoop-common-project/dev-support/test-patch.properties
@@ -18,4 +18,4 @@
 OK_RELEASEAUDIT_WARNINGS=0
 OK_FINDBUGS_WARNINGS=0
-OK_JAVADOC_WARNINGS=6
+OK_JAVADOC_WARNINGS=13
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index ae260fa633c..bdb41ca5294 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -142,10 +142,21 @@ Release 0.23.1 - Unreleased
   HADOOP-7919. Remove the unused hadoop.logfile.* properties from the
   core-default.xml file. (harsh)
 
+  HADOOP-7939. Improve Hadoop subcomponent integration in Hadoop 0.23. (rvs via tucu)
+
+  HADOOP-7988. Upper case in hostname part of the principals doesn't work with
+  kerberos. (jitendra)
+
+  HADOOP-8002. SecurityUtil acquired token message should be a debug rather than info.
+  (Arpit Gupta via mahadev)
+
   OPTIMIZATIONS
 
   BUG FIXES
-
+
+  HADOOP-7998. CheckFileSystem does not correctly honor setVerifyChecksum
+  (Daryn Sharp via bobby)
+
   HADOOP-7811. TestUserGroupInformation#testGetServerSideGroups test fails in
   chroot. (Jonathan Eagles via mahadev)
 
@@ -227,6 +238,15 @@ Release 0.23.1 - Unreleased
   HADOOP-7981. Improve documentation for org.apache.hadoop.io.compress.
   Decompressor.getRemaining (Jonathan Eagles via mahadev)
 
+  HADOOP-7997. SequenceFile.createWriter(...createParent...) no
+  longer works on existing file. (Gregory Chanan via eli)
+
+  HADOOP-7993. Hadoop ignores old-style config options for enabling compressed
+  output. (Anupam Seth via mahadev)
+
+  HADOOP-8000. fetchdt command not available in bin/hadoop.
+  (Arpit Gupta via mahadev)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
index e93040f33b6..89dd17c43e4 100755
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop
@@ -50,7 +50,7 @@ fi
 COMMAND=$1
 case $COMMAND in
   #hdfs commands
-  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer)
+  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
    echo "Instead use the hdfs command for it." 1>&2
    echo "" 1>&2
diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
index 71c94817146..c8ecc42664b 100644
--- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
+++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh
@@ -25,9 +25,21 @@ common_bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
 script="$(basename -- "$this")"
 this="$common_bin/$script"
 
+[ -f "$common_bin/hadoop-layout.sh" ] && . "$common_bin/hadoop-layout.sh"
+
+HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
+HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
+HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
+HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
+HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
+YARN_DIR=${YARN_DIR:-"share/hadoop/mapreduce"}
+YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
+MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
+MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}
+
 # the root of the Hadoop installation
 # See HADOOP-6255 for directory structure layout
-HADOOP_DEFAULT_PREFIX=`dirname "$this"`/..
+HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
 HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
 export HADOOP_PREFIX
 
@@ -144,16 +156,22 @@ CLASSPATH="${HADOOP_CONF_DIR}"
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
 
+if [ "$HADOOP_COMMON_HOME" = "" ]; then
+  if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_DIR" ]; then
+    HADOOP_COMMON_HOME=$HADOOP_PREFIX
+  fi
+fi
+
 # for releases, add core hadoop jar & webapps to CLASSPATH
-if [ -d "$HADOOP_PREFIX/share/hadoop/common/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/common/webapps
+if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR
 fi
 
-if [ -d "$HADOOP_PREFIX/share/hadoop/common/lib" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/common/lib'/*'
+if [ -d "$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_LIB_JARS_DIR'/*'
 fi
 
-CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/common'/*'
+CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'
 
 # add user-specified CLASSPATH last
 if [ "$HADOOP_CLASSPATH" != "" ]; then
@@ -185,13 +203,13 @@ fi
 
 # setup 'java.library.path' for native-hadoop code if necessary
-if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/lib/native" ]; then
+if [ -d "${HADOOP_PREFIX}/build/native" -o -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
 
-  if [ -d "${HADOOP_PREFIX}/lib/native" ]; then
+  if [ -d "${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR" ]; then
     if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/lib/native
+      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
     else
-      JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/lib/native
+      JAVA_LIBRARY_PATH=${HADOOP_PREFIX}/$HADOOP_COMMON_LIB_NATIVE_DIR
     fi
   fi
 fi
@@ -216,37 +234,56 @@ HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
 
 # put hdfs in classpath if present
 if [ "$HADOOP_HDFS_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/share/hadoop/hdfs" ]; then
+  if [ -d "${HADOOP_PREFIX}/$HDFS_DIR" ]; then
     HADOOP_HDFS_HOME=$HADOOP_PREFIX
   fi
 fi
 
-if [ -d "$HADOOP_HDFS_HOME/share/hadoop/hdfs/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs
+if [ -d "$HADOOP_HDFS_HOME/$HDFS_DIR/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR
 fi
 
-if [ -d "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib'/*'
+if [ -d "$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_LIB_JARS_DIR'/*'
 fi
 
-CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/share/hadoop/hdfs'/*'
+CLASSPATH=${CLASSPATH}:$HADOOP_HDFS_HOME/$HDFS_DIR'/*'
 
 # put yarn in classpath if present
 if [ "$YARN_HOME" = "" ]; then
-  if [ -d "${HADOOP_PREFIX}/share/hadoop/mapreduce" ]; then
+  if [ -d "${HADOOP_PREFIX}/$YARN_DIR" ]; then
     YARN_HOME=$HADOOP_PREFIX
   fi
 fi
 
-if [ -d "$YARN_HOME/share/hadoop/mapreduce/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce
+if [ -d "$YARN_HOME/$YARN_DIR/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$YARN_HOME/$YARN_DIR
 fi
 
-if [ -d "$YARN_HOME/share/hadoop/mapreduce/lib" ]; then
-  CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib'/*'
+if [ -d "$YARN_HOME/$YARN_LIB_JARS_DIR" ]; then
+  CLASSPATH=${CLASSPATH}:$YARN_HOME/$YARN_LIB_JARS_DIR'/*'
 fi
 
-CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce'/*'
+CLASSPATH=${CLASSPATH}:$YARN_HOME/$YARN_DIR'/*'
+
+# put mapred in classpath if present AND different from YARN
+if [ "$HADOOP_MAPRED_HOME" = "" ]; then
+  if [ -d "${HADOOP_PREFIX}/$MAPRED_DIR" ]; then
+    HADOOP_MAPRED_HOME=$HADOOP_PREFIX
+  fi
+fi
+
+if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$YARN_HOME/$YARN_DIR" ] ; then
+  if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_DIR/webapps" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR
+  fi
+
+  if [ -d "$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR" ]; then
+    CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_LIB_JARS_DIR'/*'
+  fi
+
+  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
+fi
 
 # cygwin path translation
 if $cygwin; then
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 58f9aa7c8af..0555a79df6d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -345,7 +345,17 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     }
     return name;
   }
-
+
+  private void handleDeprecation() {
+    LOG.debug("Handling deprecation for all properties in config...");
+    Set<Object> keys = new HashSet<Object>();
+    keys.addAll(getProps().keySet());
+    for (Object item: keys) {
+      LOG.debug("Handling deprecation for " + (String)item);
+      handleDeprecation((String)item);
+    }
+  }
+
   static{
     //print deprecation warning if hadoop-site.xml is found in classpath
     ClassLoader cL = Thread.currentThread().getContextClassLoader();
@@ -1665,7 +1675,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
     Element conf = doc.createElement("configuration");
     doc.appendChild(conf);
     conf.appendChild(doc.createTextNode("\n"));
-    getProps(); // ensure properties is set
+    handleDeprecation(); //ensure properties is set and deprecation is handled
     for (Enumeration e = properties.keys(); e.hasMoreElements();) {
       String name = (String)e.nextElement();
       Object object = properties.get(name);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
index f24c3924caf..de1178930f7 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
@@ -304,8 +304,9 @@ public abstract class ChecksumFileSystem extends FilterFileSystem {
    */
   @Override
   public FSDataInputStream open(Path f, int bufferSize) throws IOException {
-    return new FSDataInputStream(
-        new ChecksumFSInputChecker(this, f, bufferSize));
+    return verifyChecksum
+      ? new FSDataInputStream(new ChecksumFSInputChecker(this, f, bufferSize))
+      : getRawFileSystem().open(f, bufferSize);
   }
 
   /** {@inheritDoc} */
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
index a64bd1bf9e2..476eaeb14b5 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java
@@ -467,7 +467,7 @@ public class SequenceFile {
       Metadata metadata) throws IOException {
     return createWriter(FileContext.getFileContext(fs.getUri(), conf),
         conf, name, keyClass, valClass, compressionType, codec,
-        metadata, EnumSet.of(CreateFlag.CREATE),
+        metadata, EnumSet.of(CreateFlag.CREATE,CreateFlag.OVERWRITE),
         CreateOpts.bufferSize(bufferSize),
         createParent ? CreateOpts.createParent()
                      : CreateOpts.donotCreateParent(),
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
index a72b1f272ca..26858874649 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java
@@ -236,7 +236,7 @@ public class SecurityUtil {
     if (fqdn == null || fqdn.equals("") || fqdn.equals("0.0.0.0")) {
       fqdn = getLocalHostName();
     }
-    return components[0] + "/" + fqdn + "@" + components[2];
+    return components[0] + "/" + fqdn.toLowerCase() + "@" + components[2];
   }
 
   static String getLocalHostName() throws UnknownHostException {
@@ -409,7 +409,9 @@ public class SecurityUtil {
     Text service = buildTokenService(addr);
     if (token != null) {
       token.setService(service);
-      LOG.info("Acquired token "+token);  // Token#toString() prints service
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Acquired token "+token);  // Token#toString() prints service
+      }
     } else {
       LOG.warn("Failed to get token for service "+service);
     }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestDeprecatedKeys.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestDeprecatedKeys.java
index 7008544f7b8..8631771b9df 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestDeprecatedKeys.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestDeprecatedKeys.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.conf;
 
+import java.io.ByteArrayOutputStream;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 
@@ -32,4 +34,22 @@ public class TestDeprecatedKeys extends TestCase {
     String scriptFile = conf.get(CommonConfigurationKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY);
     assertTrue(scriptFile.equals("xyz")) ;
   }
+
+  //Tests reading / writing a conf file with deprecation after setting
+  public void testReadWriteWithDeprecatedKeys() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean("old.config.yet.to.be.deprecated", true);
+    Configuration.addDeprecation("old.config.yet.to.be.deprecated",
+        new String[]{"new.conf.to.replace.deprecated.conf"});
+    ByteArrayOutputStream out=new ByteArrayOutputStream();
+    String fileContents;
+    try {
+      conf.writeXml(out);
+      fileContents = out.toString();
+    } finally {
+      out.close();
+    }
+    assertTrue(fileContents.contains("old.config.yet.to.be.deprecated"));
+    assertTrue(fileContents.contains("new.conf.to.replace.deprecated.conf"));
+  }
 }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestChecksumFileSystem.java
index 373bdf12d5a..80347a72b45 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestChecksumFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestChecksumFileSystem.java
@@ -22,12 +22,22 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import static org.apache.hadoop.fs.FileSystemTestHelper.*;
 import org.apache.hadoop.conf.Configuration;
-import junit.framework.TestCase;
+import org.junit.*;
+import static org.junit.Assert.*;
 
-public class TestChecksumFileSystem extends TestCase {
+public class TestChecksumFileSystem {
   static final String TEST_ROOT_DIR
     = System.getProperty("test.build.data","build/test/data/work-dir/localfs");
 
+  static LocalFileSystem localFs;
+
+  @Before
+  public void resetLocalFs() throws Exception {
+    localFs = FileSystem.getLocal(new Configuration());
+    localFs.setVerifyChecksum(true);
+  }
+
+  @Test
   public void testgetChecksumLength() throws Exception {
     assertEquals(8, ChecksumFileSystem.getChecksumLength(0L, 512));
     assertEquals(12, ChecksumFileSystem.getChecksumLength(1L, 512));
@@ -40,9 +50,8 @@ public class TestChecksumFileSystem extends TestCase {
                  ChecksumFileSystem.getChecksumLength(10000000000000L, 10));
   }
 
+  @Test
   public void testVerifyChecksum() throws Exception {
-    Configuration conf = new Configuration();
-    LocalFileSystem localFs = FileSystem.getLocal(conf);
     Path testPath = new Path(TEST_ROOT_DIR, "testPath");
     Path testPath11 = new Path(TEST_ROOT_DIR, "testPath11");
     FSDataOutputStream fout = localFs.create(testPath);
@@ -68,7 +77,7 @@ public class TestChecksumFileSystem extends TestCase {
 
     //copying the wrong checksum file
     FileUtil.copy(localFs, localFs.getChecksumFile(testPath11), localFs,
-                  localFs.getChecksumFile(testPath),false,true,conf);
+                  localFs.getChecksumFile(testPath),false,true,localFs.getConf());
     assertTrue("checksum exists", localFs.exists(localFs.getChecksumFile(testPath)));
 
     boolean errorRead = false;
@@ -80,20 +89,13 @@ public class TestChecksumFileSystem extends TestCase {
     assertTrue("error reading", errorRead);
 
     //now setting verify false, the read should succeed
-    try {
-      localFs.setVerifyChecksum(false);
-      String str = readFile(localFs, testPath, 1024).toString();
-      assertTrue("read", "testing".equals(str));
-    } finally {
-      // reset for other tests
-      localFs.setVerifyChecksum(true);
-    }
-
+    localFs.setVerifyChecksum(false);
+    String str = readFile(localFs, testPath, 1024).toString();
+    assertTrue("read", "testing".equals(str));
   }
 
+  @Test
   public void testMultiChunkFile() throws Exception {
-    Configuration conf = new Configuration();
-    LocalFileSystem localFs = FileSystem.getLocal(conf);
     Path testPath = new Path(TEST_ROOT_DIR, "testMultiChunk");
     FSDataOutputStream fout = localFs.create(testPath);
     for (int i = 0; i < 1000; i++) {
@@ -116,9 +118,8 @@ public class TestChecksumFileSystem extends TestCase {
    * Test to ensure that if the checksum file is truncated, a
    * ChecksumException is thrown
    */
+  @Test
   public void testTruncatedChecksum() throws Exception {
-    Configuration conf = new Configuration();
-    LocalFileSystem localFs = FileSystem.getLocal(conf);
     Path testPath = new Path(TEST_ROOT_DIR, "testtruncatedcrc");
     FSDataOutputStream fout = localFs.create(testPath);
     fout.write("testing truncation".getBytes());
@@ -146,14 +147,60 @@ public class TestChecksumFileSystem extends TestCase {
     }
 
     // telling it not to verify checksums, should avoid issue.
+    localFs.setVerifyChecksum(false);
+    String str = readFile(localFs, testPath, 1024).toString();
+    assertTrue("read", "testing truncation".equals(str));
+  }
+
+  @Test
+  public void testStreamType() throws Exception {
+    Path testPath = new Path(TEST_ROOT_DIR, "testStreamType");
+    localFs.create(testPath).close();
+    FSDataInputStream in = null;
+
+    localFs.setVerifyChecksum(true);
+    in = localFs.open(testPath);
+    assertTrue("stream is input checker",
+        in.getWrappedStream() instanceof FSInputChecker);
+
+    localFs.setVerifyChecksum(false);
+    in = localFs.open(testPath);
+    assertFalse("stream is not input checker",
+        in.getWrappedStream() instanceof FSInputChecker);
+  }
+
+  @Test
+  public void testCorruptedChecksum() throws Exception {
+    Path testPath = new Path(TEST_ROOT_DIR, "testCorruptChecksum");
+    Path checksumPath = localFs.getChecksumFile(testPath);
+
+    // write a file to generate checksum
+    FSDataOutputStream out = localFs.create(testPath, true);
+    out.write("testing 1 2 3".getBytes());
+    out.close();
+    assertTrue(localFs.exists(checksumPath));
+    FileStatus stat = localFs.getFileStatus(checksumPath);
+
+    // alter file directly so checksum is invalid
+    out = localFs.getRawFileSystem().create(testPath, true);
+    out.write("testing stale checksum".getBytes());
+    out.close();
+    assertTrue(localFs.exists(checksumPath));
+    // checksum didn't change on disk
+    assertEquals(stat, localFs.getFileStatus(checksumPath));
+
+    Exception e = null;
     try {
-      localFs.setVerifyChecksum(false);
-      String str = readFile(localFs, testPath, 1024).toString();
-      assertTrue("read", "testing truncation".equals(str));
-    } finally {
-      // reset for other tests
       localFs.setVerifyChecksum(true);
+      readFile(localFs, testPath, 1024);
+    } catch (ChecksumException ce) {
+      e = ce;
+    } finally {
+      assertNotNull("got checksum error", e);
     }
+
+    localFs.setVerifyChecksum(false);
+    String str = readFile(localFs, testPath, 1024);
+    assertEquals("testing stale checksum", str);
   }
 }
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java
index 18bc8df6515..58998be8e65 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java
@@ -517,6 +517,23 @@ public class TestSequenceFile extends TestCase {
     assertTrue("InputStream for " + path + " should have been closed.", openedFile[0].isClosed());
   }
 
+  /**
+   * Test that makes sure createWriter succeeds on a file that was
+   * already created
+   * @throws IOException
+   */
+  public void testCreateWriterOnExistingFile() throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path name = new Path(new Path(System.getProperty("test.build.data","."),
+        "createWriterOnExistingFile") , "file");
+
+    fs.create(name);
+    SequenceFile.createWriter(fs, conf, name, RandomDatum.class,
+        RandomDatum.class, 512, (short) 1, 4096, false,
+        CompressionType.NONE, null, new Metadata());
+  }
+
   public void testRecursiveSeqFileCreate() throws IOException {
     Configuration conf = new Configuration();
     FileSystem fs = FileSystem.getLocal(conf);
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java
index 84f585b5999..57a1e116de7 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestSecurityUtil.java
@@ -89,6 +89,16 @@ public class TestSecurityUtil {
     Mockito.verify(notUsed, Mockito.never()).getCanonicalHostName();
   }
 
+  @Test
+  public void testPrincipalsWithLowerCaseHosts() throws IOException {
+    String service = "xyz/";
+    String realm = "@REALM";
+    String principalInConf = service + SecurityUtil.HOSTNAME_PATTERN + realm;
+    String hostname = "FooHost";
+    String principal = service + hostname.toLowerCase() + realm;
+    verify(principalInConf, hostname, principal);
+  }
+
   @Test
   public void testLocalHostNameForNullOrWild() throws Exception {
     String local = SecurityUtil.getLocalHostName();
diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
index 6566ab25c2d..e45bd42d003 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/sbin/httpfs.sh
@@ -55,8 +55,8 @@ if [ "${1}" = "stop" ]; then
 fi
 
 if [ "${HTTPFS_SILENT}" != "true" ]; then
-  ${BASEDIR}/share/hadoop/httpfs/tomcat/bin/catalina.sh "$@"
+  ${CATALINA_BASE:-"${BASEDIR}/share/hadoop/httpfs/tomcat"}/bin/catalina.sh "$@"
 else
-  ${BASEDIR}/share/hadoop/httpfs/tomcat/bin/catalina.sh "$@" > /dev/null
+  ${CATALINA_BASE:-"${BASEDIR}/share/hadoop/httpfs/tomcat"}/bin/catalina.sh "$@" > /dev/null
 fi
diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java
index f30ab0c0cd2..f3a2a5ad654 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/servlet/TestHostnameFilter.java
@@ -47,7 +47,7 @@ public class TestHostnameFilter extends HTestCase {
       @Override
       public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
         throws IOException, ServletException {
-        Assert.assertEquals(HostnameFilter.get(), "localhost");
+        Assert.assertTrue(HostnameFilter.get().contains("localhost"));
         invoked.set(true);
       }
     };
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index b459ab9da5e..ad6b5dfc21e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -295,6 +295,11 @@ Release 0.23.1 - UNRELEASED
   HDFS-2837. mvn javadoc:javadoc not seeing LimitedPrivate class
   (revans2 via tucu)
 
+  HDFS-2840. TestHostnameFilter should work with localhost or localhost.localdomain (tucu)
+
+  HDFS-2791. If block report races with closing of file, replica is
+  incorrectly marked corrupt. (todd)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
index 09eec6e5dec..2aabf5300bf 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh
@@ -22,8 +22,6 @@ bin=`which "$0"`
 bin=`dirname "${bin}"`
 bin=`cd "$bin"; pwd`
 
-export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}"
-
 DEFAULT_LIBEXEC_DIR="$bin"/../libexec
 HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
 if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 9200a4a3bce..ee94414335a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -1571,7 +1571,24 @@ public class BlockManager {
       }
     case RBW:
     case RWR:
-      return storedBlock.isComplete();
+      if (!storedBlock.isComplete()) {
+        return false;
+      } else if (storedBlock.getGenerationStamp() != iblk.getGenerationStamp()) {
+        return true;
+      } else { // COMPLETE block, same genstamp
+        if (reportedState == ReplicaState.RBW) {
+          // If it's a RBW report for a COMPLETE block, it may just be that
+          // the block report got a little bit delayed after the pipeline
+          // closed. So, ignore this report, assuming we will get a
+          // FINALIZED replica later. See HDFS-2791
+          LOG.info("Received an RBW replica for block " + storedBlock +
+              " on " + dn.getName() + ": ignoring it, since the block is " +
+              "complete with the same generation stamp.");
+          return false;
+        } else {
+          return true;
+        }
+      }
     case RUR:       // should not be reported
     case TEMPORARY: // should not be reported
     default:
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
index 27e189257a7..80e5a4805c1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java
@@ -781,4 +781,13 @@ class BPOfferService implements Runnable {
     return;
   }
 
+  @VisibleForTesting
+  DatanodeProtocol getBpNamenode() {
+    return bpNamenode;
+  }
+
+  @VisibleForTesting
+  void setBpNamenode(DatanodeProtocol bpNamenode) {
+    this.bpNamenode = bpNamenode;
+  }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java
index 5369a9e2e4e..1c3ad442ea1 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java
@@ -101,7 +101,7 @@ public class AppendTestUtil {
     return DFSTestUtil.getFileSystemAs(ugi, conf);
   }
 
-  static void write(OutputStream out, int offset, int length) throws IOException {
+  public static void write(OutputStream out, int offset, int length) throws IOException {
     final byte[] bytes = new byte[length];
     for(int i = 0; i < length; i++) {
       bytes[i] = (byte)(offset + i);
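The BlockManager hunk above (HDFS-2791) reduces to a small decision rule: a delayed RBW report against a COMPLETE block with a matching generation stamp is ignored rather than treated as corruption. A standalone sketch of that rule, with hypothetical names and a trimmed-down state enum; this is an illustration of the logic, not the real HDFS method:

enum ReplicaState { FINALIZED, RBW, RWR, RUR, TEMPORARY }

final class ReportedReplicaCheck {
  /**
   * Decide whether a replica reported as RBW/RWR should be marked corrupt.
   * Mirrors the HDFS-2791 rule sketched above.
   */
  static boolean isCorrupt(boolean storedComplete, long storedGenStamp,
      long reportedGenStamp, ReplicaState reportedState) {
    if (!storedComplete) {
      return false;                 // block still under construction: not corrupt
    }
    if (storedGenStamp != reportedGenStamp) {
      return true;                  // stale generation stamp: corrupt
    }
    // COMPLETE block, same genstamp: an RBW report is assumed to be a
    // late block report from the pipeline close, so it is ignored.
    return reportedState != ReplicaState.RBW;
  }
}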
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java
index 8595c94d340..842616cd625 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java
@@ -17,6 +17,15 @@
  */
 package org.apache.hadoop.hdfs.server.datanode;
 
+import java.io.IOException;
+
+import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.mockito.Mockito;
+
+import com.google.common.base.Preconditions;
+
 /**
  * WARNING!! This is TEST ONLY class: it never has to be used
  * for ANY development purposes.
@@ -42,4 +51,34 @@ public class DataNodeAdapter {
       boolean heartbeatsDisabledForTests) {
     dn.setHeartbeatsDisabledForTests(heartbeatsDisabledForTests);
   }
+
+  /**
+   * Insert a Mockito spy object between the given DataNode and
+   * the given NameNode. This can be used to delay or wait for
+   * RPC calls on the datanode->NN path.
+   */
+  public static DatanodeProtocol spyOnBposToNN(
+      DataNode dn, NameNode nn) {
+    String bpid = nn.getNamesystem().getBlockPoolId();
+
+    BPOfferService bpos = null;
+    for (BPOfferService thisBpos : dn.getAllBpOs()) {
+      if (thisBpos.getBlockPoolId().equals(bpid)) {
+        bpos = thisBpos;
+        break;
+      }
+    }
+    Preconditions.checkArgument(bpos != null,
+        "No such bpid: %s", bpid);
+
+    // When protobufs are merged, the following can be converted
+    // to a simple spy. Because you can't spy on proxy objects,
+    // we have to use the DelegateAnswer trick.
+    DatanodeProtocol origNN = bpos.getBpNamenode();
+    DatanodeProtocol spy = Mockito.mock(DatanodeProtocol.class,
+        new GenericTestUtils.DelegateAnswer(origNN));
+
+    bpos.setBpNamenode(spy);
+    return spy;
+  }
 }
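The "DelegateAnswer trick" above exists because Mockito cannot spy on dynamic proxy objects such as RPC stubs. The generic shape of the technique, sketched with a plain interface (GenericTestUtils.DelegateAnswer is Hadoop's test helper; the delegating Answer here is a self-contained stand-in):

import java.lang.reflect.Method;
import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

interface Greeter {
  String greet(String name);
}

public class DelegateAnswerDemo {
  /** Answer that forwards every unstubbed call to a real delegate. */
  static Answer<Object> delegateTo(final Object delegate) {
    return new Answer<Object>() {
      @Override
      public Object answer(InvocationOnMock invocation) throws Throwable {
        Method m = invocation.getMethod();
        return m.invoke(delegate, invocation.getArguments());
      }
    };
  }

  public static void main(String[] args) {
    Greeter real = new Greeter() {
      @Override
      public String greet(String name) { return "hello " + name; }
    };
    // Behaves like the real object, but calls can be verified, stubbed,
    // or delayed, which is exactly what the block-report test needs.
    Greeter spyLike = Mockito.mock(Greeter.class, delegateTo(real));
    System.out.println(spyLike.greet("world")); // hello world
    Mockito.verify(spyLike).greet("world");
  }
}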
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java
index 5fafc7788ed..8c53fd98f1a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockReport.java
@@ -21,7 +21,9 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.AppendTestUtil;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -35,14 +37,19 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
 import org.apache.log4j.Level;
 import org.junit.After;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mockito;
+import org.mockito.invocation.InvocationOnMock;
 
 import java.io.File;
 import java.io.FilenameFilter;
@@ -50,6 +57,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.CountDownLatch;
 
 /**
  * This test simulates a variety of situations when blocks are being
@@ -491,6 +499,84 @@ public class TestBlockReport {
       resetConfiguration(); // return the initial state of the configuration
     }
   }
+
+  /**
+   * Test for the case where one of the DNs in the pipeline is in the
+   * process of doing a block report exactly when the block is closed.
+   * In this case, the block report becomes delayed until after the
+   * block is marked completed on the NN, and hence it reports an RBW
+   * replica for a COMPLETE block. Such a report should not be marked
+   * corrupt.
+   * This is a regression test for HDFS-2791.
+   */
+  @Test
+  public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
+    final CountDownLatch brFinished = new CountDownLatch(1);
+    DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
+      @Override
+      protected Object passThrough(InvocationOnMock invocation)
+          throws Throwable {
+        try {
+          return super.passThrough(invocation);
+        } finally {
+          // inform the test that our block report went through.
+          brFinished.countDown();
+        }
+      }
+    };
+
+    final String METHOD_NAME = GenericTestUtils.getMethodName();
+    Path filePath = new Path("/" + METHOD_NAME + ".dat");
+
+    // Start a second DN for this test -- we're checking
+    // what happens when one of the DNs is slowed for some reason.
+    REPL_FACTOR = 2;
+    startDNandWait(null, false);
+
+    NameNode nn = cluster.getNameNode();
+
+    FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
+    try {
+      AppendTestUtil.write(out, 0, 10);
+      out.hflush();
+
+      // Set up a spy so that we can delay the block report coming
+      // from this node.
+      DataNode dn = cluster.getDataNodes().get(0);
+      DatanodeProtocol spy =
+        DataNodeAdapter.spyOnBposToNN(dn, nn);
+
+      Mockito.doAnswer(delayer)
+        .when(spy).blockReport(
+          Mockito.anyObject(),
+          Mockito.anyString(),
+          Mockito.anyObject());
+
+      // Force a block report to be generated. The block report will have
+      // an RBW replica in it. Wait for the RPC to be sent, but block
+      // it before it gets to the NN.
+      dn.scheduleAllBlockReport(0);
+      delayer.waitForCall();
+
+    } finally {
+      IOUtils.closeStream(out);
+    }
+
+    // Now that the stream is closed, the NN will have the block in COMPLETE
+    // state.
+    delayer.proceed();
+    brFinished.await();
+
+    // Verify that no replicas are marked corrupt, and that the
+    // file is still readable.
+    BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
+    assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
+    DFSTestUtil.readFile(fs, filePath);
+
+    // Ensure that the file is readable even from the DN that we futzed with.
+    cluster.stopDataNode(1);
+    DFSTestUtil.readFile(fs, filePath);
+  }
 
   private void waitForTempReplica(Block bl, int DN_N1) throws IOException {
     final boolean tooLongWait = false;
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 55800e99bee..43aee211a06 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -36,6 +36,8 @@ Release 0.23.1 - Unreleased
   MAPREDUCE-778. Rumen Anonymizer. (Amar Kamat and Chris Douglas via amarrk)
 
   IMPROVEMENTS
+
+  MAPREDUCE-3481. [Gridmix] Improve Gridmix STRESS mode. (amarrk)
 
   MAPREDUCE-3597. [Rumen] Rumen should provide APIs to access all the
   job-history related information.
@@ -153,6 +155,15 @@ Release 0.23.1 - Unreleased
   MAPREDUCE-3737. The Web Application Proxy's is not documented very well.
   (Robert Evans via mahadev)
 
+  MAPREDUCE-3699. Increased RPC handlers for all YARN servers to reasonable
+  values for working at scale. (Hitesh Shah via vinodkv)
+
+  MAPREDUCE-3693. Added mapreduce.admin.user.env to mapred-default.xml.
+  (Roman Shapshonik via acmurthy)
+
+  MAPREDUCE-3732. Modified CapacityScheduler to use only users with pending
+  requests for computing user-limits. (Arun C Murthy via vinodkv)
+
   OPTIMIZATIONS
 
   MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar
@@ -179,7 +190,13 @@ Release 0.23.1 - Unreleased
   MAPREDUCE-3718. Change default AM heartbeat interval to 1 second. (Hitesh
   Shah via sseth)
 
+  MAPREDUCE-3360. Added information about lost/rebooted/decommissioned nodes
+  on the webapps. (Bhallamudi Venkata Siva Kamesh and Jason Lowe via vinodkv)
+
   BUG FIXES
+
+  MAPREDUCE-2784. [Gridmix] Bug fixes in ExecutionSummarizer and
+  ResourceUsageMatcher. (amarrk)
+
   MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2
   (Jason Lowe via bobby)
@@ -558,6 +575,12 @@ Release 0.23.1 - Unreleased
   MAPREDUCE-3735. Add distcp jar to the distribution (tar).
   (mahadev)
 
+  MAPREDUCE-3720. Changed bin/mapred job -list to not print job-specific
+  information not available at RM. (vinodkv via acmurthy)
+
+  MAPREDUCE-3742. "yarn logs" command fails with ClassNotFoundException.
+  (Jason Lowe via mahadev)
+
 Release 0.23.0 - 2011-11-01
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-mapreduce-project/bin/mapred b/hadoop-mapreduce-project/bin/mapred
index 2ffe9049170..ff1ebbc67df 100755
--- a/hadoop-mapreduce-project/bin/mapred
+++ b/hadoop-mapreduce-project/bin/mapred
@@ -91,15 +91,15 @@ if [ -d "$HADOOP_MAPRED_HOME/build/tools" ]; then
 fi
 
 # for releases, add core mapred jar & webapps to CLASSPATH
-if [ -d "$HADOOP_PREFIX/share/hadoop/mapreduce/webapps" ]; then
-  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/share/hadoop/mapreduce
+if [ -d "$HADOOP_PREFIX/${MAPRED_DIR}/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_PREFIX/${MAPRED_DIR}
 fi
-for f in $HADOOP_MAPRED_HOME/share/hadoop-mapreduce/*.jar; do
+for f in $HADOOP_MAPRED_HOME/${MAPRED_DIR}/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
 # add libs to CLASSPATH
-for f in $HADOOP_MAPRED_HOME/lib/*.jar; do
+for f in $HADOOP_MAPRED_HOME/${MAPRED_LIB_JARS_DIR}/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
index 2af7ef2ace4..ba6b07e8ac9 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
@@ -455,10 +455,14 @@ public class Job extends JobContextImpl implements JobContext {
   public String toString() {
     ensureState(JobState.RUNNING);
     String reasonforFailure = " ";
+    int numMaps = 0;
+    int numReduces = 0;
     try {
       updateStatus();
       if (status.getState().equals(JobStatus.State.FAILED))
         reasonforFailure = getTaskFailureEventString();
+      numMaps = getTaskReports(TaskType.MAP).length;
+      numReduces = getTaskReports(TaskType.REDUCE).length;
     } catch (IOException e) {
     } catch (InterruptedException ie) {
     }
@@ -468,6 +472,8 @@ public class Job extends JobContextImpl implements JobContext {
     sb.append("Job Tracking URL : ").append(status.getTrackingUrl());
     sb.append("\n");
     sb.append("Uber job : ").append(status.isUber()).append("\n");
+    sb.append("Number of maps: ").append(numMaps);
+    sb.append("Number of reduces: ").append(numReduces);
     sb.append("map() completion: ");
     sb.append(status.getMapProgress()).append("\n");
     sb.append("reduce() completion: ");
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
index a1c4407f26f..7b684c5b618 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
@@ -412,7 +412,7 @@ public interface MRJobConfig {
   /** The number of threads used to handle task RPC calls.*/
   public static final String MR_AM_TASK_LISTENER_THREAD_COUNT =
     MR_AM_PREFIX + "job.task.listener.thread-count";
-  public static final int DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT = 10;
+  public static final int DEFAULT_MR_AM_TASK_LISTENER_THREAD_COUNT = 30;
 
   /** How often the AM should send heartbeats to the RM.*/
   public static final String MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS =
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java
index f7ac9c40a6a..607fb347e88 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.mapreduce.tools;
 
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -25,6 +26,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.ipc.RemoteException;
@@ -560,25 +562,28 @@ public class CLI extends Configured implements Tool {
         }
       }
     }
-
+
   public void displayJobList(JobStatus[] jobs)
       throws IOException, InterruptedException {
-    System.out.println("Total jobs:" + jobs.length);
-    System.out.println("JobId\tState\tStartTime\t" +
-        "UserName\tQueue\tPriority\tMaps\tReduces\tUsedContainers\t" +
-        "RsvdContainers\tUsedMem\tRsvdMem\tNeededMem\tAM info");
-    for (JobStatus job : jobs) {
-      TaskReport[] mapReports =
-                 cluster.getJob(job.getJobID()).getTaskReports(TaskType.MAP);
-      TaskReport[] reduceReports =
-                 cluster.getJob(job.getJobID()).getTaskReports(TaskType.REDUCE);
+    displayJobList(jobs, new PrintWriter(System.out));
+  }
 
-      System.out.printf("%s\t%s\t%d\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%dM\t%dM\t%dM\t%s\n",
+  @Private
+  public static String headerPattern = "%23s\t%10s\t%14s\t%12s\t%12s\t%10s\t%15s\t%15s\t%8s\t%8s\t%10s\t%10s\n";
+  @Private
+  public static String dataPattern   = "%23s\t%10s\t%14d\t%12s\t%12s\t%10s\t%14d\t%14d\t%7dM\t%7sM\t%9dM\t%10s\n";
+
+  @Private
+  public void displayJobList(JobStatus[] jobs, PrintWriter writer) {
+    writer.println("Total jobs:" + jobs.length);
+    writer.printf(headerPattern, "JobId", "State", "StartTime", "UserName",
+        "Queue", "Priority", "UsedContainers",
+        "RsvdContainers", "UsedMem", "RsvdMem", "NeededMem", "AM info");
+    for (JobStatus job : jobs) {
+      writer.printf(dataPattern,
           job.getJobID().toString(), job.getState(), job.getStartTime(),
           job.getUsername(), job.getQueue(),
           job.getPriority().name(),
-          mapReports.length,
-          reduceReports.length,
           job.getNumUsedSlots(),
           job.getNumReservedSlots(),
           job.getUsedMem(),
@@ -586,6 +591,7 @@ public class CLI extends Configured implements Tool {
           job.getNeededMem(),
           job.getSchedulingInfo());
     }
+    writer.flush();
   }
 
   public static void main(String[] argv) throws Exception {
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
index 5d02250c49c..920f8df4555 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
@@ -440,6 +440,16 @@
 </property>
 
+<property>
+  <name>mapreduce.admin.user.env</name>
+  <value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native</value>
+  <description>Expert: Additional execution environment entries for
+  map and reduce task processes. This is not an additive property.
+  You must preserve the original value if you want your map and
+  reduce tasks to have access to native libraries (compression, etc).
+  </description>
+</property>
+
 <property>
   <name>mapreduce.task.tmp.dir</name>
   <value>./tmp</value>
@@ -1224,4 +1234,18 @@
   <name>mapreduce.job.end-notification.max.retry.interval</name>
 </property>
 
+<property>
+  <name>yarn.app.mapreduce.am.job.task.listener.thread-count</name>
+  <value>30</value>
+  <description>The number of threads used to handle RPC calls in the
+    MR AppMaster from remote tasks</description>
+</property>
+
+<property>
+  <name>yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms</name>
+  <value>1000</value>
+  <description>The interval in ms at which the MR AppMaster should send
+    heartbeats to the ResourceManager</description>
+</property>
+
 </configuration>
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java
index 3f54e09a33d..a49f1fa798a 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/JobClientUnitTest.java
@@ -22,19 +22,24 @@ import static org.junit.Assert.assertEquals;
 import static org.mockito.Matchers.isA;
 import static org.mockito.Mockito.atLeastOnce;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-
-import org.apache.hadoop.mapred.JobConf;
+import java.io.PrintWriter;
+
 import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobPriority;
 import org.apache.hadoop.mapreduce.JobStatus;
-import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.TaskReport;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.junit.Assert;
 import org.junit.Test;
 
+@SuppressWarnings("deprecation")
 public class JobClientUnitTest {
 
   public class TestJobClient extends JobClient {
@@ -48,7 +53,6 @@ public class JobClientUnitTest {
     }
   }
 
-  @SuppressWarnings("deprecation")
   @Test
   public void testMapTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -64,7 +68,6 @@ public class JobClientUnitTest {
     verify(mockCluster).getJob(id);
   }
 
-  @SuppressWarnings("deprecation")
   @Test
   public void testReduceTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -80,7 +83,6 @@ public class JobClientUnitTest {
     verify(mockCluster).getJob(id);
   }
 
-  @SuppressWarnings("deprecation")
   @Test
   public void testSetupTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -96,7 +98,6 @@ public class JobClientUnitTest {
     verify(mockCluster).getJob(id);
   }
 
-  @SuppressWarnings("deprecation")
   @Test
   public void testCleanupTaskReportsWithNullJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
@@ -115,12 +116,15 @@ public class JobClientUnitTest {
 
   @Test
   public void testShowJob() throws Exception {
     TestJobClient client = new TestJobClient(new JobConf());
-    JobID jobID = new JobID("test", 0);
+
+    long startTime = System.currentTimeMillis();
+
+    JobID jobID = new JobID(String.valueOf(startTime), 12345);
 
     JobStatus mockJobStatus = mock(JobStatus.class);
     when(mockJobStatus.getJobID()).thenReturn(jobID);
     when(mockJobStatus.getState()).thenReturn(JobStatus.State.RUNNING);
-    when(mockJobStatus.getStartTime()).thenReturn(0L);
+    when(mockJobStatus.getStartTime()).thenReturn(startTime);
     when(mockJobStatus.getUsername()).thenReturn("mockuser");
     when(mockJobStatus.getQueue()).thenReturn("mockqueue");
     when(mockJobStatus.getPriority()).thenReturn(JobPriority.NORMAL);
@@ -132,18 +136,21 @@ public class JobClientUnitTest {
     when(mockJobStatus.getSchedulingInfo()).thenReturn("NA");
 
     Job mockJob = mock(Job.class);
-    when(mockJob.getTaskReports(isA(TaskType.class))).thenReturn(new TaskReport[0]);
+    when(mockJob.getTaskReports(isA(TaskType.class))).thenReturn(
+        new TaskReport[5]);
 
     Cluster mockCluster = mock(Cluster.class);
     when(mockCluster.getJob(jobID)).thenReturn(mockJob);
 
     client.setCluster(mockCluster);
-
-    client.displayJobList(new JobStatus[] {mockJobStatus});
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    client.displayJobList(new JobStatus[] {mockJobStatus}, new PrintWriter(out));
+    String commandLineOutput = out.toString();
+    System.out.println(commandLineOutput);
+    Assert.assertTrue(commandLineOutput.contains("Total jobs:1"));
+
     verify(mockJobStatus, atLeastOnce()).getJobID();
-    verify(mockJob, atLeastOnce()).getTaskReports(isA(TaskType.class));
-    verify(mockCluster, atLeastOnce()).getJob(jobID);
     verify(mockJobStatus).getState();
     verify(mockJobStatus).getStartTime();
     verify(mockJobStatus).getUsername();
@@ -155,5 +162,9 @@ public class JobClientUnitTest {
     verify(mockJobStatus).getReservedMem();
     verify(mockJobStatus).getNeededMem();
     verify(mockJobStatus).getSchedulingInfo();
+
+    // This call should not go to each AM.
+    verify(mockCluster, never()).getJob(jobID);
+    verify(mockJob, never()).getTaskReports(isA(TaskType.class));
   }
 }
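The CLI refactor above makes job listing testable by writing to an injected PrintWriter instead of System.out directly, which is what lets JobClientUnitTest capture and assert on the output. A minimal, self-contained sketch of that design choice, with hypothetical names:

import java.io.ByteArrayOutputStream;
import java.io.PrintWriter;

public class WriterInjectionDemo {
  /** Production code writes to whatever sink the caller supplies. */
  static void displaySummary(int totalJobs, PrintWriter writer) {
    writer.println("Total jobs:" + totalJobs);
    writer.flush(); // PrintWriter buffers; flush before the caller reads
  }

  public static void main(String[] args) {
    // Console use: pass System.out.
    displaySummary(3, new PrintWriter(System.out));

    // Test use: capture the bytes and assert on them.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    displaySummary(3, new PrintWriter(out));
    assert out.toString().contains("Total jobs:3");
  }
}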
diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn
index 3cf3c798b0e..7ceac4feae8 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn
+++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn
@@ -140,8 +140,8 @@ if [ -d "$YARN_HOME/build/tools" ]; then
   CLASSPATH=${CLASSPATH}:$YARN_HOME/build/tools
 fi
 
-CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/*
-CLASSPATH=${CLASSPATH}:$YARN_HOME/share/hadoop/mapreduce/lib/*
+CLASSPATH=${CLASSPATH}:$YARN_HOME/${YARN_DIR}/*
+CLASSPATH=${CLASSPATH}:$YARN_HOME/${YARN_LIB_JARS_DIR}/*
 
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
@@ -194,7 +194,7 @@ elif [ "$COMMAND" = "jar" ] ; then
   CLASS=org.apache.hadoop.util.RunJar
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
 elif [ "$COMMAND" = "logs" ] ; then
-  CLASS=org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.LogDumper
+  CLASS=org.apache.hadoop.yarn.logaggregation.LogDumper
   YARN_OPTS="$YARN_OPTS $YARN_CLIENT_OPTS"
 elif [ "$COMMAND" = "daemonlog" ] ; then
   CLASS=org.apache.hadoop.log.LogLevel
diff --git a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh
index 27570442731..934a461b645 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh
+++ b/hadoop-mapreduce-project/hadoop-yarn/bin/yarn-config.sh
@@ -19,8 +19,6 @@ bin=`which "$0"`
 bin=`dirname "${bin}"`
 bin=`cd "$bin"; pwd`
 
-export HADOOP_PREFIX="${HADOOP_PREFIX:-$bin/..}"
-
 DEFAULT_LIBEXEC_DIR="$bin"/../libexec
 HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
 if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index a0300797790..f4cbf6e65bf 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -90,7 +90,7 @@ public class YarnConfiguration extends Configuration {
   /** The number of threads used to handle applications manager requests.*/
   public static final String RM_CLIENT_THREAD_COUNT =
     RM_PREFIX + "client.thread-count";
-  public static final int DEFAULT_RM_CLIENT_THREAD_COUNT = 10;
+  public static final int DEFAULT_RM_CLIENT_THREAD_COUNT = 50;
 
   /** The Kerberos principal for the resource manager.*/
   public static final String RM_PRINCIPAL =
@@ -106,7 +106,7 @@ public class YarnConfiguration extends Configuration {
   /** Number of threads to handle scheduler interface.*/
   public static final String RM_SCHEDULER_CLIENT_THREAD_COUNT =
     RM_PREFIX + "scheduler.client.thread-count";
-  public static final int DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT = 10;
+  public static final int DEFAULT_RM_SCHEDULER_CLIENT_THREAD_COUNT = 50;
 
   /** The address of the RM web application.*/
   public static final String RM_WEBAPP_ADDRESS =
@@ -184,7 +184,7 @@ public class YarnConfiguration extends Configuration {
   /** Number of threads to handle resource tracker calls.*/
   public static final String RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT =
     RM_PREFIX + "resource-tracker.client.thread-count";
-  public static final int DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = 10;
+  public static final int DEFAULT_RM_RESOURCE_TRACKER_CLIENT_THREAD_COUNT = 50;
 
   /** The class to use as the resource scheduler.*/
   public static final String RM_SCHEDULER =
@@ -257,7 +257,7 @@ public class YarnConfiguration extends Configuration {
   /** Number of threads container manager uses.*/
   public static final String NM_CONTAINER_MGR_THREAD_COUNT =
     NM_PREFIX + "container-manager.thread-count";
-  public static final int DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT = 5;
+  public static final int DEFAULT_NM_CONTAINER_MGR_THREAD_COUNT = 20;
 
   /** Number of threads used in cleanup.*/
   public static final String NM_DELETE_THREAD_COUNT =
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
index b9e5ea47e40..cea45798a71 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/resources/yarn-default.xml
@@ -67,7 +67,7 @@
   <property>
     <description>The number of threads used to handle applications manager
       requests.</description>
     <name>yarn.resourcemanager.client.thread-count</name>
-    <value>10</value>
+    <value>50</value>
   </property>
 
@@ -90,7 +90,7 @@
   <property>
     <description>Number of threads to handle scheduler interface.</description>
     <name>yarn.resourcemanager.scheduler.client.thread-count</name>
-    <value>10</value>
+    <value>50</value>
   </property>
 
@@ -179,7 +179,7 @@
   <property>
    <description>Number of threads to handle resource tracker calls.</description>
    <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
-    <value>10</value>
+    <value>50</value>
   </property>
 
@@ -244,7 +244,7 @@
   <property>
     <description>Number of threads container manager uses.</description>
     <name>yarn.nodemanager.container-manager.thread-count</name>
-    <value>5</value>
+    <value>20</value>
   </property>
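The thread-count bumps above only change defaults; operators can still override them per cluster. A short sketch of how these values are read and overridden through YarnConfiguration, using the constants this patch touches:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class ThreadCountDemo {
  public static void main(String[] args) {
    Configuration conf = new YarnConfiguration();

    // Falls back to the compiled-in default (now 50) unless yarn-site.xml
    // or code sets a different value.
    int handlers = conf.getInt(
        YarnConfiguration.RM_CLIENT_THREAD_COUNT,
        YarnConfiguration.DEFAULT_RM_CLIENT_THREAD_COUNT);
    System.out.println("RM client handlers: " + handlers);

    // Per-cluster override, e.g. to shrink a single-node test cluster.
    conf.setInt(YarnConfiguration.RM_CLIENT_THREAD_COUNT, 10);
  }
}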
public class ClusterMetrics { } } - //Total Nodemanagers - public int getNumNMs() { + //Active Nodemanagers + public int getNumActiveNMs() { return numNMs.value(); } @@ -87,6 +86,10 @@ public class ClusterMetrics { numDecommissionedNMs.incr(); } + public void decrDecommisionedNMs() { + numDecommissionedNMs.decr(); + } + //Lost NMs public int getNumLostNMs() { return numLostNMs.value(); @@ -96,6 +99,10 @@ public class ClusterMetrics { numLostNMs.incr(); } + public void decrNumLostNMs() { + numLostNMs.decr(); + } + //Unhealthy NMs public int getUnhealthyNMs() { return numUnhealthyNMs.value(); @@ -118,6 +125,10 @@ public class ClusterMetrics { numRebootedNMs.incr(); } + public void decrNumRebootedNMs() { + numRebootedNMs.decr(); + } + public void removeNode(RMNodeEventType nodeEventType) { numNMs.decr(); switch(nodeEventType){ diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 3d975818f2c..117e77cb77b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -43,6 +43,8 @@ public interface RMContext { ApplicationsStore getApplicationsStore(); ConcurrentMap getRMApps(); + + ConcurrentMap getInactiveRMNodes(); ConcurrentMap getRMNodes(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index a177f1cc168..029a22c8fe3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -43,6 +43,9 @@ public class RMContextImpl implements RMContext { private final ConcurrentMap nodes = new ConcurrentHashMap(); + + private final ConcurrentMap inactiveNodes + = new ConcurrentHashMap(); private AMLivelinessMonitor amLivelinessMonitor; private ContainerAllocationExpirer containerAllocationExpirer; @@ -83,6 +86,11 @@ public class RMContextImpl implements RMContext { public ConcurrentMap getRMNodes() { return this.nodes; } + + @Override + public ConcurrentMap getInactiveRMNodes() { + return this.inactiveNodes; + } @Override public ContainerAllocationExpirer getContainerAllocationExpirer() { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index ccebe3a8908..75c91aa83f2 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -220,10 +220,6 @@ public class ResourceTrackerService extends AbstractService implements if (rmNode == null) { /* node does not exist */ LOG.info("Node not found rebooting " + remoteNodeStatus.getNodeId()); - - // Updating the metrics directly as reboot event cannot be - // triggered on a null rmNode - ClusterMetrics.getMetrics().incrNumRebootedNMs(); return reboot; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 7f2b48f85be..4e79540733f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -119,7 +119,7 @@ public class RMNodeImpl implements RMNode, EventHandler { RMNodeEventType.DECOMMISSION, new RemoveNodeTransition()) .addTransition(RMNodeState.RUNNING, RMNodeState.LOST, RMNodeEventType.EXPIRE, new RemoveNodeTransition()) - .addTransition(RMNodeState.RUNNING, RMNodeState.LOST, + .addTransition(RMNodeState.RUNNING, RMNodeState.REBOOTED, RMNodeEventType.REBOOTING, new RemoveNodeTransition()) .addTransition(RMNodeState.RUNNING, RMNodeState.RUNNING, RMNodeEventType.CLEANUP_APP, new CleanUpAppTransition()) @@ -307,6 +307,21 @@ public class RMNodeImpl implements RMNode, EventHandler { public static class AddNodeTransition implements SingleArcTransition { + + private void updateMetrics(RMNodeState nodeState) { + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + switch (nodeState) { + case LOST: + metrics.decrNumLostNMs(); + break; + case REBOOTED: + metrics.decrNumRebootedNMs(); + break; + case DECOMMISSIONED: + metrics.decrDecommisionedNMs(); + break; + } + } @SuppressWarnings("unchecked") @Override @@ -315,6 +330,13 @@ public class RMNodeImpl implements RMNode, EventHandler { rmNode.context.getDispatcher().getEventHandler().handle( new NodeAddedSchedulerEvent(rmNode)); + + String host = rmNode.nodeId.getHost(); + if (rmNode.context.getInactiveRMNodes().containsKey(host)) { + RMNode node = rmNode.context.getInactiveRMNodes().get(host); + rmNode.context.getInactiveRMNodes().remove(host); + updateMetrics(node.getState()); + } ClusterMetrics.getMetrics().addNode(); } @@ -353,7 +375,7 @@ public class RMNodeImpl implements RMNode, EventHandler { // Remove the node from the system. 
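// Aside: a simplified, standalone model (illustration only, not the patch's
// code) of the bookkeeping that AddNodeTransition above and
// RemoveNodeTransition below implement between them: an expired, rebooted or
// decommissioned node moves from the live map to an inactive-by-host map, and
// a later re-registration of the same host reverses both the map entry and
// the matching per-state gauge, preserving
//   total registered hosts == active + lost + rebooted + decommissioned.
import java.util.HashMap;
import java.util.Map;

public class NodeBookkeepingSketch {
  enum State { RUNNING, LOST, REBOOTED, DECOMMISSIONED }

  private final Map<String, State> live = new HashMap<String, State>();
  private final Map<String, State> inactive = new HashMap<String, State>();
  private final Map<State, Integer> gauges = new HashMap<State, Integer>();

  void remove(String host, State finalState) {
    live.remove(host);
    inactive.put(host, finalState);             // parked, not forgotten
    gauges.merge(finalState, 1, Integer::sum);  // incr lost/rebooted/...
  }

  void reRegister(String host) {
    State previous = inactive.remove(host);
    if (previous != null) {
      gauges.merge(previous, -1, Integer::sum); // decr the prior state
    }
    live.put(host, State.RUNNING);
  }
}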
rmNode.context.getRMNodes().remove(rmNode.nodeId);
     LOG.info("Removed Node " + rmNode.nodeId);
-
+    rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode);
     //Update the metrics
     ClusterMetrics.getMetrics().removeNode(event.getType());
   }
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java
new file mode 100644
index 00000000000..18fbca654c2
--- /dev/null
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ActiveUsersManager.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.yarn.Lock;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+
+/**
+ * {@link ActiveUsersManager} tracks active users in the system: a user is
+ * deemed to be active if they have any running applications with
+ * outstanding resource requests.
+ */
+@Private
+public class ActiveUsersManager {
+
+  private static final Log LOG = LogFactory.getLog(ActiveUsersManager.class);
+
+  private final QueueMetrics metrics;
+
+  private int activeUsers = 0;
+  private Map<String, Set<ApplicationId>> usersApplications =
+      new HashMap<String, Set<ApplicationId>>();
+
+  public ActiveUsersManager(QueueMetrics metrics) {
+    this.metrics = metrics;
+  }
+
+  /**
+   * An application has new outstanding requests.
+   *
+   * @param user application user
+   * @param applicationId activated application
+   */
+  @Lock({Queue.class, SchedulerApp.class})
+  synchronized public void activateApplication(
+      String user, ApplicationId applicationId) {
+    Set<ApplicationId> userApps = usersApplications.get(user);
+    if (userApps == null) {
+      userApps = new HashSet<ApplicationId>();
+      usersApplications.put(user, userApps);
+      ++activeUsers;
+      metrics.incrActiveUsers();
+      LOG.debug("User " + user + " added to activeUsers, currently: " +
+          activeUsers);
+    }
+    if (userApps.add(applicationId)) {
+      metrics.activateApp(user);
+    }
+  }
+
+  /**
+   * An application has no more outstanding requests.
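// Aside: a hedged usage sketch of the class above (signatures as in this
// patch; not code from the patch itself). activateApplication() is idempotent
// per application: the Set.add() above guards the per-app metric, and a user
// is counted once no matter how many requests arrive.
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.util.Records;

public class ActiveUsersSketch {
  public static void main(String[] args) {
    QueueMetrics metrics = QueueMetrics.forQueue("default", null, false);
    ActiveUsersManager mgr = new ActiveUsersManager(metrics);

    ApplicationId app = Records.newRecord(ApplicationId.class);
    mgr.activateApplication("alice", app);
    mgr.activateApplication("alice", app);       // safe: still one active user
    System.out.println(mgr.getNumActiveUsers()); // 1

    mgr.deactivateApplication("alice", app);
    System.out.println(mgr.getNumActiveUsers()); // 0
  }
}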
+ * + * @param user application user + * @param applicationId deactivated application + */ + @Lock({Queue.class, SchedulerApp.class}) + synchronized public void deactivateApplication( + String user, ApplicationId applicationId) { + Set userApps = usersApplications.get(user); + if (userApps != null) { + if (userApps.remove(applicationId)) { + metrics.deactivateApp(user); + } + if (userApps.isEmpty()) { + usersApplications.remove(user); + --activeUsers; + metrics.decrActiveUsers(); + LOG.debug("User " + user + " removed from activeUsers, currently: " + + activeUsers); + } + } + } + + /** + * Get number of active users i.e. users with applications which have pending + * resource requests. + * @return number of active users + */ + @Lock({Queue.class, SchedulerApp.class}) + synchronized public int getNumActiveUsers() { + return activeUsers; + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index 6dae436a339..2040505be67 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -36,12 +36,11 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; /** * This class keeps track of all the consumption of an application. 
This also
@@ -59,27 +58,27 @@ public class AppSchedulingInfo {
   final String user;
   private final AtomicInteger containerIdCounter = new AtomicInteger(0);
-  private final RecordFactory recordFactory = RecordFactoryProvider
-      .getRecordFactory(null);
-
   final Set<Priority> priorities = new TreeSet<Priority>(
       new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator());
   final Map<Priority, Map<String, ResourceRequest>> requests =
      new HashMap<Priority, Map<String, ResourceRequest>>();
-  private final ApplicationStore store;
-
+  //private final ApplicationStore store;
+  private final ActiveUsersManager activeUsersManager;
+
   /* Allocated by scheduler */
   boolean pending = true; // for app metrics

   public AppSchedulingInfo(ApplicationAttemptId appAttemptId,
-      String user, Queue queue, ApplicationStore store) {
+      String user, Queue queue, ActiveUsersManager activeUsersManager,
+      ApplicationStore store) {
     this.applicationAttemptId = appAttemptId;
     this.applicationId = appAttemptId.getApplicationId();
     this.queue = queue;
     this.queueName = queue.getQueueName();
     this.user = user;
-    this.store = store;
+    //this.store = store;
+    this.activeUsersManager = activeUsersManager;
   }

   public ApplicationId getApplicationId() {
@@ -123,7 +122,8 @@ public class AppSchedulingInfo {
    * @param requests
    *          resources to be acquired
    */
-  synchronized public void updateResourceRequests(List<ResourceRequest> requests) {
+  synchronized public void updateResourceRequests(
+      List<ResourceRequest> requests) {
     QueueMetrics metrics = queue.getMetrics();
     // Update resource requests
     for (ResourceRequest request : requests) {
@@ -138,6 +138,16 @@ public class AppSchedulingInfo {
               + request);
         }
         updatePendingResources = true;
+
+        // Note: this assumes at most one priority's request is updated per
+        // call, which holds in practice; activating the same application
+        // more than once is safe in any case, so, unlike
+        // decrementOutstanding(), no loop over all priorities is needed
+        // here.
+        if (request.getNumContainers() > 0) {
+          activeUsersManager.activateApplication(user, applicationId);
+        }
       }

       Map<String, ResourceRequest> asks = this.requests.get(priority);
@@ -246,10 +256,7 @@ public class AppSchedulingInfo {
       this.requests.get(priority).remove(node.getRackName());
     }

-    // Do not remove ANY
-    ResourceRequest offSwitchRequest = requests.get(priority).get(
-        RMNode.ANY);
-    offSwitchRequest.setNumContainers(offSwitchRequest.getNumContainers() - 1);
+    decrementOutstanding(requests.get(priority).get(RMNode.ANY));
   }

   /**
@@ -271,10 +278,7 @@ public class AppSchedulingInfo {
       this.requests.get(priority).remove(node.getRackName());
     }

-    // Do not remove ANY
-    ResourceRequest offSwitchRequest = requests.get(priority).get(
-        RMNode.ANY);
-    offSwitchRequest.setNumContainers(offSwitchRequest.getNumContainers() - 1);
+    decrementOutstanding(requests.get(priority).get(RMNode.ANY));
   }

   /**
@@ -291,11 +295,32 @@ public class AppSchedulingInfo {
     allocate(container);

     // Update future requirements
-
-    // Do not remove ANY
-    offSwitchRequest.setNumContainers(offSwitchRequest.getNumContainers() - 1);
+    decrementOutstanding(offSwitchRequest);
   }

+  synchronized private void decrementOutstanding(
+      ResourceRequest offSwitchRequest) {
+    int numOffSwitchContainers = offSwitchRequest.getNumContainers() - 1;
+
+    // Do not remove ANY
+    offSwitchRequest.setNumContainers(numOffSwitchContainers);
+
+    // Do we have any outstanding requests?
+    // If there is nothing, we need to deactivate this application
+    if (numOffSwitchContainers == 0) {
+      boolean deactivate = true;
+      for (Priority priority : getPriorities()) {
+        ResourceRequest request = getResourceRequest(priority, RMNodeImpl.ANY);
+        if (request.getNumContainers() > 0) {
+          deactivate = false;
+          break;
+        }
+      }
+      if (deactivate) {
+        activeUsersManager.deactivateApplication(user, applicationId);
+      }
+    }
+  }

   synchronized private void allocate(Container container) {
     // Update consumption and track allocations
     //TODO: fixme sharad
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
index 5005d673582..29a0e2f8c9f 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
+++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java
@@ -60,6 +60,8 @@ public class QueueMetrics {
   @Metric("# of pending containers") MutableGaugeInt pendingContainers;
   @Metric("# of reserved memory in GiB") MutableGaugeInt reservedGB;
   @Metric("# of reserved containers") MutableGaugeInt reservedContainers;
+  @Metric("# of active users") MutableGaugeInt activeUsers;
+  @Metric("# of active applications") MutableGaugeInt activeApplications;
   static final Logger LOG = LoggerFactory.getLogger(QueueMetrics.class);
   static final int GB = 1024; // resource.memory is in MB

@@ -287,6 +289,36 @@ public class QueueMetrics {
     }
   }

+  public void incrActiveUsers() {
+    activeUsers.incr();
+  }
+
+  public void decrActiveUsers() {
+    activeUsers.decr();
+  }
+
+  public void activateApp(String user) {
+    activeApplications.incr();
+    QueueMetrics userMetrics = getUserMetrics(user);
+    if (userMetrics != null) {
+      userMetrics.activateApp(user);
+    }
+    if (parent != null) {
+      parent.activateApp(user);
+    }
+  }
+
+  public void deactivateApp(String user) {
+    activeApplications.decr();
+    QueueMetrics userMetrics = getUserMetrics(user);
+    if (userMetrics != null) {
+      userMetrics.deactivateApp(user);
+    }
+    if (parent != null) {
+      parent.deactivateApp(user);
+    }
+  }
+
   public int getAppsSubmitted() {
     return appsSubmitted.value();
   }
@@ -338,4 +370,12 @@ public class QueueMetrics {
   public int getReservedContainers() {
     return reservedContainers.value();
   }
+
+  public int getActiveUsers() {
+    return activeUsers.value();
+  }
+
+  public int getActiveApps() {
+    return activeApplications.value();
+  }
 }
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java
index 8e25e3d2221..c8ed2c08554 100644
--- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java
+++
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java @@ -102,11 +102,12 @@ public class SchedulerApp { private final RMContext rmContext; public SchedulerApp(ApplicationAttemptId applicationAttemptId, - String user, Queue queue, + String user, Queue queue, ActiveUsersManager activeUsersManager, RMContext rmContext, ApplicationStore store) { this.rmContext = rmContext; this.appSchedulingInfo = - new AppSchedulingInfo(applicationAttemptId, user, queue, store); + new AppSchedulingInfo(applicationAttemptId, user, queue, + activeUsersManager, store); this.queue = queue; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java index 01532de9911..b646e14fb8d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; @@ -197,6 +198,12 @@ extends org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue { */ public void updateClusterResource(Resource clusterResource); + /** + * Get the {@link ActiveUsersManager} for the queue. 
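// Aside: how the pieces added above fit together, sketched under the patch's
// own signatures (store wiring and error handling omitted; not code from the
// patch itself). Each LeafQueue owns a single ActiveUsersManager, and the
// scheduler hands that shared instance to every SchedulerApp it creates for
// the queue, so the queue can later ask how many users have outstanding
// requests.
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;

public class WiringSketch {
  static SchedulerApp newApp(ApplicationAttemptId attemptId, String user,
      CSQueue queue, RMContext rmContext) {
    return new SchedulerApp(attemptId, user, queue,
        queue.getActiveUsersManager(), // one shared instance per leaf queue
        rmContext, null /* application store, unused here */);
  }
}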
+ * @return the ActiveUsersManager for the queue + */ + public ActiveUsersManager getActiveUsersManager(); + /** * Recover the state of the queue * @param clusterResource the resource of the cluster diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 364494b76cb..1dd92a74070 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -355,7 +355,8 @@ implements ResourceScheduler, CapacitySchedulerContext { // TODO: Fix store SchedulerApp SchedulerApp = - new SchedulerApp(applicationAttemptId, user, queue, rmContext, null); + new SchedulerApp(applicationAttemptId, user, queue, + queue.getActiveUsersManager(), rmContext, null); // Submit to the queue try { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 1faef7a1e8b..08ee09463a0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -37,6 +37,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.yarn.Lock; +import org.apache.hadoop.yarn.Lock.NoLock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -58,6 +60,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; @@ -120,6 +123,8 @@ public class LeafQueue implements CSQueue { private CapacitySchedulerContext scheduler; + private final ActiveUsersManager activeUsersManager; + final static int DEFAULT_AM_RESOURCE = 2 * 1024; public 
LeafQueue(CapacitySchedulerContext cs, @@ -132,7 +137,7 @@ public class LeafQueue implements CSQueue { this.metrics = old != null ? old.getMetrics() : QueueMetrics.forQueue(getQueuePath(), parent, cs.getConfiguration().getEnableUserMetrics()); - + this.activeUsersManager = new ActiveUsersManager(metrics); this.minimumAllocation = cs.getMinimumResourceCapability(); this.maximumAllocation = cs.getMaximumResourceCapability(); this.minimumAllocationFactor = @@ -348,6 +353,11 @@ public class LeafQueue implements CSQueue { return maxActiveApplicationsPerUser; } + @Override + public ActiveUsersManager getActiveUsersManager() { + return activeUsersManager; + } + @Override public synchronized float getUsedCapacity() { return usedCapacity; @@ -674,6 +684,12 @@ public class LeafQueue implements CSQueue { // Check if we can activate more applications activateApplications(); + // Inform the activeUsersManager + synchronized (application) { + activeUsersManager.deactivateApplication( + application.getUser(), application.getApplicationId()); + } + LOG.info("Application removed -" + " appId: " + application.getApplicationId() + " user: " + application.getUser() + @@ -837,6 +853,7 @@ public class LeafQueue implements CSQueue { return true; } + @Lock({LeafQueue.class, SchedulerApp.class}) private Resource computeAndSetUserResourceLimit(SchedulerApp application, Resource clusterResource, Resource required) { String user = application.getUser(); @@ -853,6 +870,7 @@ public class LeafQueue implements CSQueue { minimumAllocation.getMemory(); } + @Lock(NoLock.class) private Resource computeUserLimit(SchedulerApp application, Resource clusterResource, Resource required) { // What is our current capacity? @@ -877,11 +895,8 @@ public class LeafQueue implements CSQueue { // queue's configured capacity * user-limit-factor. 
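// (Aside, not in the source, with illustrative numbers only; the exact
// expression lives in the elided body of computeUserLimit() below. Suppose
// the queue's capacity is 16 GB, the minimum user-limit-percent is 50, and
// user-limit-factor is 2. With two active users each user's limit is roughly
//   max(16 / 2, 16 * 50 / 100) = 8 GB,
// while with a single active user, as in the new testUserLimits case, the
// limit relaxes toward the queue's own capacity scaled by user-limit-factor.)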
// Also, the queue's configured capacity should be higher than // queue-hard-limit * ulMin - - String userName = application.getUser(); - final int activeUsers = users.size(); - User user = getUser(userName); + final int activeUsers = activeUsersManager.getNumActiveUsers(); int limit = roundUp( @@ -893,12 +908,13 @@ public class LeafQueue implements CSQueue { ); if (LOG.isDebugEnabled()) { + String userName = application.getUser(); LOG.debug("User limit computation for " + userName + " in queue " + getQueueName() + " userLimit=" + userLimit + " userLimitFactor=" + userLimitFactor + " required: " + required + - " consumed: " + user.getConsumedResources() + + " consumed: " + getUser(userName).getConsumedResources() + " limit: " + limit + " queueCapacity: " + queueCapacity + " qconsumed: " + consumed + @@ -1308,8 +1324,10 @@ public class LeafQueue implements CSQueue { // Update application properties for (SchedulerApp application : activeApplications) { - computeAndSetUserResourceLimit( - application, clusterResource, Resources.none()); + synchronized (application) { + computeAndSetUserResourceLimit( + application, clusterResource, Resources.none()); + } } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index 7d3acc5ad38..39aa197f2b0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; @@ -240,6 +241,12 @@ public class ParentQueue implements CSQueue { return maximumCapacity; } + @Override + public ActiveUsersManager getActiveUsersManager() { + // Should never be called since all applications are submitted to LeafQueues + return null; + } + @Override public synchronized float getUsedCapacity() { return usedCapacity; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 145cb8d20d5..9f5f2cc8ca1 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -66,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; @@ -124,10 +125,11 @@ public class FifoScheduler implements ResourceScheduler { private Map applications = new TreeMap(); + + private final ActiveUsersManager activeUsersManager; private static final String DEFAULT_QUEUE_NAME = "default"; - private final QueueMetrics metrics = - QueueMetrics.forQueue(DEFAULT_QUEUE_NAME, null, false); + private final QueueMetrics metrics; private final Queue DEFAULT_QUEUE = new Queue() { @Override @@ -174,6 +176,11 @@ public class FifoScheduler implements ResourceScheduler { } }; + public FifoScheduler() { + metrics = QueueMetrics.forQueue(DEFAULT_QUEUE_NAME, null, false); + activeUsersManager = new ActiveUsersManager(metrics); + } + @Override public Resource getMinimumResourceCapability() { return minimumAllocation; @@ -288,7 +295,7 @@ public class FifoScheduler implements ResourceScheduler { String user) { // TODO: Fix store SchedulerApp schedulerApp = - new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, + new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager, this.rmContext, null); applications.put(appAttemptId, schedulerApp); metrics.submitApp(user); @@ -318,6 +325,12 @@ public class FifoScheduler implements ResourceScheduler { RMContainerEventType.KILL); } + // Inform the activeUsersManager + synchronized (application) { + activeUsersManager.deactivateApplication( + application.getUser(), application.getApplicationId()); + } + // Clean up pending requests, metrics etc. application.stop(rmAppAttemptFinalState); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index 4b3d33c177e..92a84a244c7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -68,7 +68,7 @@ public class MetricsOverviewTable extends HtmlBlock { th().$class("ui-state-default")._("Memory Used")._(). th().$class("ui-state-default")._("Memory Total")._(). th().$class("ui-state-default")._("Memory Reserved")._(). 
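// (Aside, not in the source, on the column swap just below and the matching
// ClusterMetricsInfo change further down: "Active Nodes" now reports only
// live NMs, and unhealthy NMs remain registered, so they stay inside that
// count, while the web services' totalNodes field becomes a derived value:
//   totalNodes = activeNodes + lostNodes + decommissionedNodes + rebootedNodes.)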
- th().$class("ui-state-default")._("Total Nodes")._(). + th().$class("ui-state-default")._("Active Nodes")._(). th().$class("ui-state-default")._("Decommissioned Nodes")._(). th().$class("ui-state-default")._("Lost Nodes")._(). th().$class("ui-state-default")._("Unhealthy Nodes")._(). @@ -82,7 +82,7 @@ public class MetricsOverviewTable extends HtmlBlock { td(StringUtils.byteDesc(clusterMetrics.getAllocatedMB() * BYTES_IN_MB)). td(StringUtils.byteDesc(clusterMetrics.getTotalMB() * BYTES_IN_MB)). td(StringUtils.byteDesc(clusterMetrics.getReservedMB() * BYTES_IN_MB)). - td().a(url("nodes"),String.valueOf(clusterMetrics.getTotalNodes()))._(). + td().a(url("nodes"),String.valueOf(clusterMetrics.getActiveNodes()))._(). td().a(url("nodes/decommissioned"),String.valueOf(clusterMetrics.getDecommissionedNodes()))._(). td().a(url("nodes/lost"),String.valueOf(clusterMetrics.getLostNodes()))._(). td().a(url("nodes/unhealthy"),String.valueOf(clusterMetrics.getUnhealthyNodes()))._(). diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 79c371211cc..cb6b6c5ad09 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -24,6 +24,8 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.DATATABLES_ID; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.tableInit; +import java.util.Collection; + import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -36,6 +38,7 @@ import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TR; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import com.google.inject.Inject; @@ -79,7 +82,19 @@ class NodesPage extends RmView { if(type != null && !type.isEmpty()) { stateFilter = RMNodeState.valueOf(type.toUpperCase()); } - for (RMNode ni : this.rmContext.getRMNodes().values()) { + Collection rmNodes = this.rmContext.getRMNodes().values(); + boolean isInactive = false; + if (stateFilter != null) { + switch (stateFilter) { + case DECOMMISSIONED: + case LOST: + case REBOOTED: + rmNodes = this.rmContext.getInactiveRMNodes().values(); + isInactive = true; + break; + } + } + for (RMNode ni : rmNodes) { if(stateFilter != null) { RMNodeState state = ni.getState(); if(!stateFilter.equals(state)) { @@ -89,12 +104,17 @@ class NodesPage extends RmView { NodeInfo info = new NodeInfo(ni, sched); int usedMemory = (int)info.getUsedMemory(); int availableMemory = (int)info.getAvailableMemory(); - tbody.tr(). + TR>> row = tbody.tr(). td(info.getRack()). td(info.getState()). - td(info.getNodeId()). - td().a("http://" + info.getNodeHTTPAddress(), info.getNodeHTTPAddress())._(). 
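// Aside: a hypothetical client-side illustration of the behavior this page
// and the RMWebServices changes that follow expose (the RM host, port 8088
// and the "state" query-parameter name are assumptions, not shown in this
// patch): filtering on LOST/REBOOTED/DECOMMISSIONED now serves entries from
// the inactive map, with no usable HTTP address since no NM web server is
// running on such a node.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class InactiveNodesQuerySketch {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://rmhost:8088/ws/v1/cluster/nodes?state=LOST");
    BufferedReader in =
        new BufferedReader(new InputStreamReader(url.openStream()));
    String line;
    while ((line = in.readLine()) != null) {
      System.out.println(line); // JSON/XML listing of inactive nodes
    }
    in.close();
  }
}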
- td(info.getHealthStatus()). + td(info.getNodeId()); + if (isInactive) { + row.td()._("N/A")._(); + } else { + String httpAddress = info.getNodeHTTPAddress(); + row.td().a("http://" + httpAddress, httpAddress)._(); + } + row.td(info.getHealthStatus()). td(Times.format(info.getLastHealthUpdate())). td(info.getHealthReport()). td(String.valueOf(info.getNumContainers())). diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 06551b21a8c..449ba758e86 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import java.io.IOException; +import java.util.Collection; import java.util.concurrent.ConcurrentMap; import javax.servlet.http.HttpServletRequest; @@ -68,6 +69,7 @@ import com.google.inject.Singleton; @Singleton @Path("/ws/v1/cluster") public class RMWebServices { + private static final String EMPTY = ""; private static final Log LOG = LogFactory.getLog(RMWebServices.class); private final ResourceManager rm; private static RecordFactory recordFactory = RecordFactoryProvider @@ -144,12 +146,23 @@ public class RMWebServices { if (sched == null) { throw new NotFoundException("Null ResourceScheduler instance"); } - + Collection rmNodes = this.rm.getRMContext().getRMNodes().values(); + boolean isInactive = false; + if (filterState != null && !filterState.isEmpty()) { + RMNodeState nodeState = RMNodeState.valueOf(filterState.toUpperCase()); + switch (nodeState) { + case DECOMMISSIONED: + case LOST: + case REBOOTED: + rmNodes = this.rm.getRMContext().getInactiveRMNodes().values(); + isInactive = true; + break; + } + } NodesInfo allNodes = new NodesInfo(); - for (RMNode ni : this.rm.getRMContext().getRMNodes().values()) { + for (RMNode ni : rmNodes) { NodeInfo nodeInfo = new NodeInfo(ni, sched); if (filterState != null) { - RMNodeState.valueOf(filterState); if (!(nodeInfo.getState().equalsIgnoreCase(filterState))) { continue; } @@ -165,6 +178,9 @@ public class RMWebServices { continue; } } + if (isInactive) { + nodeInfo.setNodeHTTPAddress(EMPTY); + } allNodes.add(nodeInfo); } return allNodes; @@ -183,10 +199,19 @@ public class RMWebServices { } NodeId nid = ConverterUtils.toNodeId(nodeId); RMNode ni = this.rm.getRMContext().getRMNodes().get(nid); + boolean isInactive = false; if (ni == null) { - throw new NotFoundException("nodeId, " + nodeId + ", is not found"); + ni = this.rm.getRMContext().getInactiveRMNodes().get(nid.getHost()); + if (ni == null) { + throw new NotFoundException("nodeId, " + nodeId + ", is not found"); + } + isInactive = true; } - return new NodeInfo(ni, sched); + NodeInfo nodeInfo = new NodeInfo(ni, sched); + if (isInactive) { + nodeInfo.setNodeHTTPAddress(EMPTY); + } + return nodeInfo; } @GET diff --git 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java index fcf878346ce..7d63b057a19 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java @@ -44,6 +44,7 @@ public class ClusterMetricsInfo { protected int unhealthyNodes; protected int decommissionedNodes; protected int rebootedNodes; + protected int activeNodes; public ClusterMetricsInfo() { } // JAXB needs this @@ -59,12 +60,13 @@ public class ClusterMetricsInfo { this.allocatedMB = metrics.getAllocatedGB() * MB_IN_GB; this.containersAllocated = metrics.getAllocatedContainers(); this.totalMB = availableMB + reservedMB + allocatedMB; - this.totalNodes = clusterMetrics.getNumNMs(); + this.activeNodes = clusterMetrics.getNumActiveNMs(); this.lostNodes = clusterMetrics.getNumLostNMs(); this.unhealthyNodes = clusterMetrics.getUnhealthyNMs(); this.decommissionedNodes = clusterMetrics.getNumDecommisionedNMs(); this.rebootedNodes = clusterMetrics.getNumRebootedNMs(); - + this.totalNodes = activeNodes + lostNodes + decommissionedNodes + + rebootedNodes; } public int getAppsSubmitted() { @@ -94,6 +96,10 @@ public class ClusterMetricsInfo { public int getTotalNodes() { return this.totalNodes; } + + public int getActiveNodes() { + return this.activeNodes; + } public int getLostNodes() { return this.lostNodes; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java index bafecbb338f..facd73aef67 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java @@ -94,6 +94,10 @@ public class NodeInfo { public String getNodeHTTPAddress() { return this.nodeHTTPAddress; } + + public void setNodeHTTPAddress(String nodeHTTPAddress) { + this.nodeHTTPAddress = nodeHTTPAddress; + } public String getHealthStatus() { return this.healthStatus; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java index bd44f10b9ed..3434b3c4343 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java @@ -81,13 +81,20 @@ public class MockNM { } public HeartbeatResponse nodeHeartbeat(boolean b) throws Exception { - return nodeHeartbeat(new HashMap>(), b); + return nodeHeartbeat(new HashMap>(), + b, ++responseId); } public HeartbeatResponse nodeHeartbeat(Map> conts, boolean isHealthy) throws Exception { + return nodeHeartbeat(conts, isHealthy, ++responseId); + } + + public HeartbeatResponse nodeHeartbeat(Map> conts, boolean isHealthy, int resId) throws Exception { NodeHeartbeatRequest req = Records.newRecord(NodeHeartbeatRequest.class); NodeStatus status = Records.newRecord(NodeStatus.class); + status.setResponseId(resId); status.setNodeId(nodeId); for (Map.Entry> entry : conts.entrySet()) { status.setContainersStatuses(entry.getValue()); @@ -97,7 +104,6 @@ public class MockNM { healthStatus.setIsNodeHealthy(isHealthy); healthStatus.setLastHealthReportTime(1); status.setNodeHealthStatus(healthStatus); - status.setResponseId(++responseId); req.setNodeStatus(status); return resourceTracker.nodeHeartbeat(req).getHeartbeatResponse(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index 90b43504c1c..7ded620043a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -56,6 +56,17 @@ public class MockNodes { } return list; } + + public static List lostNodes(int racks, int nodesPerRack, + Resource perNode) { + List list = Lists.newArrayList(); + for (int i = 0; i < racks; ++i) { + for (int j = 0; j < nodesPerRack; ++j) { + list.add(lostNodeInfo(i, perNode, RMNodeState.LOST)); + } + } + return list; + } public static NodeId newNodeID(String host, int port) { NodeId nid = recordFactory.newRecordInstance(NodeId.class); @@ -82,92 +93,120 @@ public class MockNodes { return rs; } - public static RMNode newNodeInfo(int rack, final Resource perNode) { + private static class MockRMNodeImpl implements RMNode { + private NodeId nodeId; + private String hostName; + private String nodeAddr; + private String httpAddress; + private int cmdPort; + private Resource perNode; + private String rackName; + private NodeHealthStatus nodeHealthStatus; + private RMNodeState state; + + public MockRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress, + Resource perNode, String rackName, NodeHealthStatus nodeHealthStatus, + int cmdPort, String hostName, RMNodeState state) { + this.nodeId = nodeId; + this.nodeAddr = nodeAddr; + this.httpAddress = httpAddress; + this.perNode = perNode; + this.rackName = rackName; + this.nodeHealthStatus = nodeHealthStatus; + this.cmdPort = cmdPort; + this.hostName = hostName; + this.state = state; + } + + @Override + public NodeId getNodeID() { + return this.nodeId; + } + + @Override + public String getHostName() { + return this.hostName; + } + + @Override + public int getCommandPort() { + return this.cmdPort; + } + + @Override + 
public int getHttpPort() { + return 0; + } + + @Override + public String getNodeAddress() { + return this.nodeAddr; + } + + @Override + public String getHttpAddress() { + return this.httpAddress; + } + + @Override + public NodeHealthStatus getNodeHealthStatus() { + return this.nodeHealthStatus; + } + + @Override + public Resource getTotalCapability() { + return this.perNode; + } + + @Override + public String getRackName() { + return this.rackName; + } + + @Override + public Node getNode() { + throw new UnsupportedOperationException("Not supported yet."); + } + + @Override + public RMNodeState getState() { + return this.state; + } + + @Override + public List getContainersToCleanUp() { + return null; + } + + @Override + public List getAppsToCleanup() { + return null; + } + + @Override + public HeartbeatResponse getLastHeartBeatResponse() { + return null; + } + }; + + private static RMNode buildRMNode(int rack, final Resource perNode, RMNodeState state, String httpAddr) { final String rackName = "rack"+ rack; final int nid = NODE_ID++; final String hostName = "host"+ nid; final int port = 123; final NodeId nodeID = newNodeID(hostName, port); - final String httpAddress = "localhost:0"; + final String httpAddress = httpAddr; final NodeHealthStatus nodeHealthStatus = recordFactory.newRecordInstance(NodeHealthStatus.class); - final Resource used = newUsedResource(perNode); - final Resource avail = newAvailResource(perNode, used); - return new RMNode() { - @Override - public NodeId getNodeID() { - return nodeID; - } + return new MockRMNodeImpl(nodeID, hostName, httpAddress, perNode, rackName, + nodeHealthStatus, nid, hostName, state); + } - @Override - public String getNodeAddress() { - return hostName; - } + public static RMNode lostNodeInfo(int rack, final Resource perNode, RMNodeState state) { + return buildRMNode(rack, perNode, state, "N/A"); + } - @Override - public String getHttpAddress() { - return httpAddress; - } - - @Override - public Resource getTotalCapability() { - return perNode; - } - - @Override - public String getRackName() { - return rackName; - } - - @Override - public Node getNode() { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public NodeHealthStatus getNodeHealthStatus() { - return nodeHealthStatus; - } - - @Override - public int getCommandPort() { - return nid; - } - - @Override - public int getHttpPort() { - // TODO Auto-generated method stub - return 0; - } - - @Override - public String getHostName() { - return hostName; - } - - @Override - public RMNodeState getState() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getAppsToCleanup() { - // TODO Auto-generated method stub - return null; - } - - @Override - public List getContainersToCleanUp() { - // TODO Auto-generated method stub - return null; - } - - @Override - public HeartbeatResponse getLastHeartBeatResponse() { - // TODO Auto-generated method stub - return null; - } - }; + public static RMNode newNodeInfo(int rack, final Resource perNode) { + return buildRMNode(rack, perNode, null, "localhost:0"); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 1b10854eeb5..6d9e726e878 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -130,6 +130,12 @@ public class MockRM extends ResourceManager { nm.getNodeId()); node.handle(new RMNodeEvent(nm.getNodeId(), RMNodeEventType.STARTED)); } + + public void sendNodeLost(MockNM nm) throws Exception { + RMNodeImpl node = (RMNodeImpl) getRMContext().getRMNodes().get( + nm.getNodeId()); + node.handle(new RMNodeEvent(nm.getNodeId(), RMNodeEventType.EXPIRE)); + } public void NMwaitForState(NodeId nodeid, RMNodeState finalState) throws Exception { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java index 4b4fb92e057..ccd8d57a0d1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMNodeTransitions.java @@ -31,6 +31,7 @@ import junit.framework.Assert; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; @@ -100,8 +101,8 @@ public class TestRMNodeTransitions { rmDispatcher.register(SchedulerEventType.class, new TestSchedulerEventDispatcher()); - - node = new RMNodeImpl(null, rmContext, null, 0, 0, null, null); + NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); + node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 183396092bb..8b3f4a08e90 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -157,14 +157,14 @@ public class TestResourceTrackerService { rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); - MockNM nm2 = new MockNM("host2:1234", 2048, rm.getResourceTrackerService()); + MockNM nm2 = rm.registerNode("host2:1234", 2048); int initialMetricCount = ClusterMetrics.getMetrics().getNumRebootedNMs(); HeartbeatResponse nodeHeartbeat = 
nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat( - new HashMap>(), true); + new HashMap>(), true, -100); Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction())); checkRebootedNMCount(rm, ++initialMetricCount); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java index f1a1d956ebf..9c5851a5db8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationLimits.java @@ -302,7 +302,8 @@ public class TestApplicationLimits { final ApplicationAttemptId appAttemptId_0_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0_0 = - spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, rmContext, null)); + spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, + queue.getActiveUsersManager(), rmContext, null)); queue.submitApplication(app_0_0, user_0, A); List app_0_0_requests = new ArrayList(); @@ -320,7 +321,8 @@ public class TestApplicationLimits { final ApplicationAttemptId appAttemptId_0_1 = TestUtils.getMockApplicationAttemptId(1, 0); SchedulerApp app_0_1 = - spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, rmContext, null)); + spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, + queue.getActiveUsersManager(), rmContext, null)); queue.submitApplication(app_0_1, user_0, A); List app_0_1_requests = new ArrayList(); @@ -338,7 +340,8 @@ public class TestApplicationLimits { final ApplicationAttemptId appAttemptId_1_0 = TestUtils.getMockApplicationAttemptId(2, 0); SchedulerApp app_1_0 = - spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, rmContext, null)); + spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, + queue.getActiveUsersManager(), rmContext, null)); queue.submitApplication(app_1_0, user_1, A); List app_1_0_requests = new ArrayList(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index bee6e02553a..5e4243cccb7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -18,8 +18,18 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; -import static org.junit.Assert.*; -import static org.mockito.Mockito.*; +import static org.junit.Assert.assertEquals; +import static 
org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; import java.io.IOException; import java.util.ArrayList; @@ -28,9 +38,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; @@ -48,19 +55,17 @@ import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; - import org.junit.After; import org.junit.Before; import org.junit.Test; - import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; public class TestLeafQueue { - private static final Log LOG = LogFactory.getLog(TestLeafQueue.class); private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); @@ -136,7 +141,6 @@ public class TestLeafQueue { final String Q_C1 = Q_C + "." 
+ C1; conf.setCapacity(Q_C1, 100); - LOG.info("Setup top-level queues a and b"); } static LeafQueue stubLeafQueue(LeafQueue queue) { @@ -217,13 +221,15 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_0, user_0, B); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); SchedulerApp app_1 = - new SchedulerApp(appAttemptId_1, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_1, user_0, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_1, user_0, B); // same user @@ -264,13 +270,15 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_0, user_0, A); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); SchedulerApp app_1 = - new SchedulerApp(appAttemptId_1, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_1, user_0, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_1, user_0, A); // same user @@ -371,6 +379,99 @@ public class TestLeafQueue { assertEquals(1, a.getMetrics().getAvailableGB()); } + @Test + public void testUserLimits() throws Exception { + // Mock the queue + LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A)); + //unset maxCapacity + a.setMaxCapacity(1.0f); + + // Users + final String user_0 = "user_0"; + final String user_1 = "user_1"; + + // Submit applications + final ApplicationAttemptId appAttemptId_0 = + TestUtils.getMockApplicationAttemptId(0, 0); + SchedulerApp app_0 = + new SchedulerApp(appAttemptId_0, user_0, a, + a.getActiveUsersManager(), rmContext, null); + a.submitApplication(app_0, user_0, A); + + final ApplicationAttemptId appAttemptId_1 = + TestUtils.getMockApplicationAttemptId(1, 0); + SchedulerApp app_1 = + new SchedulerApp(appAttemptId_1, user_0, a, + a.getActiveUsersManager(), rmContext, null); + a.submitApplication(app_1, user_0, A); // same user + + final ApplicationAttemptId appAttemptId_2 = + TestUtils.getMockApplicationAttemptId(2, 0); + SchedulerApp app_2 = + new SchedulerApp(appAttemptId_2, user_1, a, + a.getActiveUsersManager(), rmContext, null); + a.submitApplication(app_2, user_1, A); + + // Setup some nodes + String host_0 = "host_0"; + SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB); + String host_1 = "host_1"; + SchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 8*GB); + + final int numNodes = 2; + Resource clusterResource = Resources.createResource(numNodes * (8*GB)); + when(csContext.getNumClusterNodes()).thenReturn(numNodes); + + // Setup resource-requests + Priority priority = TestUtils.createMockPriority(1); + app_0.updateResourceRequests(Collections.singletonList( + TestUtils.createResourceRequest(RMNodeImpl.ANY, 2*GB, 1, priority, + recordFactory))); + + app_1.updateResourceRequests(Collections.singletonList( + TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2, priority, + recordFactory))); + + /** + * Start testing... 
+ */ + + // Set user-limit + a.setUserLimit(50); + a.setUserLimitFactor(2); + + // Now, only user_0 should be active since he is the only one with + // outstanding requests + assertEquals("There should only be 1 active user!", + 1, a.getActiveUsersManager().getNumActiveUsers()); + + // This commented-out block is key to testing 'activeUsers': if it were + // uncommented, app_2's request would activate user_1 and raise + // 'activeUsers' to 2, failing the assertions below. + // Prior to MAPREDUCE-3732 this test would fail even without this block. +// app_2.updateResourceRequests(Collections.singletonList( +// TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 1, priority, +// recordFactory))); + + // 1 container to user_0 + a.assignContainers(clusterResource, node_0); + assertEquals(2*GB, a.getUsedResources().getMemory()); + assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); + assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + + // Again one to user_0 since he hasn't exceeded the user limit yet + a.assignContainers(clusterResource, node_0); + assertEquals(3*GB, a.getUsedResources().getMemory()); + assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); + assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); + + // One more to user_0 since he is the only active user + a.assignContainers(clusterResource, node_1); + assertEquals(4*GB, a.getUsedResources().getMemory()); + assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); + assertEquals(2*GB, app_1.getCurrentConsumption().getMemory()); + } + @Test public void testSingleQueueWithMultipleUsers() throws Exception { @@ -388,15 +489,31 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_0, user_0, a, + a.getActiveUsersManager(), rmContext, null); a.submitApplication(app_0, user_0, A); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); SchedulerApp app_1 = - new SchedulerApp(appAttemptId_1, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_1, user_0, a, + a.getActiveUsersManager(), rmContext, null); a.submitApplication(app_1, user_0, A); // same user + final ApplicationAttemptId appAttemptId_2 = + TestUtils.getMockApplicationAttemptId(2, 0); + SchedulerApp app_2 = + new SchedulerApp(appAttemptId_2, user_1, a, + a.getActiveUsersManager(), rmContext, null); + a.submitApplication(app_2, user_1, A); + + final ApplicationAttemptId appAttemptId_3 = + TestUtils.getMockApplicationAttemptId(3, 0); + SchedulerApp app_3 = + new SchedulerApp(appAttemptId_3, user_2, a, + a.getActiveUsersManager(), rmContext, null); + a.submitApplication(app_3, user_2, A); + // Setup some nodes String host_0 = "host_0"; SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 8*GB); @@ -438,19 +555,8 @@ public class TestLeafQueue { assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - - // Submit more apps - final ApplicationAttemptId appAttemptId_2 = - TestUtils.getMockApplicationAttemptId(2, 0); - SchedulerApp app_2 = - new SchedulerApp(appAttemptId_2, user_1, a, rmContext, null); - a.submitApplication(app_2, user_1, A); - - final ApplicationAttemptId appAttemptId_3 = - TestUtils.getMockApplicationAttemptId(3, 0); - SchedulerApp app_3 = - new SchedulerApp(appAttemptId_3, user_2, a, rmContext, 
null); - a.submitApplication(app_3, user_2, A); + + // Submit resource requests for other apps now to 'activate' them app_2.updateResourceRequests(Collections.singletonList( TestUtils.createResourceRequest(RMNodeImpl.ANY, 3*GB, 1, priority, @@ -558,13 +664,15 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_0, user_0, A); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); SchedulerApp app_1 = - new SchedulerApp(appAttemptId_1, user_1, a, rmContext, null); + new SchedulerApp(appAttemptId_1, user_1, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_1, user_1, A); // Setup some nodes @@ -657,13 +765,15 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null); + new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_0, user_0, A); final ApplicationAttemptId appAttemptId_1 = TestUtils.getMockApplicationAttemptId(1, 0); SchedulerApp app_1 = - new SchedulerApp(appAttemptId_1, user_1, a, rmContext, null); + new SchedulerApp(appAttemptId_1, user_1, a, + mock(ActiveUsersManager.class), rmContext, null); a.submitApplication(app_1, user_1, A); // Setup some nodes @@ -770,7 +880,8 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - spy(new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null)); + spy(new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null)); a.submitApplication(app_0, user_0, A); // Setup some nodes and racks @@ -899,7 +1010,8 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - spy(new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null)); + spy(new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null)); a.submitApplication(app_0, user_0, A); // Setup some nodes and racks @@ -1028,7 +1140,8 @@ public class TestLeafQueue { final ApplicationAttemptId appAttemptId_0 = TestUtils.getMockApplicationAttemptId(0, 0); SchedulerApp app_0 = - spy(new SchedulerApp(appAttemptId_0, user_0, a, rmContext, null)); + spy(new SchedulerApp(appAttemptId_0, user_0, a, + mock(ActiveUsersManager.class), rmContext, null)); a.submitApplication(app_0, user_0, A); // Setup some nodes and racks diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index 4a264fd24bd..a32c285eec8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodesPage.NodesBlock; import org.apache.hadoop.yarn.webapp.test.WebAppTests; +import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; @@ -36,39 +37,65 @@ import com.google.inject.Module; * data for all the columns in the table as specified in the header. */ public class TestNodesPage { + + final int numberOfRacks = 2; + final int numberOfNodesPerRack = 2; + // Number of Actual Table Headers for NodesPage.NodesBlock might change in + // future. In that case this value should be adjusted to the new value. + final int numberOfThInMetricsTable = 10; + final int numberOfActualTableHeaders = 10; - @Test - public void testNodesBlockRender() throws Exception { - final int numberOfRacks = 2; - final int numberOfNodesPerRack = 2; - // Number of Actual Table Headers for NodesPage.NodesBlock might change in - // future. In that case this value should be adjusted to the new value. - final int numberOfThInMetricsTable = 10; - final int numberOfActualTableHeaders = 10; - - Injector injector = WebAppTests.createMockInjector(RMContext.class, - TestRMWebApp.mockRMContext(3, numberOfRacks, numberOfNodesPerRack, 8*TestRMWebApp.GiB), - new Module() { + private Injector injector; + + @Before + public void setUp() throws Exception { + injector = WebAppTests.createMockInjector(RMContext.class, TestRMWebApp .mockRMContext(3, numberOfRacks, numberOfNodesPerRack, + 8 * TestRMWebApp.GiB), new Module() { @Override public void configure(Binder binder) { try { - binder.bind(ResourceManager.class).toInstance(TestRMWebApp.mockRm(3, - numberOfRacks, numberOfNodesPerRack, 8*TestRMWebApp.GiB)); + binder.bind(ResourceManager.class).toInstance( + TestRMWebApp.mockRm(3, numberOfRacks, numberOfNodesPerRack, + 8 * TestRMWebApp.GiB)); } catch (IOException e) { throw new IllegalStateException(e); } } }); + } + + @Test + public void testNodesBlockRender() throws Exception { injector.getInstance(NodesBlock.class).render(); PrintWriter writer = injector.getInstance(PrintWriter.class); WebAppTests.flushOutput(injector); - Mockito.verify(writer, Mockito.times(numberOfActualTableHeaders + - numberOfThInMetricsTable)).print("<th"); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebApp.java + final List<RMNode> lostNodes = MockNodes.lostNodes(racks, numNodes, + newResource(mbsPerNode)); + final ConcurrentMap lostNodesMap = Maps.newConcurrentMap(); + for (RMNode node : lostNodes) { + lostNodesMap.put(node.getHostName(), node); + } return new RMContextImpl(new MemStore(), null, null, null, null) { @Override public ConcurrentMap getRMApps() { return applicationsMaps; } @Override + public ConcurrentMap getInactiveRMNodes() { + return lostNodesMap; + } + @Override public ConcurrentMap getRMNodes() { return nodesMap; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java index 51d2fa7d0dd..746eec234e9 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java @@ -370,7 +370,8 @@ public class TestRMWebServices extends JerseyTest { WebServicesTestUtils.getXmlInt(element, "lostNodes"), WebServicesTestUtils.getXmlInt(element, "unhealthyNodes"), WebServicesTestUtils.getXmlInt(element, "decommissionedNodes"), - WebServicesTestUtils.getXmlInt(element, "rebootedNodes")); + WebServicesTestUtils.getXmlInt(element, "rebootedNodes"), + WebServicesTestUtils.getXmlInt(element, "activeNodes")); } } @@ -378,7 +379,7 @@ Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject clusterinfo = json.getJSONObject("clusterMetrics"); - assertEquals("incorrect number of elements", 11, clusterinfo.length()); + assertEquals("incorrect number of elements", 12, clusterinfo.length()); verifyClusterMetrics(clusterinfo.getInt("appsSubmitted"), clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"), clusterinfo.getInt("allocatedMB"), @@ -386,13 +387,13 @@ clusterinfo.getInt("totalMB"), clusterinfo.getInt("totalNodes"), clusterinfo.getInt("lostNodes"), clusterinfo.getInt("unhealthyNodes"), clusterinfo.getInt("decommissionedNodes"), - clusterinfo.getInt("rebootedNodes")); + clusterinfo.getInt("rebootedNodes"), clusterinfo.getInt("activeNodes")); } public void verifyClusterMetrics(int sub, int reservedMB, int availableMB, int allocMB, int containersAlloc, int totalMB, int totalNodes, int lostNodes, int unhealthyNodes, int decommissionedNodes, - int rebootedNodes) throws JSONException, Exception { + int rebootedNodes, int activeNodes) throws JSONException, Exception { ResourceScheduler rs = rm.getResourceScheduler(); QueueMetrics metrics = rs.getRootQueueMetrics(); @@ -412,8 +413,11 @@ * MB_IN_GB, allocMB); assertEquals("containersAllocated doesn't match", 0, containersAlloc); assertEquals("totalMB doesn't match", totalMBExpect, totalMB); - assertEquals("totalNodes doesn't match", clusterMetrics.getNumNMs(), - totalNodes); + assertEquals( + "totalNodes doesn't match", + clusterMetrics.getNumActiveNMs() + clusterMetrics.getNumLostNMs() + + clusterMetrics.getNumDecommisionedNMs() + + clusterMetrics.getNumRebootedNMs(), totalNodes); assertEquals("lostNodes doesn't match", clusterMetrics.getNumLostNMs(), lostNodes); assertEquals("unhealthyNodes doesn't match", @@ -422,6 +426,8 @@ clusterMetrics.getNumDecommisionedNMs(), decommissionedNodes); assertEquals("rebootedNodes doesn't match", clusterMetrics.getNumRebootedNMs(), rebootedNodes); + assertEquals("activeNodes doesn't match", clusterMetrics.getNumActiveNMs(), + activeNodes); } @Test diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java index 8a52ac153d2..8886d6e451a 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java @@ -202,6 +202,69 @@ public class TestRMWebServicesNodes extends JerseyTest { rm.stop(); } } + + @Test + public void testNodesQueryStateLost() throws JSONException, Exception { + WebResource r = resource(); + MockNM nm1 = rm.registerNode("h1:1234", 5120); + MockNM nm2 = rm.registerNode("h2:1234", 5120); + rm.sendNodeStarted(nm1); + rm.sendNodeStarted(nm2); + rm.NMwaitForState(nm1.getNodeId(), RMNodeState.RUNNING); + rm.NMwaitForState(nm2.getNodeId(), RMNodeState.RUNNING); + rm.sendNodeLost(nm1); + rm.sendNodeLost(nm2); + + ClientResponse response = r.path("ws").path("v1").path("cluster") + .path("nodes").queryParam("state", RMNodeState.LOST.toString()) + .accept(MediaType.APPLICATION_JSON).get(ClientResponse.class); + + assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); + JSONObject json = response.getEntity(JSONObject.class); + JSONObject nodes = json.getJSONObject("nodes"); + assertEquals("incorrect number of elements", 1, nodes.length()); + JSONArray nodeArray = nodes.getJSONArray("node"); + assertEquals("incorrect number of elements", 2, nodeArray.length()); + for (int i = 0; i < nodeArray.length(); ++i) { + JSONObject info = nodeArray.getJSONObject(i); + String host = info.get("id").toString().split(":")[0]; + RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get(host); + WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", "", + info.getString("nodeHTTPAddress")); + WebServicesTestUtils.checkStringMatch("state", rmNode.getState() + .toString(), info.getString("state")); + } + } + + @Test + public void testSingleNodeQueryStateLost() throws JSONException, Exception { + WebResource r = resource(); + MockNM nm1 = rm.registerNode("h1:1234", 5120); + MockNM nm2 = rm.registerNode("h2:1234", 5120); + rm.sendNodeStarted(nm1); + rm.sendNodeStarted(nm2); + rm.NMwaitForState(nm1.getNodeId(), RMNodeState.RUNNING); + rm.NMwaitForState(nm2.getNodeId(), RMNodeState.RUNNING); + rm.sendNodeLost(nm1); + rm.sendNodeLost(nm2); + + ClientResponse response = r.path("ws").path("v1").path("cluster") + .path("nodes").path("h2:1234").accept(MediaType.APPLICATION_JSON) + .get(ClientResponse.class); + + assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType()); + JSONObject json = response.getEntity(JSONObject.class); + JSONObject info = json.getJSONObject("node"); + String id = info.get("id").toString(); + + assertEquals("Incorrect Node Information.", "h2:1234", id); + + RMNode rmNode = rm.getRMContext().getInactiveRMNodes().get("h2"); + WebServicesTestUtils.checkStringMatch("nodeHTTPAddress", "", + info.getString("nodeHTTPAddress")); + WebServicesTestUtils.checkStringMatch("state", + rmNode.getState().toString(), info.getString("state")); + } @Test public void testNodesQueryHealthy() throws JSONException, Exception { diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java index f59bf9e66c7..fc362c5643a 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java 
+++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ExecutionSummarizer.java @@ -149,10 +149,15 @@ class ExecutionSummarizer implements StatListener { throws IOException { numJobsInInputTrace = factory.numJobsInTrace; endTime = System.currentTimeMillis(); - Path inputTracePath = new Path(inputPath); - FileSystem fs = inputTracePath.getFileSystem(conf); - inputTraceLocation = fs.makeQualified(inputTracePath).toString(); - inputTraceSignature = getTraceSignature(inputTraceLocation); + if ("-".equals(inputPath)) { + inputTraceLocation = Summarizer.NA; + inputTraceSignature = Summarizer.NA; + } else { + Path inputTracePath = new Path(inputPath); + FileSystem fs = inputTracePath.getFileSystem(conf); + inputTraceLocation = fs.makeQualified(inputTracePath).toString(); + inputTraceSignature = getTraceSignature(inputPath); + } jobSubmissionPolicy = Gridmix.getJobSubmissionPolicy(conf).name(); resolver = userResolver.getClass().getName(); if (dataSize > 0) { diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java index 902351cd7bf..b4a0e0b5e2d 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Gridmix.java @@ -314,9 +314,13 @@ public class Gridmix extends Configured implements Tool { } }); - // print the run summary - System.out.print("\n\n"); - System.out.println(summarizer.toString()); + // print the gridmix summary if the run was successful + if (val == 0) { + // print the run summary + System.out.print("\n\n"); + System.out.println(summarizer.toString()); + } + return val; } diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java index 6d8c166a678..54f1730cf26 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java @@ -101,13 +101,15 @@ public class Statistics implements Component { } int maps = 0; + int reds = 0; if (jobdesc == null) { throw new IllegalArgumentException( " JobStory not available for job " + job.getJobName()); } else { maps = jobdesc.getNumberMaps(); + reds = jobdesc.getNumberReduces(); } - JobStats stats = new JobStats(maps,job); + JobStats stats = new JobStats(maps, reds, job); jobMaps.put(seq,stats); } @@ -258,15 +260,20 @@ public class Statistics implements Component { */ static class JobStats { private int noOfMaps; + private int noOfReds; private Job job; - public JobStats(int noOfMaps,Job job){ + public JobStats(int noOfMaps,int numOfReds, Job job){ this.job = job; this.noOfMaps = noOfMaps; + this.noOfReds = numOfReds; } public int getNoOfMaps() { return noOfMaps; } + public int getNoOfReds() { + return noOfReds; + } /** * Returns the job , diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java index 1060321dcab..d78d6313331 100644 --- 
a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/StressJobFactory.java @@ -31,13 +31,12 @@ import org.apache.hadoop.tools.rumen.JobStoryProducer; import java.io.IOException; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.locks.Condition; +import java.util.concurrent.atomic.AtomicBoolean; public class StressJobFactory extends JobFactory { public static final Log LOG = LogFactory.getLog(StressJobFactory.class); private final LoadStatus loadStatus = new LoadStatus(); - private final Condition condUnderloaded = this.lock.newCondition(); /** * The minimum ratio between pending+running map tasks (aka. incomplete map * tasks) and cluster map slot capacity for us to consider the cluster is @@ -150,23 +149,32 @@ public class StressJobFactory extends JobFactory { } LOG.info("START STRESS @ " + System.currentTimeMillis()); while (!Thread.currentThread().isInterrupted()) { - lock.lock(); try { while (loadStatus.overloaded()) { - //Wait while JT is overloaded. + if (LOG.isDebugEnabled()) { + LOG.debug("Cluster overloaded in run! Sleeping..."); + } + // sleep try { - condUnderloaded.await(); + Thread.sleep(1000); } catch (InterruptedException ie) { return; } } while (!loadStatus.overloaded()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cluster underloaded in run! Stressing..."); + } try { + //TODO This in-line read can block submission for large jobs. final JobStory job = getNextJobFiltered(); if (null == job) { return; } + if (LOG.isDebugEnabled()) { + LOG.debug("Job Selected: " + job.getJobID()); + } submitter.add( jobCreator.createGridmixJob( conf, 0L, job, scratch, @@ -175,14 +183,20 @@ public class StressJobFactory extends JobFactory { sequence.getAndIncrement())); // TODO: We need to take care of scenario when one map/reduce // takes more than 1 slot. 
- loadStatus.mapSlotsBackfill -= - calcEffectiveIncompleteMapTasks( - loadStatus.mapSlotCapacity, job.getNumberMaps(), 0.0f); - loadStatus.reduceSlotsBackfill -= - calcEffectiveIncompleteReduceTasks( - loadStatus.reduceSlotCapacity, job.getNumberReduces(), - 0.0f); - --loadStatus.numJobsBackfill; + + // Update the load status as we are about to submit this job + int incompleteMapTasks = (int) calcEffectiveIncompleteMapTasks( + loadStatus.getMapCapacity(), + job.getNumberMaps(), 0.0f); + loadStatus.decrementMapLoad(incompleteMapTasks); + + int incompleteReduceTasks = + (int) calcEffectiveIncompleteReduceTasks( + loadStatus.getReduceCapacity(), + job.getNumberReduces(), 0.0f); + loadStatus.decrementReduceLoad(incompleteReduceTasks); + + loadStatus.decrementJobLoad(1); } catch (IOException e) { LOG.error("Error while submitting the job ", e); error = e; @@ -191,7 +205,7 @@ } } finally { - lock.unlock(); + // do nothing } } } catch (InterruptedException e) { @@ -210,19 +224,11 @@ */ @Override public void update(Statistics.ClusterStats item) { - lock.lock(); + ClusterStatus clusterMetrics = item.getStatus(); try { - ClusterStatus clusterMetrics = item.getStatus(); - try { - checkLoadAndGetSlotsToBackfill(item,clusterMetrics); - } catch (Exception e) { - LOG.error("Couldn't get the new Status",e); - } - if (!loadStatus.overloaded()) { - condUnderloaded.signalAll(); - } - } finally { - lock.unlock(); + checkLoadAndGetSlotsToBackfill(item, clusterMetrics); + } catch (Exception e) { + LOG.error("Couldn't get the new Status", e); + } } @@ -254,18 +260,25 @@ */ private void checkLoadAndGetSlotsToBackfill( ClusterStats stats, ClusterStatus clusterStatus) throws IOException, InterruptedException { - loadStatus.mapSlotCapacity = clusterStatus.getMaxMapTasks(); - loadStatus.reduceSlotCapacity = clusterStatus.getMaxReduceTasks(); + // update the max cluster capacity in case it has changed + int mapCapacity = clusterStatus.getMaxMapTasks(); + loadStatus.updateMapCapacity(mapCapacity); - loadStatus.numJobsBackfill = - (int) (maxJobTrackerRatio * clusterStatus.getTaskTrackers()) - - stats.getNumRunningJob(); - if (loadStatus.numJobsBackfill <= 0) { + int reduceCapacity = clusterStatus.getMaxReduceTasks(); + + loadStatus.updateReduceCapacity(reduceCapacity); + + int numTrackers = clusterStatus.getTaskTrackers(); + + int jobLoad = + (int) (maxJobTrackerRatio * numTrackers) - stats.getNumRunningJob(); + loadStatus.updateJobLoad(jobLoad); + if (loadStatus.getJobLoad() <= 0) { if (LOG.isDebugEnabled()) { - LOG.debug(System.currentTimeMillis() + " Overloaded is " + LOG.debug(System.currentTimeMillis() + " [JobLoad] Overloaded is " + Boolean.TRUE.toString() + " NumJobsBackfill is " - + loadStatus.numJobsBackfill); + + loadStatus.getJobLoad()); } return; // stop calculation because we know it is overloaded. 
} @@ -275,56 +288,84 @@ public class StressJobFactory extends JobFactory { float mapProgress = job.getJob().mapProgress(); int noOfMaps = job.getNoOfMaps(); incompleteMapTasks += - calcEffectiveIncompleteMapTasks( - clusterStatus.getMaxMapTasks(), noOfMaps, mapProgress); + calcEffectiveIncompleteMapTasks(mapCapacity, noOfMaps, mapProgress); } - loadStatus.mapSlotsBackfill = - (int) ((overloadMapTaskMapSlotRatio * clusterStatus.getMaxMapTasks()) - - incompleteMapTasks); - if (loadStatus.mapSlotsBackfill <= 0) { + + int mapSlotsBackFill = + (int) ((overloadMapTaskMapSlotRatio * mapCapacity) - incompleteMapTasks); + loadStatus.updateMapLoad(mapSlotsBackFill); + + if (loadStatus.getMapLoad() <= 0) { if (LOG.isDebugEnabled()) { - LOG.debug(System.currentTimeMillis() + " Overloaded is " + LOG.debug(System.currentTimeMillis() + " [MAP-LOAD] Overloaded is " + Boolean.TRUE.toString() + " MapSlotsBackfill is " - + loadStatus.mapSlotsBackfill); + + loadStatus.getMapLoad()); } return; // stop calculation because we know it is overloaded. } float incompleteReduceTasks = 0; // include pending & running reduce tasks. for (JobStats job : ClusterStats.getRunningJobStats()) { - int noOfReduces = job.getJob().getNumReduceTasks(); + // Use the number of reduces cached in JobStats + int noOfReduces = job.getNoOfReds(); if (noOfReduces > 0) { float reduceProgress = job.getJob().reduceProgress(); incompleteReduceTasks += - calcEffectiveIncompleteReduceTasks( - clusterStatus.getMaxReduceTasks(), noOfReduces, reduceProgress); + calcEffectiveIncompleteReduceTasks(reduceCapacity, noOfReduces, + reduceProgress); } } - loadStatus.reduceSlotsBackfill = - (int) ((overloadReduceTaskReduceSlotRatio * clusterStatus.getMaxReduceTasks()) + + int reduceSlotsBackFill = + (int) ((overloadReduceTaskReduceSlotRatio * reduceCapacity) - incompleteReduceTasks); - if (loadStatus.reduceSlotsBackfill <= 0) { + loadStatus.updateReduceLoad(reduceSlotsBackFill); + if (loadStatus.getReduceLoad() <= 0) { if (LOG.isDebugEnabled()) { - LOG.debug(System.currentTimeMillis() + " Overloaded is " + LOG.debug(System.currentTimeMillis() + " [REDUCE-LOAD] Overloaded is " + Boolean.TRUE.toString() + " ReduceSlotsBackfill is " - + loadStatus.reduceSlotsBackfill); + + loadStatus.getReduceLoad()); } return; // stop calculation because we know it is overloaded. } if (LOG.isDebugEnabled()) { - LOG.debug(System.currentTimeMillis() + " Overloaded is " + LOG.debug(System.currentTimeMillis() + " [OVERALL] Overloaded is " + Boolean.FALSE.toString() + " Current load status is " + loadStatus); } } static class LoadStatus { - int mapSlotsBackfill; - int mapSlotCapacity; - int reduceSlotsBackfill; - int reduceSlotCapacity; - int numJobsBackfill; + /** + * Additional number of map slots that can be requested before + * declaring (by Gridmix STRESS mode) the cluster as overloaded. + */ + private volatile int mapSlotsBackfill; + + /** + * Determines the total map slot capacity of the cluster. + */ + private volatile int mapSlotCapacity; + + /** + * Additional number of reduce slots that can be requested before + * declaring (by Gridmix STRESS mode) the cluster as overloaded. + */ + private volatile int reduceSlotsBackfill; + + /** + * Determines the total reduce slot capacity of the cluster. + */ + private volatile int reduceSlotCapacity; + + /** + * Determines the max count of running jobs in the cluster. 
+ */ + private volatile int numJobsBackfill; + + // set the default to true + private AtomicBoolean overloaded = new AtomicBoolean(true); /** * Construct the LoadStatus in an unknown state - assuming the cluster is @@ -339,12 +380,76 @@ public class StressJobFactory extends JobFactory { reduceSlotCapacity = -1; } - public boolean overloaded() { - return (mapSlotsBackfill <= 0) || (reduceSlotsBackfill <= 0) - || (numJobsBackfill <= 0); + public synchronized int getMapLoad() { + return mapSlotsBackfill; } - public String toString() { + public synchronized int getMapCapacity() { + return mapSlotCapacity; + } + + public synchronized int getReduceLoad() { + return reduceSlotsBackfill; + } + + public synchronized int getReduceCapacity() { + return reduceSlotCapacity; + } + + public synchronized int getJobLoad() { + return numJobsBackfill; + } + + public synchronized void decrementMapLoad(int mapSlotsConsumed) { + this.mapSlotsBackfill -= mapSlotsConsumed; + updateOverloadStatus(); + } + + public synchronized void decrementReduceLoad(int reduceSlotsConsumed) { + this.reduceSlotsBackfill -= reduceSlotsConsumed; + updateOverloadStatus(); + } + + public synchronized void decrementJobLoad(int numJobsConsumed) { + this.numJobsBackfill -= numJobsConsumed; + updateOverloadStatus(); + } + + public synchronized void updateMapCapacity(int mapSlotsCapacity) { + this.mapSlotCapacity = mapSlotsCapacity; + updateOverloadStatus(); + } + + public synchronized void updateReduceCapacity(int reduceSlotsCapacity) { + this.reduceSlotCapacity = reduceSlotsCapacity; + updateOverloadStatus(); + } + + public synchronized void updateMapLoad(int mapSlotsBackfill) { + this.mapSlotsBackfill = mapSlotsBackfill; + updateOverloadStatus(); + } + + public synchronized void updateReduceLoad(int reduceSlotsBackfill) { + this.reduceSlotsBackfill = reduceSlotsBackfill; + updateOverloadStatus(); + } + + public synchronized void updateJobLoad(int numJobsBackfill) { + this.numJobsBackfill = numJobsBackfill; + updateOverloadStatus(); + } + + private synchronized void updateOverloadStatus() { + overloaded.set((mapSlotsBackfill <= 0) || (reduceSlotsBackfill <= 0) + || (numJobsBackfill <= 0)); + } + + public synchronized boolean overloaded() { + return overloaded.get(); + } + + public synchronized String toString() { // TODO Use StringBuilder instead return " Overloaded = " + overloaded() + ", MapSlotBackfill = " + mapSlotsBackfill diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java index 10d6e733f1c..917cd09372a 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java @@ -52,15 +52,23 @@ public class ResourceUsageMatcher { @SuppressWarnings("unchecked") public void configure(Configuration conf, ResourceCalculatorPlugin monitor, ResourceUsageMetrics metrics, Progressive progress) { - Class[] plugins = - conf.getClasses(RESOURCE_USAGE_EMULATION_PLUGINS, - ResourceUsageEmulatorPlugin.class); + Class[] plugins = conf.getClasses(RESOURCE_USAGE_EMULATION_PLUGINS); if (plugins == null) { System.out.println("No resource usage emulator plugins configured."); } else { - for (Class 
plugin : plugins) { - if (plugin != null) { - emulationPlugins.add(ReflectionUtils.newInstance(plugin, conf)); + for (Class clazz : plugins) { + if (clazz != null) { + if (ResourceUsageEmulatorPlugin.class.isAssignableFrom(clazz)) { + ResourceUsageEmulatorPlugin plugin = + (ResourceUsageEmulatorPlugin) ReflectionUtils.newInstance(clazz, + conf); + emulationPlugins.add(plugin); + } else { + throw new RuntimeException("Misconfigured resource usage plugins. " + + "Class " + clazz.getName() + " is not a resource " + + "usage plugin as it does not extend " + + ResourceUsageEmulatorPlugin.class.getName()); + } } } } diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java index 802745522f2..22b742678f6 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java @@ -101,10 +101,17 @@ public class TestGridmixSubmission { retiredJobs = new LinkedBlockingQueue(); } - public void verify(ArrayList submitted) throws Exception { + public void verify(ArrayList submitted, Configuration clientConf) + throws Exception { final ArrayList succeeded = new ArrayList(); assertEquals("Bad job count", expected, retiredJobs.drainTo(succeeded)); final HashMap sub = new HashMap(); + + // define the input and output path for the run + final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs); + final Path out = + new Path(in, clientConf.get(Gridmix.GRIDMIX_OUT_DIR, "gridmix")); + for (JobStory spec : submitted) { sub.put(spec.getJobID().toString(), spec); } @@ -115,8 +122,7 @@ public class TestGridmixSubmission { Configuration conf = job.getConfiguration(); if (GenerateData.JOB_NAME.equals(jobName)) { verifyQueue(conf, jobName); - final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs); - final Path out = new Path("/gridmix").makeQualified(GridmixTestUtils.dfs); + final ContentSummary generated = GridmixTestUtils.dfs.getContentSummary(in); assertTrue("Mismatched data gen", // +/- 100k for logs (GENDATA << 20) < generated.getLength() + GENSLOP || @@ -164,7 +170,7 @@ final FileStatus stat = GridmixTestUtils.dfs.getFileStatus( - new Path(GridmixTestUtils.DEST, "" + Integer.valueOf(jobSeqNum))); + new Path(out, "" + Integer.valueOf(jobSeqNum))); assertEquals("Wrong owner for " + jobName, spec.getUser(), stat.getOwner()); @@ -337,8 +343,9 @@ private JobFactory factory; private TestMonitor monitor; - public void checkMonitor() throws Exception { - monitor.verify(((DebugJobFactory.Debuggable)factory).getSubmitted()); + public void checkMonitor(Configuration conf) throws Exception { + monitor.verify(((DebugJobFactory.Debuggable)factory).getSubmitted(), + conf); } @Override @@ -534,9 +541,11 @@ GridmixTestUtils.dfs.setPermission(root, new FsPermission((short)0777)); int res = ToolRunner.run(conf, client, argv); assertEquals("Client exited with nonzero status", 0, res); - client.checkMonitor(); + client.checkMonitor(conf); } catch (Exception e) { e.printStackTrace(); + // fail the test if there is an exception + throw new RuntimeException(e); } finally { in.getFileSystem(conf).delete(in, true); 
out.getFileSystem(conf).delete(out, true); diff --git a/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java b/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java index 694ca2e7e3d..64af603bec5 100644 --- a/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java +++ b/hadoop-mapreduce-project/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java @@ -159,7 +159,7 @@ public class TestGridmixSummary { @Override protected Thread createReaderThread() { - return null; + return new Thread(); } } @@ -243,7 +243,7 @@ public class TestGridmixSummary { tid, es.getInputTraceSignature()); // test trace location Path qPath = fs.makeQualified(testTraceFile); - assertEquals("Mismatch in trace signature", + assertEquals("Mismatch in trace filename", qPath.toString(), es.getInputTraceLocation()); // test expected data size assertEquals("Mismatch in expected data size", @@ -275,7 +275,7 @@ public class TestGridmixSummary { es.finalize(factory, testTraceFile.toString(), 0L, resolver, dataStats, conf); // test missing expected data size - assertEquals("Mismatch in trace signature", + assertEquals("Mismatch in trace data size", Summarizer.NA, es.getExpectedDataSize()); assertFalse("Mismatch in trace signature", tid.equals(es.getInputTraceSignature())); @@ -295,6 +295,12 @@ public class TestGridmixSummary { assertEquals("Mismatch in trace signature", tid, es.getInputTraceSignature()); + // finalize trace identifier '-' input + es.finalize(factory, "-", 0L, resolver, dataStats, conf); + assertEquals("Mismatch in trace signature", + Summarizer.NA, es.getInputTraceSignature()); + assertEquals("Mismatch in trace file location", + Summarizer.NA, es.getInputTraceLocation()); } // test the ExecutionSummarizer @@ -332,7 +338,7 @@ public class TestGridmixSummary { return isSuccessful; }; }; - return new JobStats(numMaps, fakeJob); + return new JobStats(numMaps, numReds, fakeJob); } /**
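
The SchedulerApp constructor changes in TestApplicationLimits and TestLeafQueue above wire a queue's ActiveUsersManager into each application so the capacity scheduler can compute user limits over active users only, i.e. users with outstanding resource requests (MAPREDUCE-3732). The following is a minimal sketch of that accounting, assuming a simplified limit formula; ActiveUsersTracker and its method names are illustrative, not the real YARN class.

    import java.util.HashMap;
    import java.util.Map;

    class ActiveUsersTracker {
      // outstanding request count per user; a user is "active" while > 0
      private final Map<String, Integer> outstanding = new HashMap<String, Integer>();
      private int activeUsers = 0;

      public synchronized void activateApplication(String user) {
        Integer n = outstanding.get(user);
        if (n == null || n.intValue() == 0) {
          activeUsers++; // user just became active
        }
        outstanding.put(user, n == null ? 1 : n + 1);
      }

      public synchronized void deactivateApplication(String user) {
        Integer n = outstanding.get(user);
        if (n != null && n.intValue() > 0) {
          outstanding.put(user, n - 1);
          if (n.intValue() == 1) {
            activeUsers--; // no outstanding requests left for this user
          }
        }
      }

      public synchronized int getNumActiveUsers() {
        return activeUsers;
      }

      // Simplified version of the user-limit idea exercised by testUserLimits:
      // guarantee each active user at least userLimitPercent of the queue,
      // and allow up to userLimitFactor times that guarantee.
      public synchronized int computeUserLimit(int queueCapacity,
          int userLimitPercent, float userLimitFactor) {
        int fairShare = queueCapacity / Math.max(activeUsers, 1);
        int guaranteed = queueCapacity * userLimitPercent / 100;
        return (int) (Math.max(fairShare, guaranteed) * userLimitFactor);
      }
    }

testUserLimits above exercises exactly this behavior: with a user limit of 50 and only user_0 active, user_0 keeps receiving containers, and the commented-out request for app_2 is what would make a second user active.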
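The StressJobFactory rework above replaces a lock/Condition hand-off between update() and the submission loop with volatile counters, synchronized mutators, a precomputed AtomicBoolean overload flag, and a poll-and-sleep loop. Below is a stand-alone sketch of that pattern, reusing the patch's field names but none of the Gridmix plumbing.

    import java.util.concurrent.atomic.AtomicBoolean;

    class LoadStatusSketch {
      private volatile int mapSlotsBackfill = -1;
      private volatile int reduceSlotsBackfill = -1;
      private volatile int numJobsBackfill = -1;
      // start pessimistic: an unknown state is treated as overloaded
      private final AtomicBoolean overloaded = new AtomicBoolean(true);

      public synchronized void updateJobLoad(int numJobs) {
        numJobsBackfill = numJobs;
        updateOverloadStatus();
      }

      public synchronized void decrementMapLoad(int slotsConsumed) {
        mapSlotsBackfill -= slotsConsumed;
        updateOverloadStatus();
      }

      public synchronized void decrementReduceLoad(int slotsConsumed) {
        reduceSlotsBackfill -= slotsConsumed;
        updateOverloadStatus();
      }

      // recompute the flag once per mutation, inside the monitor
      private void updateOverloadStatus() {
        overloaded.set(mapSlotsBackfill <= 0 || reduceSlotsBackfill <= 0
            || numJobsBackfill <= 0);
      }

      // lock-free read for the polling submission loop
      public boolean overloaded() {
        return overloaded.get();
      }

      // The submission thread polls instead of waiting on a Condition:
      void waitWhileOverloaded() throws InterruptedException {
        while (overloaded()) {
          Thread.sleep(1000); // update() runs on another thread and relieves load
        }
      }
    }

The design trade-off is simplicity over latency: the submitter may sleep up to a second after the cluster becomes underloaded, but no lock is shared between the status updater and the submission loop, which removes the deadlock and missed-signal risks of the old Condition-based code.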
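The ResourceUsageMatcher change above validates each configured class with isAssignableFrom before instantiating it reflectively, so a misconfigured plugin fails fast with the offending class name in the message. A minimal sketch of the idiom outside Hadoop follows; EmulatorPlugin and PluginLoader are placeholder names. Note that it reports clazz.getName(), avoiding the clazz.getClass().getName() mistake corrected above, which would always print "java.lang.Class".

    import java.lang.reflect.Constructor;

    interface EmulatorPlugin {
      void initialize();
    }

    class PluginLoader {
      // Validate before instantiating so a bad config entry fails fast.
      static EmulatorPlugin load(Class<?> clazz) {
        if (!EmulatorPlugin.class.isAssignableFrom(clazz)) {
          throw new RuntimeException("Misconfigured plugin: " + clazz.getName()
              + " does not implement " + EmulatorPlugin.class.getName());
        }
        try {
          Constructor<?> ctor = clazz.getDeclaredConstructor();
          return (EmulatorPlugin) ctor.newInstance();
        } catch (Exception e) {
          throw new RuntimeException("Cannot instantiate " + clazz.getName(), e);
        }
      }
    }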
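The new TestRMWebServicesNodes tests exercise GET /ws/v1/cluster/nodes?state=LOST, which reports nodes held in the ResourceManager's inactive-nodes map. A rough client-side sketch is shown below, assuming the default RM web port 8088 and the JSON shape asserted in the tests; adjust the host and port for a real cluster.

    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public class ListLostNodes {
      public static void main(String[] args) throws Exception {
        // 8088 is the default RM web port; "rmhost" is a placeholder
        URL url = new URL("http://rmhost:8088/ws/v1/cluster/nodes?state=LOST");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestProperty("Accept", "application/json");
        BufferedReader in = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), "UTF-8"));
        for (String line; (line = in.readLine()) != null;) {
          // expected shape per the test: {"nodes":{"node":[{"state":"LOST",...}]}}
          System.out.println(line);
        }
        in.close();
      }
    }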