From 7e75226e68715c3eca9d346c8eaf2f265aa70d23 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Thu, 21 Aug 2014 14:57:11 +0000 Subject: [PATCH 01/28] YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas via aw) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619421 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../hadoop/yarn/conf/YarnConfiguration.java | 9 +++++++++ .../src/main/resources/yarn-default.xml | 18 ++++++++++++++++-- .../nodemanager/LinuxContainerExecutor.java | 18 +++++++++++++++--- .../TestLinuxContainerExecutor.java | 7 +++++++ 5 files changed, 50 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a4a432d75dd..5eb5e400d30 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -226,6 +226,9 @@ Release 2.6.0 - UNRELEASED YARN-1919. Potential NPE in EmbeddedElectorService#stop. (Tsuyoshi Ozawa via kasha) + YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas + via aw) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index d227e4f415d..034ec4f90e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -836,6 +836,15 @@ public class YarnConfiguration extends Configuration { public static final String NM_LINUX_CONTAINER_GROUP = NM_PREFIX + "linux-container-executor.group"; + /** + * True if linux-container-executor should limit itself to one user + * when running in non-secure mode. + */ + public static final String NM_NONSECURE_MODE_LIMIT_USERS = NM_PREFIX + + "linux-container-executor.nonsecure-mode.limit-users"; + + public static final boolean DEFAULT_NM_NONSECURE_MODE_LIMIT_USERS = true; + /** * The UNIX user that containers will run as when Linux-container-executor * is used in nonsecure mode (a use case for this is using cgroups). diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 55b3490a4e8..9b4a90f4790 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -991,8 +991,22 @@ - The UNIX user that containers will run as when Linux-container-executor - is used in nonsecure mode (a use case for this is using cgroups). + This determines which of the two modes that LCE should use on + a non-secure cluster. If this value is set to true, then all containers + will be launched as the user specified in + yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user. If + this value is set to false, then containers will run as the user who + submitted the application. 
+ yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users + true + + + + The UNIX user that containers will run as when + Linux-container-executor is used in nonsecure mode (a use case for this + is using cgroups) if the + yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users is + set to true. yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user nobody diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 7962da28c8c..804864e4cba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -57,8 +57,8 @@ public class LinuxContainerExecutor extends ContainerExecutor { private LCEResourcesHandler resourcesHandler; private boolean containerSchedPriorityIsSet = false; private int containerSchedPriorityAdjustment = 0; - - + private boolean containerLimitUsers; + @Override public void setConf(Configuration conf) { super.setConf(conf); @@ -81,6 +81,13 @@ public class LinuxContainerExecutor extends ContainerExecutor { nonsecureLocalUserPattern = Pattern.compile( conf.get(YarnConfiguration.NM_NONSECURE_MODE_USER_PATTERN_KEY, YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_USER_PATTERN)); + containerLimitUsers = conf.getBoolean( + YarnConfiguration.NM_NONSECURE_MODE_LIMIT_USERS, + YarnConfiguration.DEFAULT_NM_NONSECURE_MODE_LIMIT_USERS); + if (!containerLimitUsers) { + LOG.warn(YarnConfiguration.NM_NONSECURE_MODE_LIMIT_USERS + + ": impersonation without authentication enabled"); + } } void verifyUsernamePattern(String user) { @@ -92,7 +99,12 @@ public class LinuxContainerExecutor extends ContainerExecutor { } String getRunAsUser(String user) { - return UserGroupInformation.isSecurityEnabled() ? 
user : nonsecureLocalUser; + if (UserGroupInformation.isSecurityEnabled() || + !containerLimitUsers) { + return user; + } else { + return nonsecureLocalUser; + } } /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java index f840730a385..a5ec43b67a1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutor.java @@ -279,6 +279,13 @@ public class TestLinuxContainerExecutor { lce.setConf(conf); Assert.assertEquals("bar", lce.getRunAsUser("foo")); + //nonsecure without limits + conf.set(YarnConfiguration.NM_NONSECURE_MODE_LOCAL_USER_KEY, "bar"); + conf.setBoolean(YarnConfiguration.NM_NONSECURE_MODE_LIMIT_USERS, false); + lce = new LinuxContainerExecutor(); + lce.setConf(conf); + Assert.assertEquals("foo", lce.getRunAsUser("foo")); + //secure conf = new YarnConfiguration(); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION, From e5e9d792c7903c58b62f2baabafd031d5d965c57 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Thu, 21 Aug 2014 17:37:34 +0000 Subject: [PATCH 02/28] MAPREDUCE-5974. Allow specifying multiple MapOutputCollectors with fallback. (Todd Lipcon via kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619492 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../org/apache/hadoop/mapred/MapTask.java | 37 ++++++++++++++----- .../src/main/resources/mapred-default.xml | 4 +- .../PluggableShuffleAndPluggableSort.apt.vm | 8 +++- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 5cc965a0b3f..cd4d6a5e643 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -187,6 +187,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-5906. Inconsistent configuration in property "mapreduce.reduce.shuffle.input.buffer.percent" (Akira AJISAKA via aw) + MAPREDUCE-5974. Allow specifying multiple MapOutputCollectors with + fallback. 
(Todd Lipcon via kasha) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java index b533ebe8e47..dfcbe093832 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java @@ -381,16 +381,35 @@ public class MapTask extends Task { private MapOutputCollector createSortingCollector(JobConf job, TaskReporter reporter) throws IOException, ClassNotFoundException { - MapOutputCollector collector - = (MapOutputCollector) - ReflectionUtils.newInstance( - job.getClass(JobContext.MAP_OUTPUT_COLLECTOR_CLASS_ATTR, - MapOutputBuffer.class, MapOutputCollector.class), job); - LOG.info("Map output collector class = " + collector.getClass().getName()); MapOutputCollector.Context context = - new MapOutputCollector.Context(this, job, reporter); - collector.init(context); - return collector; + new MapOutputCollector.Context(this, job, reporter); + + Class[] collectorClasses = job.getClasses( + JobContext.MAP_OUTPUT_COLLECTOR_CLASS_ATTR, MapOutputBuffer.class); + int remainingCollectors = collectorClasses.length; + for (Class clazz : collectorClasses) { + try { + if (!MapOutputCollector.class.isAssignableFrom(clazz)) { + throw new IOException("Invalid output collector class: " + clazz.getName() + + " (does not implement MapOutputCollector)"); + } + Class subclazz = + clazz.asSubclass(MapOutputCollector.class); + LOG.debug("Trying map output collector class: " + subclazz.getName()); + MapOutputCollector collector = + ReflectionUtils.newInstance(subclazz, job); + collector.init(context); + LOG.info("Map output collector class = " + collector.getClass().getName()); + return collector; + } catch (Exception e) { + String msg = "Unable to initialize MapOutputCollector " + clazz.getName(); + if (--remainingCollectors > 0) { + msg += " (" + remainingCollectors + " more collector(s) to try)"; + } + LOG.warn(msg, e); + } + } + throw new IOException("Unable to initialize any output collector"); } @SuppressWarnings("unchecked") diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 110e3162833..b2503c74e20 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -408,7 +408,9 @@ mapreduce.job.map.output.collector.class org.apache.hadoop.mapred.MapTask$MapOutputBuffer - It defines the MapOutputCollector implementation to use. + The MapOutputCollector implementation(s) to use. This may be a comma-separated + list of class names, in which case the map task will try to initialize each + of the collectors in turn. The first to successfully initialize will be used. 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm index 1b06ca9bfbe..06d802213d9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/PluggableShuffleAndPluggableSort.apt.vm @@ -71,11 +71,16 @@ Hadoop MapReduce Next Generation - Pluggable Shuffle and Pluggable Sort *--------------------------------------+---------------------+-----------------+ | <<>> | <<>> | The <<>> implementation to use | *--------------------------------------+---------------------+-----------------+ -| <<>> | <<>> | The <<>> implementation to use | +| <<>> | <<>> | The <<>> implementation(s) to use | *--------------------------------------+---------------------+-----------------+ These properties can also be set in the <<>> to change the default values for all jobs. + The collector class configuration may specify a comma-separated list of collector implementations. + In this case, the map task will attempt to instantiate each in turn until one of the + implementations successfully initializes. This can be useful if a given collector + implementation is only compatible with certain types of keys or values, for example. + ** NodeManager Configuration properties, <<>> in all nodes: *--------------------------------------+---------------------+-----------------+ @@ -91,4 +96,3 @@ Hadoop MapReduce Next Generation - Pluggable Shuffle and Pluggable Sort <<>> property, for example <<>>. Then the property defining the corresponding class must be <<>>. - \ No newline at end of file From 7b28f363b1b3f12cecc92d0bba8eb3021b67b48e Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Thu, 21 Aug 2014 17:53:54 +0000 Subject: [PATCH 03/28] HDFS-6890. NFS readdirplus doesn't return dotdot attributes. 
Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619500 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java | 6 ++++-- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index 3ef9240263f..0c7aebeebf9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -1643,6 +1643,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { DirectoryListing dlisting = null; Nfs3FileAttributes postOpDirAttr = null; long dotdotFileId = 0; + HdfsFileStatus dotdotStatus = null; try { String dirFileIdPath = Nfs3Utils.getFileIdPath(handle); dirStatus = dfsClient.getFileInfo(dirFileIdPath); @@ -1678,7 +1679,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { if (cookie == 0) { // Get dotdot fileId String dotdotFileIdPath = dirFileIdPath + "/.."; - HdfsFileStatus dotdotStatus = dfsClient.getFileInfo(dotdotFileIdPath); + dotdotStatus = dfsClient.getFileInfo(dotdotFileIdPath); if (dotdotStatus == null) { // This should not happen @@ -1723,7 +1724,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { postOpDirAttr.getFileId(), ".", 0, postOpDirAttr, new FileHandle( postOpDirAttr.getFileId())); entries[1] = new READDIRPLUS3Response.EntryPlus3(dotdotFileId, "..", - dotdotFileId, postOpDirAttr, new FileHandle(dotdotFileId)); + dotdotFileId, Nfs3Utils.getNfs3FileAttrFromFileStatus(dotdotStatus, + iug), new FileHandle(dotdotFileId)); for (int i = 2; i < n + 2; i++) { long fileId = fstatus[i - 2].getFileId(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 14ef2f1989b..5776f892b5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -626,6 +626,8 @@ Release 2.6.0 - UNRELEASED HDFS-6870. Blocks and INodes could leak for Rename with overwrite flag. (Yi Liu via jing9) + HDFS-6890. NFS readdirplus doesn't return dotdot attributes (brandonli) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES From ddb7f12ef91d995f85fc4c5b67f9f4a1599ecc25 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Thu, 21 Aug 2014 19:03:28 +0000 Subject: [PATCH 04/28] HADOOP-10992. Merge KMS to branch-2, updating hadoop-common CHANGES.txt. (tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619556 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 248 +++++++++--------- 1 file changed, 126 insertions(+), 122 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c697be1c512..6c202715045 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -13,8 +13,6 @@ Trunk (Unreleased) NEW FEATURES - HADOOP-10433. Key Management Server based on KeyProvider API. (tucu) - HADOOP-9629. Support Windows Azure Storage - Blob as a file system in Hadoop. 
(Dexter Bradshaw, Mostafa Elhemali, Xi Fang, Johannes Klein, David Lao, Mike Liddell, Chuan Liu, Lengning Liu, Ivan Mitic, Michael Rys, @@ -25,9 +23,6 @@ Trunk (Unreleased) Mike Liddell, Chuan Liu, Lengning Liu, Ivan Mitic, Michael Rys, Alexander Stojanovich, Brian Swan, and Min Wei via cnauroth) - HADOOP-10719. Add generateEncryptedKey and decryptEncryptedKey - methods to KeyProvider. (asuresh via tucu) - IMPROVEMENTS HADOOP-8017. Configure hadoop-main pom to get rid of M2E plugin execution @@ -121,93 +116,15 @@ Trunk (Unreleased) HADOOP-9833 move slf4j to version 1.7.5 (Kousuke Saruta via stevel) - HADOOP-10141. Create KeyProvider API to separate encryption key storage - from the applications. (omalley) - - HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley) - - HADOOP-10177. Create CLI tools for managing keys. (Larry McCay via omalley) - - HADOOP-10244. TestKeyShell improperly tests the results of delete (Larry - McCay via omalley) - HADOOP-10325. Improve jenkins javadoc warnings from test-patch.sh (cmccabe) HADOOP-10342. Add a new method to UGI to use a Kerberos login subject to build a new UGI. (Larry McCay via omalley) - HADOOP-10237. JavaKeyStoreProvider needs to set keystore permissions - correctly. (Larry McCay via omalley) - - HADOOP-10432. Refactor SSLFactory to expose static method to determine - HostnameVerifier. (tucu) - - HADOOP-10427. KeyProvider implementations should be thread safe. (tucu) - - HADOOP-10429. KeyStores should have methods to generate the materials - themselves, KeyShell should use them. (tucu) - - HADOOP-10428. JavaKeyStoreProvider should accept keystore password via - configuration falling back to ENV VAR. (tucu) - - HADOOP-10430. KeyProvider Metadata should have an optional description, - there should be a method to retrieve the metadata from all keys. (tucu) - - HADOOP-10534. KeyProvider getKeysMetadata should take a list of names - rather than returning all keys. (omalley) - HADOOP-10563. Remove the dependency of jsp in trunk. (wheat9) HADOOP-10485. Remove dead classes in hadoop-streaming. (wheat9) - HADOOP-10696. Add optional attributes to KeyProvider Options and Metadata. - (tucu) - - HADOOP-10695. KMSClientProvider should respect a configurable timeout. - (yoderme via tucu) - - HADOOP-10757. KeyProvider KeyVersion should provide the key name. - (asuresh via tucu) - - HADOOP-10769. Create KeyProvider extension to handle delegation tokens. - (Arun Suresh via atm) - - HADOOP-10812. Delegate KeyProviderExtension#toString to underlying - KeyProvider. (wang) - - HADOOP-10736. Add key attributes to the key shell. (Mike Yoder via wang) - - HADOOP-10824. Refactor KMSACLs to avoid locking. (Benoy Antony via umamahesh) - - HADOOP-10841. EncryptedKeyVersion should have a key name property. - (asuresh via tucu) - - HADOOP-10842. CryptoExtension generateEncryptedKey method should - receive the key name. (asuresh via tucu) - - HADOOP-10750. KMSKeyProviderCache should be in hadoop-common. - (asuresh via tucu) - - HADOOP-10720. KMS: Implement generateEncryptedKey and decryptEncryptedKey - in the REST API. (asuresh via tucu) - - HADOOP-10891. Add EncryptedKeyVersion factory method to - KeyProviderCryptoExtension. (wang) - - HADOOP-10756. KMS audit log should consolidate successful similar requests. - (asuresh via tucu) - - HADOOP-10793. KeyShell args should use single-dash style. (wang) - - HADOOP-10936. Change default KeyProvider bitlength to 128. (wang) - - HADOOP-10224. JavaKeyStoreProvider has to protect against corrupting - underlying store. 
(asuresh via tucu) - - HADOOP-10770. KMS add delegation token support. (tucu) - - HADOOP-10698. KMS, add proxyuser support. (tucu) - BUG FIXES HADOOP-9451. Fault single-layer config if node group topology is enabled. @@ -379,22 +296,9 @@ Trunk (Unreleased) HADOOP-10044 Improve the javadoc of rpc code (sanjay Radia) - HADOOP-10488. TestKeyProviderFactory fails randomly. (tucu) - - HADOOP-10431. Change visibility of KeyStore.Options getter methods to public. (tucu) - - HADOOP-10583. bin/hadoop key throws NPE with no args and assorted other fixups. (clamb via tucu) - - HADOOP-10586. KeyShell doesn't allow setting Options via CLI. (clamb via tucu) - HADOOP-10625. Trim configuration names when putting/getting them to properties. (Wangda Tan via xgong) - HADOOP-10645. TestKMS fails because race condition writing acl files. (tucu) - - HADOOP-10611. KMS, keyVersion name should not be assumed to be - keyName@versionNumber. (tucu) - HADOOP-10717. HttpServer2 should load jsp DTD from local jars instead of going remote. (Dapeng Sun via wheat9) @@ -409,33 +313,12 @@ Trunk (Unreleased) HADOOP-10834. Typo in CredentialShell usage. (Benoy Antony via umamahesh) - HADOOP-10816. KeyShell returns -1 on error to the shell, should be 1. - (Mike Yoder via wang) - HADOOP-10840. Fix OutOfMemoryError caused by metrics system in Azure File System. (Shanyu Zhao via cnauroth) - HADOOP-10826. Iteration on KeyProviderFactory.serviceLoader is - thread-unsafe. (benoyantony viat tucu) - - HADOOP-10881. Clarify usage of encryption and encrypted encryption - key in KeyProviderCryptoExtension. (wang) - - HADOOP-10920. site plugin couldn't parse hadoop-kms index.apt.vm. - (Akira Ajisaka via wang) - HADOOP-10925. Compilation fails in native link0 function on Windows. (cnauroth) - HADOOP-10939. Fix TestKeyProviderFactory testcases to use default 128 bit - length keys. (Arun Suresh via wang) - - HADOOP-10862. Miscellaneous trivial corrections to KMS classes. - (asuresh via tucu) - - HADOOP-10967. Improve DefaultCryptoExtension#generateEncryptedKey - performance. (hitliuyi via tucu) - OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) @@ -498,6 +381,8 @@ Release 2.6.0 - UNRELEASED NEW FEATURES + HADOOP-10433. Key Management Server based on KeyProvider API. (tucu) + IMPROVEMENTS HADOOP-10808. Remove unused native code for munlock. (cnauroth) @@ -582,10 +467,91 @@ Release 2.6.0 - UNRELEASED HADOOP-10975. org.apache.hadoop.util.DataChecksum should support calculating checksums in native code (James Thomas via Colin Patrick McCabe) + HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley) + + HADOOP-10177. Create CLI tools for managing keys. (Larry McCay via omalley) + + HADOOP-10432. Refactor SSLFactory to expose static method to determine + HostnameVerifier. (tucu) + + HADOOP-10429. KeyStores should have methods to generate the materials + themselves, KeyShell should use them. (tucu) + + HADOOP-10427. KeyProvider implementations should be thread safe. (tucu) + + HADOOP-10428. JavaKeyStoreProvider should accept keystore password via + configuration falling back to ENV VAR. (tucu) + + HADOOP-10430. KeyProvider Metadata should have an optional description, + there should be a method to retrieve the metadata from all keys. (tucu) + + HADOOP-10431. Change visibility of KeyStore.Options getter methods to + public. (tucu) + + HADOOP-10534. KeyProvider getKeysMetadata should take a list of names + rather than returning all keys. (omalley) + + HADOOP-10719. 
Add generateEncryptedKey and decryptEncryptedKey + methods to KeyProvider. (asuresh via tucu) + + HADOOP-10817. ProxyUsers configuration should support configurable + prefixes. (tucu) + + HADOOP-10881. Clarify usage of encryption and encrypted encryption + key in KeyProviderCryptoExtension. (wang) + + HADOOP-10770. KMS add delegation token support. (tucu) + + HADOOP-10698. KMS, add proxyuser support. (tucu) + OPTIMIZATIONS HADOOP-10838. Byte array native checksumming. (James Thomas via todd) + HADOOP-10696. Add optional attributes to KeyProvider Options and Metadata. + (tucu) + + HADOOP-10695. KMSClientProvider should respect a configurable timeout. + (yoderme via tucu) + + HADOOP-10757. KeyProvider KeyVersion should provide the key name. + (asuresh via tucu) + + HADOOP-10769. Create KeyProvider extension to handle delegation tokens. + (Arun Suresh via atm) + + HADOOP-10812. Delegate KeyProviderExtension#toString to underlying + KeyProvider. (wang) + + HADOOP-10736. Add key attributes to the key shell. (Mike Yoder via wang) + + HADOOP-10824. Refactor KMSACLs to avoid locking. (Benoy Antony via umamahesh) + + HADOOP-10841. EncryptedKeyVersion should have a key name property. + (asuresh via tucu) + + HADOOP-10842. CryptoExtension generateEncryptedKey method should + receive the key name. (asuresh via tucu) + + HADOOP-10750. KMSKeyProviderCache should be in hadoop-common. + (asuresh via tucu) + + HADOOP-10720. KMS: Implement generateEncryptedKey and decryptEncryptedKey + in the REST API. (asuresh via tucu) + + HADOOP-10891. Add EncryptedKeyVersion factory method to + KeyProviderCryptoExtension. (wang) + + HADOOP-10756. KMS audit log should consolidate successful similar requests. + (asuresh via tucu) + + HADOOP-10793. KeyShell args should use single-dash style. (wang) + + HADOOP-10936. Change default KeyProvider bitlength to 128. (wang) + + HADOOP-10224. JavaKeyStoreProvider has to protect against corrupting + underlying store. (asuresh via tucu) + BUG FIXES HADOOP-10781. Unportable getgrouplist() usage breaks FreeBSD (Dmitry @@ -621,11 +587,6 @@ Release 2.6.0 - UNRELEASED HADOOP-10927. Fix CredentialShell help behavior and error codes. (Josh Elser via wang) - HADOOP-10937. Need to set version name correctly before decrypting EEK. - (Arun Suresh via wang) - - HADOOP-10918. JMXJsonServlet fails when used within Tomcat. (tucu) - HADOOP-10933. FileBasedKeyStoresFactory Should use Configuration.getPassword for SSL Passwords. (lmccay via tucu) @@ -676,6 +637,49 @@ Release 2.6.0 - UNRELEASED HADOOP-10968. hadoop native build fails to detect java_libarch on ppc64le (Dinar Valeev via Colin Patrick McCabe) + HADOOP-10141. Create KeyProvider API to separate encryption key storage + from the applications. (omalley) + + HADOOP-10237. JavaKeyStoreProvider needs to set keystore permissions + correctly. (Larry McCay via omalley) + + HADOOP-10244. TestKeyShell improperly tests the results of delete (Larry + McCay via omalley) + + HADOOP-10583. bin/hadoop key throws NPE with no args and assorted other fixups. (clamb via tucu) + + HADOOP-10586. KeyShell doesn't allow setting Options via CLI. (clamb via tucu) + + HADOOP-10645. TestKMS fails because race condition writing acl files. (tucu) + + HADOOP-10611. KMS, keyVersion name should not be assumed to be + keyName@versionNumber. (tucu) + + HADOOP-10816. KeyShell returns -1 on error to the shell, should be 1. + (Mike Yoder via wang) + + HADOOP-10826. Iteration on KeyProviderFactory.serviceLoader is + thread-unsafe. (benoyantony viat tucu) + + HADOOP-10920. 
site plugin couldn't parse hadoop-kms index.apt.vm. + (Akira Ajisaka via wang) + + HADOOP-10937. Need to set version name correctly before decrypting EEK. + (Arun Suresh via wang) + + HADOOP-10918. JMXJsonServlet fails when used within Tomcat. (tucu) + + HADOOP-10939. Fix TestKeyProviderFactory testcases to use default 128 bit + length keys. (Arun Suresh via wang) + + HADOOP-10862. Miscellaneous trivial corrections to KMS classes. + (asuresh via tucu) + + HADOOP-10967. Improve DefaultCryptoExtension#generateEncryptedKey + performance. (hitliuyi via tucu) + + HADOOP-10488. TestKeyProviderFactory fails randomly. (tucu) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES From cbbb899aedacd59040f55ac5ed911c1e62bf3879 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Thu, 21 Aug 2014 21:33:35 +0000 Subject: [PATCH 05/28] YARN-2436. [post-HADOOP-9902] yarn application help doesn't work git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619603 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ hadoop-yarn-project/hadoop-yarn/bin/yarn | 1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5eb5e400d30..ed162ba06d4 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -18,6 +18,8 @@ Trunk - Unreleased YARN-2216 TestRMApplicationHistoryWriter sometimes fails in trunk. (Zhijie Shen via xgong) + YARN-2436. [post-HADOOP-9902] yarn application help doesn't work (aw) + Release 2.6.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index 2017d57feac..dfef8112f05 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -73,6 +73,7 @@ case "${COMMAND}" in application|applicationattempt|container) CLASS=org.apache.hadoop.yarn.client.cli.ApplicationCLI YARN_OPTS="${YARN_OPTS} ${YARN_CLIENT_OPTS}" + set -- "${COMMAND}" "$@" ;; classpath) hadoop_finalize From da4ba50269254456650c08c739f2b394d1182ee4 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 21 Aug 2014 21:38:16 +0000 Subject: [PATCH 06/28] HADOOP-10893. isolated classloader on the client side. 
Contributed by Sangjin Lee git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619604 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../dev-support/findbugsExcludeFile.xml | 5 + .../src/main/bin/hadoop-config.cmd | 10 +- .../src/main/bin/hadoop-functions.sh | 21 +- .../hadoop-common/src/main/bin/hadoop.cmd | 20 ++ .../hadoop-common/src/main/conf/hadoop-env.sh | 11 + .../hadoop/util/ApplicationClassLoader.java | 219 ++++++++++++++++++ .../java/org/apache/hadoop/util/RunJar.java | 115 +++++++-- .../apache/hadoop/util/ClassLoaderCheck.java | 33 +++ .../hadoop/util/ClassLoaderCheckMain.java | 34 +++ .../hadoop/util/ClassLoaderCheckSecond.java | 24 ++ .../hadoop/util/ClassLoaderCheckThird.java | 24 ++ .../util/TestApplicationClassLoader.java | 12 +- .../org/apache/hadoop/util/TestRunJar.java | 66 +++++- .../hadoop/mapreduce/v2/util/MRApps.java | 3 +- .../hadoop/mapreduce/v2/util/TestMRApps.java | 5 +- .../src/main/resources/mapred-default.xml | 14 +- .../hadoop/mapreduce/v2/TestMRJobs.java | 5 +- .../dev-support/findbugs-exclude.xml | 7 + .../yarn/util/ApplicationClassLoader.java | 170 +------------- 20 files changed, 584 insertions(+), 217 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ApplicationClassLoader.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheck.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckMain.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckSecond.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckThird.java rename {hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn => hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop}/util/TestApplicationClassLoader.java (95%) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 6c202715045..c880e1167dd 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -383,6 +383,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10433. Key Management Server based on KeyProvider API. (tucu) + HADOOP-10893. isolated classloader on the client side (Sangjin Lee via + jlowe) + IMPROVEMENTS HADOOP-10808. Remove unused native code for munlock. 
(cnauroth) diff --git a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml index e0b217118db..14690341c84 100644 --- a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml @@ -108,6 +108,11 @@ + + + + + diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd index 3ea576cef20..d8da5b16aab 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.cmd @@ -282,10 +282,12 @@ if not "%HADOOP_MAPRED_HOME%\%MAPRED_DIR%" == "%HADOOP_YARN_HOME%\%YARN_DIR%" ( @rem if defined HADOOP_CLASSPATH ( - if defined HADOOP_USER_CLASSPATH_FIRST ( - set CLASSPATH=%HADOOP_CLASSPATH%;%CLASSPATH%; - ) else ( - set CLASSPATH=%CLASSPATH%;%HADOOP_CLASSPATH%; + if not defined HADOOP_USE_CLIENT_CLASSLOADER ( + if defined HADOOP_USER_CLASSPATH_FIRST ( + set CLASSPATH=%HADOOP_CLASSPATH%;%CLASSPATH%; + ) else ( + set CLASSPATH=%CLASSPATH%;%HADOOP_CLASSPATH%; + ) ) ) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index 646c11ee3f7..f2437fa2ff2 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -450,7 +450,8 @@ function hadoop_add_to_classpath_mapred function hadoop_add_to_classpath_userpath { # Add the user-specified HADOOP_CLASSPATH to the - # official CLASSPATH env var. + # official CLASSPATH env var if HADOOP_USE_CLIENT_CLASSLOADER + # is not set. # Add it first or last depending on if user has # set env-var HADOOP_USER_CLASSPATH_FIRST # we'll also dedupe it, because we're cool like that. @@ -469,14 +470,16 @@ function hadoop_add_to_classpath_userpath done let j=c-1 - if [[ -z "${HADOOP_USER_CLASSPATH_FIRST}" ]]; then - for ((i=j; i>=0; i--)); do - hadoop_add_classpath "${array[$i]}" before - done - else - for ((i=0; i<=j; i++)); do - hadoop_add_classpath "${array[$i]}" after - done + if [[ -z "${HADOOP_USE_CLIENT_CLASSLOADER}" ]]; then + if [[ -z "${HADOOP_USER_CLASSPATH_FIRST}" ]]; then + for ((i=j; i>=0; i--)); do + hadoop_add_classpath "${array[$i]}" before + done + else + for ((i=0; i<=j; i++)); do + hadoop_add_classpath "${array[$i]}" after + done + fi fi fi } diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd b/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd index 04a302c0f38..f9cfe14b3f0 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop.cmd @@ -28,6 +28,26 @@ @rem classpath. Can be defined, for example, @rem by doing @rem export HADOOP_USER_CLASSPATH_FIRST=true +@rem +@rem HADOOP_USE_CLIENT_CLASSLOADER When defined, HADOOP_CLASSPATH and the +@rem jar as the hadoop jar argument are +@rem handled by a separate isolated client +@rem classloader. If it is set, +@rem HADOOP_USER_CLASSPATH_FIRST is +@rem ignored. Can be defined by doing +@rem export HADOOP_USE_CLIENT_CLASSLOADER=true +@rem +@rem HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES +@rem When defined, it overrides the default +@rem definition of system classes for the +@rem client classloader when +@rem HADOOP_USE_CLIENT_CLASSLOADER is +@rem enabled. 
Names ending in '.' (period) +@rem are treated as package names, and names +@rem starting with a '-' are treated as +@rem negative matches. For example, +@rem export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop." + @rem @rem HADOOP_HEAPSIZE The maximum amount of heap to use, in MB. @rem Default is 1000. diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index f50e4126636..eda47c93492 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -111,6 +111,17 @@ esac # Should HADOOP_USER_CLASSPATH be first in the official CLASSPATH? # export HADOOP_USER_CLASSPATH_FIRST="yes" +# If HADOOP_USE_CLIENT_CLASSLOADER is set, HADOOP_CLASSPATH along with the main +# jar are handled by a separate isolated client classloader. If it is set, +# HADOOP_USER_CLASSPATH_FIRST is ignored. Can be defined by doing +# export HADOOP_USE_CLIENT_CLASSLOADER=true + +# HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES overrides the default definition of +# system classes for the client classloader when HADOOP_USE_CLIENT_CLASSLOADER +# is enabled. Names ending in '.' (period) are treated as package names, and +# names starting with a '-' are treated as negative matches. For example, +# export HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES="-org.apache.hadoop.UserClass,java.,javax.,org.apache.hadoop." + ### # Options for remote shell connectivity ### diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ApplicationClassLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ApplicationClassLoader.java new file mode 100644 index 00000000000..5dda10fc887 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ApplicationClassLoader.java @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.File; +import java.io.FilenameFilter; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; + +/** + * A {@link URLClassLoader} for application isolation. Classes from the + * application JARs are loaded in preference to the parent loader. 
+ */ +@Public +@Unstable +public class ApplicationClassLoader extends URLClassLoader { + /** + * Default value of the system classes if the user did not override them. + * JDK classes, hadoop classes and resources, and some select third-party + * classes are considered system classes, and are not loaded by the + * application classloader. + */ + public static final String DEFAULT_SYSTEM_CLASSES = + "java.," + + "javax.," + + "org.w3c.dom.," + + "org.xml.sax.," + + "org.apache.commons.logging.," + + "org.apache.log4j.," + + "org.apache.hadoop.," + + "core-default.xml," + + "hdfs-default.xml," + + "mapred-default.xml," + + "yarn-default.xml"; + + private static final Log LOG = + LogFactory.getLog(ApplicationClassLoader.class.getName()); + + private static final FilenameFilter JAR_FILENAME_FILTER = + new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.endsWith(".jar") || name.endsWith(".JAR"); + } + }; + + private final ClassLoader parent; + private final List systemClasses; + + public ApplicationClassLoader(URL[] urls, ClassLoader parent, + List systemClasses) { + super(urls, parent); + if (LOG.isDebugEnabled()) { + LOG.debug("urls: " + Arrays.toString(urls)); + LOG.debug("system classes: " + systemClasses); + } + this.parent = parent; + if (parent == null) { + throw new IllegalArgumentException("No parent classloader!"); + } + // if the caller-specified system classes are null or empty, use the default + this.systemClasses = (systemClasses == null || systemClasses.isEmpty()) ? + Arrays.asList(StringUtils.getTrimmedStrings(DEFAULT_SYSTEM_CLASSES)) : + systemClasses; + LOG.info("system classes: " + this.systemClasses); + } + + public ApplicationClassLoader(String classpath, ClassLoader parent, + List systemClasses) throws MalformedURLException { + this(constructUrlsFromClasspath(classpath), parent, systemClasses); + } + + static URL[] constructUrlsFromClasspath(String classpath) + throws MalformedURLException { + List urls = new ArrayList(); + for (String element : classpath.split(File.pathSeparator)) { + if (element.endsWith("/*")) { + String dir = element.substring(0, element.length() - 1); + File[] files = new File(dir).listFiles(JAR_FILENAME_FILTER); + if (files != null) { + for (File file : files) { + urls.add(file.toURI().toURL()); + } + } + } else { + File file = new File(element); + if (file.exists()) { + urls.add(new File(element).toURI().toURL()); + } + } + } + return urls.toArray(new URL[urls.size()]); + } + + @Override + public URL getResource(String name) { + URL url = null; + + if (!isSystemClass(name, systemClasses)) { + url= findResource(name); + if (url == null && name.startsWith("/")) { + if (LOG.isDebugEnabled()) { + LOG.debug("Remove leading / off " + name); + } + url= findResource(name.substring(1)); + } + } + + if (url == null) { + url= parent.getResource(name); + } + + if (url != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("getResource("+name+")=" + url); + } + } + + return url; + } + + @Override + public Class loadClass(String name) throws ClassNotFoundException { + return this.loadClass(name, false); + } + + @Override + protected synchronized Class loadClass(String name, boolean resolve) + throws ClassNotFoundException { + + if (LOG.isDebugEnabled()) { + LOG.debug("Loading class: " + name); + } + + Class c = findLoadedClass(name); + ClassNotFoundException ex = null; + + if (c == null && !isSystemClass(name, systemClasses)) { + // Try to load class from this classloader's URLs. 
Note that this is like + // the servlet spec, not the usual Java 2 behaviour where we ask the + // parent to attempt to load first. + try { + c = findClass(name); + if (LOG.isDebugEnabled() && c != null) { + LOG.debug("Loaded class: " + name + " "); + } + } catch (ClassNotFoundException e) { + if (LOG.isDebugEnabled()) { + LOG.debug(e); + } + ex = e; + } + } + + if (c == null) { // try parent + c = parent.loadClass(name); + if (LOG.isDebugEnabled() && c != null) { + LOG.debug("Loaded class from parent: " + name + " "); + } + } + + if (c == null) { + throw ex != null ? ex : new ClassNotFoundException(name); + } + + if (resolve) { + resolveClass(c); + } + + return c; + } + + public static boolean isSystemClass(String name, List systemClasses) { + if (systemClasses != null) { + String canonicalName = name.replace('/', '.'); + while (canonicalName.startsWith(".")) { + canonicalName=canonicalName.substring(1); + } + for (String c : systemClasses) { + boolean result = true; + if (c.startsWith("-")) { + c = c.substring(1); + result = false; + } + if (c.endsWith(".") && canonicalName.startsWith(c)) { + return result; + } else if (canonicalName.equals(c)) { + return result; + } + } + } + return false; + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java index 08b4fd15d84..75b43b63fbd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java @@ -18,23 +18,25 @@ package org.apache.hadoop.util; -import java.lang.reflect.Array; -import java.lang.reflect.Method; -import java.lang.reflect.InvocationTargetException; -import java.net.URL; -import java.net.URLClassLoader; +import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.File; -import java.util.regex.Pattern; -import java.util.Arrays; +import java.lang.reflect.Array; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLClassLoader; import java.util.ArrayList; +import java.util.Arrays; import java.util.Enumeration; -import java.util.jar.JarFile; +import java.util.List; import java.util.jar.JarEntry; +import java.util.jar.JarFile; import java.util.jar.Manifest; +import java.util.regex.Pattern; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -55,6 +57,21 @@ public class RunJar { */ public static final int SHUTDOWN_HOOK_PRIORITY = 10; + /** + * Environment key for using the client classloader. + */ + public static final String HADOOP_USE_CLIENT_CLASSLOADER = + "HADOOP_USE_CLIENT_CLASSLOADER"; + /** + * Environment key for the (user-provided) hadoop classpath. + */ + public static final String HADOOP_CLASSPATH = "HADOOP_CLASSPATH"; + /** + * Environment key for the system classes. + */ + public static final String HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES = + "HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES"; + /** * Unpack a jar file into a directory. * @@ -116,6 +133,10 @@ public class RunJar { /** Run a Hadoop job jar. If the main class is not in the jar's manifest, * then it must be provided on the command line. 
*/ public static void main(String[] args) throws Throwable { + new RunJar().run(args); + } + + public void run(String[] args) throws Throwable { String usage = "RunJar jarFile [mainClass] args..."; if (args.length < 1) { @@ -187,19 +208,7 @@ public class RunJar { unJar(file, workDir); - ArrayList classPath = new ArrayList(); - classPath.add(new File(workDir+"/").toURI().toURL()); - classPath.add(file.toURI().toURL()); - classPath.add(new File(workDir, "classes/").toURI().toURL()); - File[] libs = new File(workDir, "lib").listFiles(); - if (libs != null) { - for (int i = 0; i < libs.length; i++) { - classPath.add(libs[i].toURI().toURL()); - } - } - - ClassLoader loader = - new URLClassLoader(classPath.toArray(new URL[0])); + ClassLoader loader = createClassLoader(file, workDir); Thread.currentThread().setContextClassLoader(loader); Class mainClass = Class.forName(mainClassName, true, loader); @@ -214,5 +223,65 @@ public class RunJar { throw e.getTargetException(); } } - + + /** + * Creates a classloader based on the environment that was specified by the + * user. If HADOOP_USE_CLIENT_CLASSLOADER is specified, it creates an + * application classloader that provides the isolation of the user class space + * from the hadoop classes and their dependencies. It forms a class space for + * the user jar as well as the HADOOP_CLASSPATH. Otherwise, it creates a + * classloader that simply adds the user jar to the classpath. + */ + private ClassLoader createClassLoader(File file, final File workDir) + throws MalformedURLException { + ClassLoader loader; + // see if the client classloader is enabled + if (useClientClassLoader()) { + StringBuilder sb = new StringBuilder(); + sb.append(workDir+"/"). + append(File.pathSeparator).append(file). + append(File.pathSeparator).append(workDir+"/classes/"). + append(File.pathSeparator).append(workDir+"/lib/*"); + // HADOOP_CLASSPATH is added to the client classpath + String hadoopClasspath = getHadoopClasspath(); + if (hadoopClasspath != null && !hadoopClasspath.isEmpty()) { + sb.append(File.pathSeparator).append(hadoopClasspath); + } + String clientClasspath = sb.toString(); + // get the system classes + String systemClasses = getSystemClasses(); + List systemClassesList = systemClasses == null ? 
+ null : + Arrays.asList(StringUtils.getTrimmedStrings(systemClasses)); + // create an application classloader that isolates the user classes + loader = new ApplicationClassLoader(clientClasspath, + getClass().getClassLoader(), systemClassesList); + } else { + List classPath = new ArrayList(); + classPath.add(new File(workDir+"/").toURI().toURL()); + classPath.add(file.toURI().toURL()); + classPath.add(new File(workDir, "classes/").toURI().toURL()); + File[] libs = new File(workDir, "lib").listFiles(); + if (libs != null) { + for (int i = 0; i < libs.length; i++) { + classPath.add(libs[i].toURI().toURL()); + } + } + // create a normal parent-delegating classloader + loader = new URLClassLoader(classPath.toArray(new URL[0])); + } + return loader; + } + + boolean useClientClassLoader() { + return Boolean.parseBoolean(System.getenv(HADOOP_USE_CLIENT_CLASSLOADER)); + } + + String getHadoopClasspath() { + return System.getenv(HADOOP_CLASSPATH); + } + + String getSystemClasses() { + return System.getenv(HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheck.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheck.java new file mode 100644 index 00000000000..aa2cc0eee41 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheck.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +public class ClassLoaderCheck { + /** + * Verifies the class is loaded by the right classloader. + */ + public static void checkClassLoader(Class cls, + boolean shouldBeLoadedByAppClassLoader) { + boolean loadedByAppClassLoader = + cls.getClassLoader() instanceof ApplicationClassLoader; + if ((shouldBeLoadedByAppClassLoader && !loadedByAppClassLoader) || + (!shouldBeLoadedByAppClassLoader && loadedByAppClassLoader)) { + throw new RuntimeException("incorrect classloader used"); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckMain.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckMain.java new file mode 100644 index 00000000000..bb14ac9594f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckMain.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * Test class used by {@link TestRunJar} to verify that it is loaded by the + * {@link ApplicationClassLoader}. + */ +public class ClassLoaderCheckMain { + public static void main(String[] args) { + // ClassLoaderCheckMain should be loaded by the application classloader + ClassLoaderCheck.checkClassLoader(ClassLoaderCheckMain.class, true); + // ClassLoaderCheckSecond should NOT be loaded by the application + // classloader + ClassLoaderCheck.checkClassLoader(ClassLoaderCheckSecond.class, false); + // ClassLoaderCheckThird should be loaded by the application classloader + ClassLoaderCheck.checkClassLoader(ClassLoaderCheckThird.class, true); + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckSecond.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckSecond.java new file mode 100644 index 00000000000..45601bd07dc --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckSecond.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * A class {@link ClassLoaderCheckMain} depends on that should be loaded by the + * system classloader. + */ +public class ClassLoaderCheckSecond {} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckThird.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckThird.java new file mode 100644 index 00000000000..dd4c0c4a1fa --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/ClassLoaderCheckThird.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +/** + * A class {@link ClassLoaderCheckMain} depends on that should be loaded by the + * application classloader. + */ +public class ClassLoaderCheckThird {} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApplicationClassLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java similarity index 95% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApplicationClassLoader.java rename to hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java index bb4b28c616d..5d0e131bd6c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestApplicationClassLoader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestApplicationClassLoader.java @@ -16,18 +16,15 @@ * limitations under the License. */ -package org.apache.hadoop.yarn.util; +package org.apache.hadoop.util; +import static org.apache.hadoop.util.ApplicationClassLoader.constructUrlsFromClasspath; +import static org.apache.hadoop.util.ApplicationClassLoader.isSystemClass; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; -import static org.apache.hadoop.yarn.util.ApplicationClassLoader.constructUrlsFromClasspath; -import static org.apache.hadoop.yarn.util.ApplicationClassLoader.isSystemClass; - -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; import java.io.File; import java.io.FileOutputStream; @@ -43,6 +40,9 @@ import org.apache.hadoop.fs.FileUtil; import org.junit.Before; import org.junit.Test; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; + public class TestApplicationClassLoader { private static File testDir = new File(System.getProperty("test.build.data", diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestRunJar.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestRunJar.java index 8903fca52f8..9e279689a49 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestRunJar.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestRunJar.java @@ -17,23 +17,30 @@ */ package org.apache.hadoop.util; -import junit.framework.TestCase; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; + +import java.io.BufferedInputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.util.jar.JarOutputStream; import java.util.regex.Pattern; import java.util.zip.ZipEntry; +import junit.framework.TestCase; + +import org.apache.hadoop.fs.FileUtil; import org.junit.After; import org.junit.Before; import 
org.junit.Test; -import org.apache.hadoop.fs.FileUtil; public class TestRunJar extends TestCase { private File TEST_ROOT_DIR; private static final String TEST_JAR_NAME="test-runjar.jar"; + private static final String TEST_JAR_2_NAME = "test-runjar2.jar"; @Override @Before @@ -107,4 +114,59 @@ public class TestRunJar extends TestCase { new File(unjarDir, "foobaz.txt").exists()); } + + /** + * Tests the client classloader to verify the main class and its dependent + * class are loaded correctly by the application classloader, and others are + * loaded by the system classloader. + */ + @Test + public void testClientClassLoader() throws Throwable { + RunJar runJar = spy(new RunJar()); + // enable the client classloader + when(runJar.useClientClassLoader()).thenReturn(true); + // set the system classes and blacklist the test main class and the test + // third class so they can be loaded by the application classloader + String mainCls = ClassLoaderCheckMain.class.getName(); + String thirdCls = ClassLoaderCheckThird.class.getName(); + String systemClasses = "-" + mainCls + "," + + "-" + thirdCls + "," + + ApplicationClassLoader.DEFAULT_SYSTEM_CLASSES; + when(runJar.getSystemClasses()).thenReturn(systemClasses); + + // create the test jar + File testJar = makeClassLoaderTestJar(mainCls, thirdCls); + // form the args + String[] args = new String[3]; + args[0] = testJar.getAbsolutePath(); + args[1] = mainCls; + + // run RunJar + runJar.run(args); + // it should not throw an exception + } + + private File makeClassLoaderTestJar(String... clsNames) throws IOException { + File jarFile = new File(TEST_ROOT_DIR, TEST_JAR_2_NAME); + JarOutputStream jstream = + new JarOutputStream(new FileOutputStream(jarFile)); + for (String clsName: clsNames) { + String name = clsName.replace('.', '/') + ".class"; + InputStream entryInputStream = this.getClass().getResourceAsStream( + "/" + name); + ZipEntry entry = new ZipEntry(name); + jstream.putNextEntry(entry); + BufferedInputStream bufInputStream = new BufferedInputStream( + entryInputStream, 2048); + int count; + byte[] data = new byte[2048]; + while ((count = bufInputStream.read(data, 0, 2048)) != -1) { + jstream.write(data, 0, count); + } + jstream.closeEntry(); + } + jstream.close(); + + return jarFile; + } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 423b842962f..3bd8414099c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -34,6 +34,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.annotations.VisibleForTesting; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -56,6 +57,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.util.ApplicationClassLoader; 
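The client-classloader test above hinges on the system-classes specification: a comma-separated list in which a name ending in '.' matches an entire package, a leading '-' marks a negative match, and the first match wins. The short sketch below is illustrative only — it is not part of any patch in this series — and assumes the relocated org.apache.hadoop.util.ApplicationClassLoader introduced here plus a hypothetical user class org.example.MyMain.

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.util.ApplicationClassLoader;
    import org.apache.hadoop.util.StringUtils;

    public class SystemClassesSketch {
      public static void main(String[] args) {
        // Exclude the (hypothetical) user main class so it is loaded from the
        // job classpath; everything in DEFAULT_SYSTEM_CLASSES stays on the
        // system classpath.
        String spec = "-org.example.MyMain,"
            + ApplicationClassLoader.DEFAULT_SYSTEM_CLASSES;
        List<String> systemClasses =
            Arrays.asList(StringUtils.getTrimmedStrings(spec));

        // '-' prefix is a negative match: prints false.
        System.out.println(ApplicationClassLoader.isSystemClass(
            "org.example.MyMain", systemClasses));
        // "org.apache.hadoop." ends with '.', so it matches the whole
        // package tree: prints true.
        System.out.println(ApplicationClassLoader.isSystemClass(
            "org.apache.hadoop.fs.Path", systemClasses));
      }
    }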
import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; @@ -67,7 +69,6 @@ import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; -import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.log4j.RollingFileAppender; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index 2e0423ff22e..02a59e73e2d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -51,6 +51,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.util.ApplicationClassLoader; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -58,7 +59,6 @@ import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -514,7 +514,8 @@ public class TestMRApps { @Test public void testSystemClasses() { final List systemClasses = - Arrays.asList(MRApps.getSystemClasses(new Configuration())); + Arrays.asList(StringUtils.getTrimmedStrings( + ApplicationClassLoader.DEFAULT_SYSTEM_CLASSES)); for (String defaultXml : DEFAULT_XMLS) { assertTrue(defaultXml + " must be system resource", ApplicationClassLoader.isSystemClass(defaultXml, systemClasses)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index b2503c74e20..802ffa1759f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1227,13 +1227,13 @@ mapreduce.job.classloader.system.classes - java.,javax.,org.w3c.dom.,org.xml.sax.,org.apache.commons.logging., - org.apache.log4j.,org.apache.hadoop.,core-default.xml, - hdfs-default.xml,mapred-default.xml,yarn-default.xml - A comma-separated list of classes that should be loaded from the - system classpath, not the user-supplied JARs, when mapreduce.job.classloader - is enabled. Names ending in '.' 
(period) are treated as package names, - and names starting with a '-' are treated as negative matches. + + Used to override the default definition of the system classes for + the job classloader. The system classes are a comma-separated list of + classes that should be loaded from the system classpath, not the + user-supplied JARs, when mapreduce.job.classloader is enabled. Names ending + in '.' (period) are treated as package names, and names starting with a '-' + are treated as negative matches. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index 6b47554e8eb..32153996c8d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -84,13 +84,13 @@ import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.ApplicationClassLoader; import org.apache.hadoop.util.JarFinder; import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; -import org.apache.hadoop.yarn.util.ApplicationClassLoader; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.log4j.Level; import org.junit.AfterClass; @@ -242,8 +242,7 @@ public class TestMRJobs { // to test AM loading user classes such as output format class, we want // to blacklist them from the system classes (they need to be prepended // as the first match wins) - String systemClasses = - sleepConf.get(MRJobConfig.MAPREDUCE_JOB_CLASSLOADER_SYSTEM_CLASSES); + String systemClasses = ApplicationClassLoader.DEFAULT_SYSTEM_CLASSES; // exclude the custom classes from system classes systemClasses = "-" + CustomOutputFormat.class.getName() + ",-" + CustomSpeculator.class.getName() + "," + diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 6609a260130..b1dfb1ec5ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -344,4 +344,11 @@ + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ApplicationClassLoader.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ApplicationClassLoader.java index 63dc5b798c1..ee9ad4c8ddd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ApplicationClassLoader.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ApplicationClassLoader.java @@ -18,180 +18,30 @@ package org.apache.hadoop.yarn.util; -import java.io.File; -import java.io.FilenameFilter; import java.net.MalformedURLException; import java.net.URL; 
-import java.net.URLClassLoader; -import java.util.ArrayList; import java.util.List; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Splitter; - /** - * A {@link URLClassLoader} for YARN application isolation. Classes from - * the application JARs are loaded in preference to the parent loader. + * This type has been deprecated in favor of + * {@link org.apache.hadoop.util.ApplicationClassLoader}. All new uses of + * ApplicationClassLoader should use that type instead. */ @Public @Unstable -public class ApplicationClassLoader extends URLClassLoader { - - private static final Log LOG = - LogFactory.getLog(ApplicationClassLoader.class.getName()); - - private static final FilenameFilter JAR_FILENAME_FILTER = - new FilenameFilter() { - @Override - public boolean accept(File dir, String name) { - return name.endsWith(".jar") || name.endsWith(".JAR"); - } - }; - - private ClassLoader parent; - private List systemClasses; - +@Deprecated +public class ApplicationClassLoader extends + org.apache.hadoop.util.ApplicationClassLoader { public ApplicationClassLoader(URL[] urls, ClassLoader parent, List systemClasses) { - super(urls, parent); - this.parent = parent; - if (parent == null) { - throw new IllegalArgumentException("No parent classloader!"); - } - this.systemClasses = systemClasses; + super(urls, parent, systemClasses); } - + public ApplicationClassLoader(String classpath, ClassLoader parent, List systemClasses) throws MalformedURLException { - this(constructUrlsFromClasspath(classpath), parent, systemClasses); + super(classpath, parent, systemClasses); } - - @VisibleForTesting - static URL[] constructUrlsFromClasspath(String classpath) - throws MalformedURLException { - List urls = new ArrayList(); - for (String element : Splitter.on(File.pathSeparator).split(classpath)) { - if (element.endsWith("/*")) { - String dir = element.substring(0, element.length() - 1); - File[] files = new File(dir).listFiles(JAR_FILENAME_FILTER); - if (files != null) { - for (File file : files) { - urls.add(file.toURI().toURL()); - } - } - } else { - File file = new File(element); - if (file.exists()) { - urls.add(new File(element).toURI().toURL()); - } - } - } - return urls.toArray(new URL[urls.size()]); - } - - @Override - public URL getResource(String name) { - URL url = null; - - if (!isSystemClass(name, systemClasses)) { - url= findResource(name); - if (url == null && name.startsWith("/")) { - if (LOG.isDebugEnabled()) { - LOG.debug("Remove leading / off " + name); - } - url= findResource(name.substring(1)); - } - } - - if (url == null) { - url= parent.getResource(name); - } - - if (url != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("getResource("+name+")=" + url); - } - } - - return url; - } - - @Override - public Class loadClass(String name) throws ClassNotFoundException { - return this.loadClass(name, false); - } - - @Override - protected synchronized Class loadClass(String name, boolean resolve) - throws ClassNotFoundException { - - if (LOG.isDebugEnabled()) { - LOG.debug("Loading class: " + name); - } - - Class c = findLoadedClass(name); - ClassNotFoundException ex = null; - - if (c == null && !isSystemClass(name, systemClasses)) { - // Try to load class from this classloader's URLs. 
Note that this is like - // the servlet spec, not the usual Java 2 behaviour where we ask the - // parent to attempt to load first. - try { - c = findClass(name); - if (LOG.isDebugEnabled() && c != null) { - LOG.debug("Loaded class: " + name + " "); - } - } catch (ClassNotFoundException e) { - if (LOG.isDebugEnabled()) { - LOG.debug(e); - } - ex = e; - } - } - - if (c == null) { // try parent - c = parent.loadClass(name); - if (LOG.isDebugEnabled() && c != null) { - LOG.debug("Loaded class from parent: " + name + " "); - } - } - - if (c == null) { - throw ex != null ? ex : new ClassNotFoundException(name); - } - - if (resolve) { - resolveClass(c); - } - - return c; - } - - @VisibleForTesting - public static boolean isSystemClass(String name, List systemClasses) { - if (systemClasses != null) { - String canonicalName = name.replace('/', '.'); - while (canonicalName.startsWith(".")) { - canonicalName=canonicalName.substring(1); - } - for (String c : systemClasses) { - boolean result = true; - if (c.startsWith("-")) { - c = c.substring(1); - result = false; - } - if (c.endsWith(".") && canonicalName.startsWith(c)) { - return result; - } else if (canonicalName.equals(c)) { - return result; - } - } - } - return false; - } -} \ No newline at end of file +} From 4236c6600eda9cdda708d02f3a5a3fe31228f70c Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 21 Aug 2014 22:41:34 +0000 Subject: [PATCH 07/28] YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled. Contributed by Jian He git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619614 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../scheduler/AbstractYarnScheduler.java | 13 +++++++++++++ .../TestWorkPreservingRMRestart.java | 13 +++++++++++++ 3 files changed, 29 insertions(+) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ed162ba06d4..df0a29dadbd 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -231,6 +231,9 @@ Release 2.6.0 - UNRELEASED YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas via aw) + YARN-2434. RM should not recover containers from previously failed attempt + when AM restart is not enabled (Jian He via jlowe) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 72ee7dbbe0a..ab56bb97212 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -273,6 +273,19 @@ public abstract class AbstractYarnScheduler SchedulerApplicationAttempt schedulerAttempt = schedulerApp.getCurrentAppAttempt(); + if (!rmApp.getApplicationSubmissionContext() + .getKeepContainersAcrossApplicationAttempts()) { + // Do not recover containers for stopped attempt or previous attempt. 
+ if (schedulerAttempt.isStopped() + || !schedulerAttempt.getApplicationAttemptId().equals( + container.getContainerId().getApplicationAttemptId())) { + LOG.info("Skip recovering container " + container + + " for already stopped attempt."); + killOrphanContainerOnNode(nm, container); + continue; + } + } + // create container RMContainer rmContainer = recoverAndCreateContainer(container, nm); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index df64d4c32d2..d6af0d7307e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -513,6 +513,19 @@ public class TestWorkPreservingRMRestart { // just-recovered containers. assertNull(scheduler.getRMContainer(runningContainer.getContainerId())); assertNull(scheduler.getRMContainer(completedContainer.getContainerId())); + + rm2.waitForNewAMToLaunchAndRegister(app1.getApplicationId(), 2, nm1); + + MockNM nm2 = + new MockNM("127.1.1.1:4321", 8192, rm2.getResourceTrackerService()); + NMContainerStatus previousAttemptContainer = + TestRMRestart.createNMContainerStatus(am1.getApplicationAttemptId(), 4, + ContainerState.RUNNING); + nm2.registerNode(Arrays.asList(previousAttemptContainer), null); + // Wait for RM to settle down on recovering containers; + Thread.sleep(3000); + // check containers from previous failed attempt should not be recovered. + assertNull(scheduler.getRMContainer(previousAttemptContainer.getContainerId())); } // Apps already completed before RM restart. Restarted RM scheduler should not From 5109157ed1fbcfcc117f823995cf1a378627e2fd Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Thu, 21 Aug 2014 23:28:44 +0000 Subject: [PATCH 08/28] MAPREDUCE-5130. Add missing job config options to mapred-default.xml (Ray Chiang via Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619626 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../org/apache/hadoop/mapred/JobConf.java | 94 ++++--------------- .../hadoop/mapreduce/util/ConfigUtil.java | 2 + .../src/main/resources/mapred-default.xml | 89 +++++++++++++++++- .../org/apache/hadoop/mapred/TestJobConf.java | 15 +-- .../org/apache/hadoop/conf/TestJobConf.java | 20 ++-- .../hadoop/mapred/gridmix/TestHighRamJob.java | 12 +-- 7 files changed, 133 insertions(+), 102 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index cd4d6a5e643..dfef8e5b99e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -190,6 +190,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-5974. Allow specifying multiple MapOutputCollectors with fallback. (Todd Lipcon via kasha) + MAPREDUCE-5130. 
Add missing job config options to mapred-default.xml + (Ray Chiang via Sandy Ryza) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java index 861c47bbb4c..de78e208e70 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -151,7 +151,9 @@ public class JobConf extends Configuration { /** * A value which if set for memory related configuration options, * indicates that the options are turned off. + * Deprecated because it makes no sense in the context of MR2. */ + @Deprecated public static final long DISABLED_MEMORY_LIMIT = -1L; /** @@ -1809,27 +1811,19 @@ public class JobConf extends Configuration { * Get memory required to run a map task of the job, in MB. * * If a value is specified in the configuration, it is returned. - * Else, it returns {@link #DISABLED_MEMORY_LIMIT}. + * Else, it returns {@link JobContext#DEFAULT_MAP_MEMORY_MB}. *

* For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used * after converting it from bytes to MB. * @return memory required to run a map task of the job, in MB, - * or {@link #DISABLED_MEMORY_LIMIT} if unset. */ public long getMemoryForMapTask() { long value = getDeprecatedMemoryValue(); - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPREDUCE_JOB_MAP_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); - } - // In case that M/R 1.x applications use the old property name - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); + if (value < 0) { + return getLong(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY, + JobContext.DEFAULT_MAP_MEMORY_MB); } return value; } @@ -1844,27 +1838,19 @@ public class JobConf extends Configuration { * Get memory required to run a reduce task of the job, in MB. * * If a value is specified in the configuration, it is returned. - * Else, it returns {@link #DISABLED_MEMORY_LIMIT}. + * Else, it returns {@link JobContext#DEFAULT_REDUCE_MEMORY_MB}. *

* For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used * after converting it from bytes to MB. - * @return memory required to run a reduce task of the job, in MB, - * or {@link #DISABLED_MEMORY_LIMIT} if unset. + * @return memory required to run a reduce task of the job, in MB. */ public long getMemoryForReduceTask() { long value = getDeprecatedMemoryValue(); - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPREDUCE_JOB_REDUCE_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); - } - // In case that M/R 1.x applications use the old property name - if (value == DISABLED_MEMORY_LIMIT) { - value = normalizeMemoryConfigValue( - getLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, - DISABLED_MEMORY_LIMIT)); + if (value < 0) { + return getLong(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY, + JobContext.DEFAULT_REDUCE_MEMORY_MB); } return value; } @@ -1876,8 +1862,7 @@ public class JobConf extends Configuration { private long getDeprecatedMemoryValue() { long oldValue = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, DISABLED_MEMORY_LIMIT); - oldValue = normalizeMemoryConfigValue(oldValue); - if (oldValue != DISABLED_MEMORY_LIMIT) { + if (oldValue > 0) { oldValue /= (1024*1024); } return oldValue; @@ -1921,39 +1906,6 @@ public class JobConf extends Configuration { return val; } - /** - * Compute the number of slots required to run a single map task-attempt - * of this job. - * @param slotSizePerMap cluster-wide value of the amount of memory required - * to run a map-task - * @return the number of slots required to run a single map task-attempt - * 1 if memory parameters are disabled. - */ - int computeNumSlotsPerMap(long slotSizePerMap) { - if ((slotSizePerMap==DISABLED_MEMORY_LIMIT) || - (getMemoryForMapTask()==DISABLED_MEMORY_LIMIT)) { - return 1; - } - return (int)(Math.ceil((float)getMemoryForMapTask() / (float)slotSizePerMap)); - } - - /** - * Compute the number of slots required to run a single reduce task-attempt - * of this job. - * @param slotSizePerReduce cluster-wide value of the amount of memory - * required to run a reduce-task - * @return the number of slots required to run a single reduce task-attempt - * 1 if memory parameters are disabled - */ - int computeNumSlotsPerReduce(long slotSizePerReduce) { - if ((slotSizePerReduce==DISABLED_MEMORY_LIMIT) || - (getMemoryForReduceTask()==DISABLED_MEMORY_LIMIT)) { - return 1; - } - return - (int)(Math.ceil((float)getMemoryForReduceTask() / (float)slotSizePerReduce)); - } - /** * Find a jar that contains a class of the same name, if any. * It will return a jar file, even if that is not the first thing @@ -1975,14 +1927,12 @@ public class JobConf extends Configuration { * set for map and reduce tasks of a job, in MB. *

* For backward compatibility, if the job configuration sets the - * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different - * from {@link #DISABLED_MEMORY_LIMIT}, that value is returned. + * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. * Otherwise, this method will return the larger of the values returned by * {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} * after converting them into bytes. * - * @return Memory required to run a task of this job, in bytes, - * or {@link #DISABLED_MEMORY_LIMIT}, if unset. + * @return Memory required to run a task of this job, in bytes. * @see #setMaxVirtualMemoryForTask(long) * @deprecated Use {@link #getMemoryForMapTask()} and * {@link #getMemoryForReduceTask()} @@ -1993,15 +1943,8 @@ public class JobConf extends Configuration { "getMaxVirtualMemoryForTask() is deprecated. " + "Instead use getMemoryForMapTask() and getMemoryForReduceTask()"); - long value = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, DISABLED_MEMORY_LIMIT); - value = normalizeMemoryConfigValue(value); - if (value == DISABLED_MEMORY_LIMIT) { - value = Math.max(getMemoryForMapTask(), getMemoryForReduceTask()); - value = normalizeMemoryConfigValue(value); - if (value != DISABLED_MEMORY_LIMIT) { - value *= 1024*1024; - } - } + long value = getLong(MAPRED_TASK_MAXVMEM_PROPERTY, + Math.max(getMemoryForMapTask(), getMemoryForReduceTask()) * 1024 * 1024); return value; } @@ -2027,9 +1970,8 @@ public class JobConf extends Configuration { public void setMaxVirtualMemoryForTask(long vmem) { LOG.warn("setMaxVirtualMemoryForTask() is deprecated."+ "Instead use setMemoryForMapTask() and setMemoryForReduceTask()"); - if(vmem != DISABLED_MEMORY_LIMIT && vmem < 0) { - setMemoryForMapTask(DISABLED_MEMORY_LIMIT); - setMemoryForReduceTask(DISABLED_MEMORY_LIMIT); + if (vmem < 0) { + throw new IllegalArgumentException("Task memory allocation may not be < 0"); } if(get(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY) == null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java index 5a38da8749f..450f3664355 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java @@ -278,6 +278,8 @@ public class ConfigUtil { MRJobConfig.TASK_DEBUGOUT_LINES), new DeprecationDelta("mapred.merge.recordsBeforeProgress", MRJobConfig.RECORDS_BEFORE_PROGRESS), + new DeprecationDelta("mapred.merge.recordsBeforeProgress", + MRJobConfig.COMBINE_RECORDS_BEFORE_PROGRESS), new DeprecationDelta("mapred.skip.attempts.to.start.skipping", MRJobConfig.SKIP_START_ATTEMPTS), new DeprecationDelta("mapred.task.id", diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 802ffa1759f..703a1039569 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -185,11 +185,42 @@ + + mapreduce.map.memory.mb + 1024 + The amount of memory to request from the scheduler for each + map task. + + + + + mapreduce.map.cpu.vcores + 1 + The number of virtual cores to request from the scheduler for + each map task. + + + + + mapreduce.reduce.memory.mb + 1024 + The amount of memory to request from the scheduler for each + reduce task. + + + + + mapreduce.reduce.cpu.vcores + 1 + The number of virtual cores to request from the scheduler for + each reduce task. + + mapred.child.java.opts -Xmx200m - Java opts for the task tracker child processes. + Java opts for the task processes. The following symbol, if present, will be interpolated: @taskid@ is replaced by current TaskID. Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in @@ -203,17 +234,55 @@ + + + + mapred.child.env - User added environment variables for the task tracker child - processes. Example : + User added environment variables for the task processes. + Example : 1) A=foo This will set the env variable A to foo 2) B=$B:c This is inherit nodemanager's B env variable on Unix. 3) B=%B%;c This is inherit nodemanager's B env variable on Windows. + + + + mapreduce.admin.user.env @@ -490,6 +559,12 @@ + + mapreduce.input.lineinputformat.linespermap + 1 + When using NLineInputFormat, the number of lines of input data + to include in each split. + @@ -923,6 +998,14 @@ + + mapreduce.task.combine.progress.records + 10000 + The number of records to process during combine output collection + before sending a progress notification. + + + mapreduce.job.reduce.slowstart.completedmaps 0.05 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java index f4327459e66..3d924e1f72d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestJobConf.java @@ -140,18 +140,21 @@ public class TestJobConf { conf.setQueueName("qname"); assertEquals("qname", conf.getQueueName()); - assertEquals(1, conf.computeNumSlotsPerMap(100L)); - assertEquals(1, conf.computeNumSlotsPerReduce(100L)); - conf.setMemoryForMapTask(100 * 1000); - assertEquals(1000, conf.computeNumSlotsPerMap(100L)); + assertEquals(100 * 1000, conf.getMemoryForMapTask()); conf.setMemoryForReduceTask(1000 * 1000); - assertEquals(1000, conf.computeNumSlotsPerReduce(1000L)); + assertEquals(1000 * 1000, conf.getMemoryForReduceTask()); assertEquals(-1, conf.getMaxPhysicalMemoryForTask()); assertEquals("The variable key is no longer used.", JobConf.deprecatedString("key")); - + + // make sure mapreduce.map|reduce.java.opts are not set by default + // so that they won't override mapred.child.java.opts + assertEquals("mapreduce.map.java.opts should not be set by default", + null, conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS)); + assertEquals("mapreduce.reduce.java.opts should not be set by default", + null, conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS)); } /** diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java index b69f450ed35..e380d9200cf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/conf/TestJobConf.java @@ -108,6 +108,11 @@ public class TestJobConf { JobConf configuration = new JobConf(); configuration.set(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY, "-3"); + Assert.assertEquals(MRJobConfig.DEFAULT_MAP_MEMORY_MB, + configuration.getMemoryForMapTask()); + Assert.assertEquals(MRJobConfig.DEFAULT_REDUCE_MEMORY_MB, + configuration.getMemoryForReduceTask()); + configuration.set(MRJobConfig.MAP_MEMORY_MB, "4"); configuration.set(MRJobConfig.REDUCE_MEMORY_MB, "5"); Assert.assertEquals(4, configuration.getMemoryForMapTask()); @@ -116,23 +121,16 @@ public class TestJobConf { } /** - * Test that negative values for all memory configuration properties causes - * APIs to disable memory limits + * Test that negative values for new configuration keys get passed through. */ @Test public void testNegativeValuesForMemoryParams() { JobConf configuration = new JobConf(); - - configuration.set(JobConf.MAPRED_TASK_MAXVMEM_PROPERTY, "-4"); + configuration.set(MRJobConfig.MAP_MEMORY_MB, "-5"); configuration.set(MRJobConfig.REDUCE_MEMORY_MB, "-6"); - - Assert.assertEquals(JobConf.DISABLED_MEMORY_LIMIT, - configuration.getMemoryForMapTask()); - Assert.assertEquals(JobConf.DISABLED_MEMORY_LIMIT, - configuration.getMemoryForReduceTask()); - Assert.assertEquals(JobConf.DISABLED_MEMORY_LIMIT, - configuration.getMaxVirtualMemoryForTask()); + Assert.assertEquals(-5, configuration.getMemoryForMapTask()); + Assert.assertEquals(-6, configuration.getMemoryForReduceTask()); } /** diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java index 5523d731b50..9cc84ea6d73 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestHighRamJob.java @@ -97,10 +97,10 @@ public class TestHighRamJob { // check if the high ram properties are not set assertEquals(expectedMapMB, simulatedConf.getLong(MRJobConfig.MAP_MEMORY_MB, - JobConf.DISABLED_MEMORY_LIMIT)); + MRJobConfig.DEFAULT_MAP_MEMORY_MB)); assertEquals(expectedReduceMB, simulatedConf.getLong(MRJobConfig.REDUCE_MEMORY_MB, - JobConf.DISABLED_MEMORY_LIMIT)); + MRJobConfig.DEFAULT_MAP_MEMORY_MB)); } /** @@ -114,10 +114,10 @@ public class TestHighRamJob { // test : check high ram emulation disabled gridmixConf.setBoolean(GridmixJob.GRIDMIX_HIGHRAM_EMULATION_ENABLE, false); - testHighRamConfig(10, 20, 5, 10, JobConf.DISABLED_MEMORY_LIMIT, - JobConf.DISABLED_MEMORY_LIMIT, - JobConf.DISABLED_MEMORY_LIMIT, - JobConf.DISABLED_MEMORY_LIMIT, gridmixConf); + testHighRamConfig(10, 20, 5, 10, MRJobConfig.DEFAULT_MAP_MEMORY_MB, + MRJobConfig.DEFAULT_REDUCE_MEMORY_MB, + MRJobConfig.DEFAULT_MAP_MEMORY_MB, + MRJobConfig.DEFAULT_REDUCE_MEMORY_MB, gridmixConf); // test : check with high ram enabled (default) and no scaling gridmixConf = new 
Configuration(); From 7be28083472ee83d396a075c99ce5c59d29ec3f6 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Thu, 21 Aug 2014 23:58:25 +0000 Subject: [PATCH 09/28] HADOOP-8896. Javadoc points to Wrong Reader and Writer classes in SequenceFile (Ray Chiang via aw) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619632 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../src/main/java/org/apache/hadoop/io/SequenceFile.java | 9 +++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c880e1167dd..655df79264c 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -507,6 +507,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10698. KMS, add proxyuser support. (tucu) + HADOOP-8896. Javadoc points to Wrong Reader and Writer classes + in SequenceFile (Ray Chiang via aw) + OPTIMIZATIONS HADOOP-10838. Byte array native checksumming. (James Thomas via todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index 0c5f31534ad..4cda1077482 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -53,8 +53,9 @@ import org.apache.hadoop.util.Time; * SequenceFiles are flat files consisting of binary key/value * pairs. * - *

SequenceFile provides {@link Writer}, {@link Reader} and - * {@link Sorter} classes for writing, reading and sorting respectively.

+ *

SequenceFile provides {@link SequenceFile.Writer}, + * {@link SequenceFile.Reader} and {@link Sorter} classes for writing, + * reading and sorting respectively.

* * There are three SequenceFile Writers based on the * {@link CompressionType} used to compress key/value pairs: @@ -79,8 +80,8 @@ import org.apache.hadoop.util.Time; *

The recommended way is to use the static createWriter methods * provided by the SequenceFile to choose the preferred format.

* - *

The {@link Reader} acts as the bridge and can read any of the above - * SequenceFile formats.

+ *

The {@link SequenceFile.Reader} acts as the bridge and can read any of the + * above SequenceFile formats.

* *

SequenceFile Formats

* From b6c24472f31c1509699b5b4d0c0f9fb5db69a49a Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Fri, 22 Aug 2014 04:05:18 +0000 Subject: [PATCH 10/28] HADOOP-10989. Work around buggy getgrouplist() implementations on Linux that return 0 on failure. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619659 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../src/org/apache/hadoop/security/hadoop_user_info.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 655df79264c..52fc3e0eec7 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -686,6 +686,9 @@ Release 2.6.0 - UNRELEASED HADOOP-10488. TestKeyProviderFactory fails randomly. (tucu) + HADOOP-10989. Work around buggy getgrouplist() implementations on Linux that + return 0 on failure. (cnauroth) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c index ca288ec7da2..e2438b1b06c 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/security/hadoop_user_info.c @@ -193,7 +193,17 @@ int hadoop_user_info_getgroups(struct hadoop_user_info *uinfo) ngroups = uinfo->gids_size; ret = getgrouplist(uinfo->pwd.pw_name, uinfo->pwd.pw_gid, uinfo->gids, &ngroups); + // Return value is different on Linux vs. FreeBSD. Linux: the number of groups + // or -1 on error. FreeBSD: 0 on success or -1 on error. Unfortunately, we + // can't accept a 0 return on Linux, because buggy implementations have been + // observed to return 0 but leave the other out parameters in an indeterminate + // state. This deviates from the man page, but it has been observed in + // practice. See issue HADOOP-10989 for details. +#ifdef __linux__ + if (ret > 0) { +#else if (ret >= 0) { +#endif uinfo->num_gids = ngroups; ret = put_primary_gid_first(uinfo); if (ret) { From 3aa3b0abc2a6994cde428ae73183561e40bfc96c Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Fri, 22 Aug 2014 05:17:22 +0000 Subject: [PATCH 11/28] HDF-6905. fs-encryption merge triggered release audit failures. (clamb via tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619667 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../hdfs/protocol/EncryptionZoneWithId.java | 17 +++++++++++++++++ .../namenode/EncryptionFaultInjector.java | 17 +++++++++++++++++ .../server/namenode/EncryptionZoneManager.java | 17 +++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5776f892b5c..8ff94827dad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -346,6 +346,8 @@ Trunk (Unreleased) HDFS-6839. Fix TestCLI to expect new output. (clamb) + HDFS-6905. fs-encryption merge triggered release audit failures. 
(clamb via tucu) + Release 2.6.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneWithId.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneWithId.java index 7ed4884bbd5..e7fd2aefd95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneWithId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneWithId.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hdfs.protocol; import org.apache.commons.lang.builder.HashCodeBuilder; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java index 2e65a892046..27d8f501d1a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionFaultInjector.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java index e45d540386f..a0e1f0ccac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; From 0097b15e2150f95745f64179a0ef4644e96128f5 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Fri, 22 Aug 2014 15:44:47 +0000 Subject: [PATCH 12/28] YARN-2393. FairScheduler: Add the notion of steady fair share. (Wei Yan via kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619845 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../scheduler/fair/FSAppAttempt.java | 6 - .../scheduler/fair/FSParentQueue.java | 11 +- .../scheduler/fair/FSQueue.java | 19 ++- .../scheduler/fair/FSQueueMetrics.java | 17 ++- .../scheduler/fair/FairScheduler.java | 4 + .../scheduler/fair/QueueManager.java | 10 +- .../scheduler/fair/Schedulable.java | 7 - .../scheduler/fair/SchedulingPolicy.java | 27 +++- .../fair/policies/ComputeFairShares.java | 34 ++++- .../DominantResourceFairnessPolicy.java | 9 ++ .../fair/policies/FairSharePolicy.java | 8 + .../scheduler/fair/policies/FifoPolicy.java | 8 + .../scheduler/fair/FakeSchedulable.java | 5 - .../scheduler/fair/TestFairScheduler.java | 139 +++++++++++++++++- .../fair/TestFairSchedulerFairShare.java | 68 ++++++++- 16 files changed, 328 insertions(+), 47 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index df0a29dadbd..5b61b4146c1 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -55,6 +55,9 @@ Release 2.6.0 - UNRELEASED YARN-2174. Enable HTTPs for the writer REST API of TimelineServer. (Zhijie Shen via jianhe) + YARN-2393. FairScheduler: Add the notion of steady fair share. + (Wei Yan via kasha) + IMPROVEMENTS YARN-2197. 
Add a link to YARN CHANGES.txt in the left side of doc diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java index eb6f6413893..bf543768f8c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSAppAttempt.java @@ -717,12 +717,6 @@ public class FSAppAttempt extends SchedulerApplicationAttempt this.fairShare = fairShare; } - @Override - public boolean isActive() { - return true; - } - - @Override public void updateDemand() { demand = Resources.createResource(0); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java index 9af72a511e0..26a706c7f03 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java @@ -35,7 +35,6 @@ import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.util.resource.Resources; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; @@ -68,6 +67,16 @@ public class FSParentQueue extends FSQueue { } } + public void recomputeSteadyShares() { + policy.computeSteadyShares(childQueues, getSteadyFairShare()); + for (FSQueue childQueue : childQueues) { + childQueue.getMetrics().setSteadyFairShare(childQueue.getSteadyFairShare()); + if (childQueue instanceof FSParentQueue) { + ((FSParentQueue) childQueue).recomputeSteadyShares(); + } + } + } + @Override public Resource getDemand() { return demand; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java index c071c73321d..00f0795e1da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueue.java @@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.util.resource.Resources; @Unstable public abstract class FSQueue implements Queue, Schedulable { private Resource fairShare = Resources.createResource(0, 0); + private Resource steadyFairShare = Resources.createResource(0, 0); private final String name; protected final FairScheduler scheduler; private final FSQueueMetrics metrics; @@ -151,7 +152,17 @@ public abstract class FSQueue implements Queue, Schedulable { this.fairShare = fairShare; metrics.setFairShare(fairShare); } - + + /** Get the steady fair share assigned to this Schedulable. */ + public Resource getSteadyFairShare() { + return steadyFairShare; + } + + public void setSteadyFairShare(Resource steadyFairShare) { + this.steadyFairShare = steadyFairShare; + metrics.setSteadyFairShare(steadyFairShare); + } + public boolean hasAccess(QueueACL acl, UserGroupInformation user) { return scheduler.getAllocationConfiguration().hasAccess(name, acl, user); } @@ -161,7 +172,7 @@ public abstract class FSQueue implements Queue, Schedulable { * queue's current share */ public abstract void recomputeShares(); - + /** * Gets the children of this queue, if any. */ @@ -194,7 +205,9 @@ public abstract class FSQueue implements Queue, Schedulable { return true; } - @Override + /** + * Returns true if queue has at least one app running. + */ public boolean isActive() { return getNumRunnableApps() > 0; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java index ff0956e5f74..82c422b8207 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSQueueMetrics.java @@ -33,6 +33,8 @@ public class FSQueueMetrics extends QueueMetrics { @Metric("Fair share of memory in MB") MutableGaugeInt fairShareMB; @Metric("Fair share of CPU in vcores") MutableGaugeInt fairShareVCores; + @Metric("Steady fair share of memory in MB") MutableGaugeInt steadyFairShareMB; + @Metric("Steady fair share of CPU in vcores") MutableGaugeInt steadyFairShareVCores; @Metric("Minimum share of memory in MB") MutableGaugeInt minShareMB; @Metric("Minimum share of CPU in vcores") MutableGaugeInt minShareVCores; @Metric("Maximum share of memory in MB") MutableGaugeInt maxShareMB; @@ -55,7 +57,20 @@ public class FSQueueMetrics extends QueueMetrics { public int getFairShareVirtualCores() { return fairShareVCores.value(); } - + + public void setSteadyFairShare(Resource resource) { + steadyFairShareMB.set(resource.getMemory()); + steadyFairShareVCores.set(resource.getVirtualCores()); + } + + public int getSteadyFairShareMB() { + return steadyFairShareMB.value(); + } + + public int getSteadyFairShareVCores() { + return steadyFairShareVCores.value(); + } + public void setMinShare(Resource resource) { minShareMB.set(resource.getMemory()); 
minShareVCores.set(resource.getVirtualCores()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 0fcbad670e5..40c72a621e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -851,6 +851,8 @@ public class FairScheduler extends Resources.addTo(clusterResource, node.getTotalCapability()); updateRootQueueMetrics(); + queueMgr.getRootQueue().setSteadyFairShare(clusterResource); + queueMgr.getRootQueue().recomputeSteadyShares(); LOG.info("Added node " + node.getNodeAddress() + " cluster capacity: " + clusterResource); } @@ -885,6 +887,8 @@ public class FairScheduler extends } nodes.remove(rmNode.getNodeID()); + queueMgr.getRootQueue().setSteadyFairShare(clusterResource); + queueMgr.getRootQueue().recomputeSteadyShares(); LOG.info("Removed node " + rmNode.getNodeAddress() + " cluster capacity: " + clusterResource); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java index 4f8735bbf03..490ba686598 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java @@ -118,6 +118,11 @@ public class QueueManager { if (queue == null && create) { // if the queue doesn't exist,create it and return queue = createQueue(name, queueType); + + // Update steady fair share for all queues + if (queue != null) { + rootQueue.recomputeSteadyShares(); + } } return queue; } @@ -190,7 +195,7 @@ public class QueueManager { parent = newParent; } } - + return parent; } @@ -376,5 +381,8 @@ public class QueueManager { + queue.getName(), ex); } } + + // Update steady fair shares for all queues + rootQueue.recomputeSteadyShares(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java index 122b986defc..289887f63c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/Schedulable.java @@ -24,7 +24,6 @@ import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; -import org.apache.hadoop.yarn.util.resource.Resources; /** * A Schedulable represents an entity that can be scheduled such as an @@ -102,10 +101,4 @@ public interface Schedulable { /** Assign a fair share to this Schedulable. */ public void setFairShare(Resource fairShare); - - /** - * Returns true if queue has atleast one app running. Always returns true for - * AppSchedulables. - */ - public boolean isActive(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java index 1087c73aa19..ca006c580ed 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/SchedulingPolicy.java @@ -17,10 +17,6 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; -import java.util.Collection; -import java.util.Comparator; -import java.util.concurrent.ConcurrentHashMap; - import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.util.ReflectionUtils; @@ -29,6 +25,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.Dom import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FairSharePolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy; +import java.util.Collection; +import java.util.Comparator; +import java.util.concurrent.ConcurrentHashMap; + @Public @Evolving public abstract class SchedulingPolicy { @@ -131,8 +131,10 @@ public abstract class SchedulingPolicy { public abstract Comparator getComparator(); /** - * Computes and updates the shares of {@link Schedulable}s as per the - * {@link SchedulingPolicy}, to be used later at schedule time. + * Computes and updates the shares of {@link Schedulable}s as per + * the {@link SchedulingPolicy}, to be used later for scheduling decisions. + * The shares computed are instantaneous and only consider queues with + * running applications. * * @param schedulables {@link Schedulable}s whose shares are to be updated * @param totalResources Total {@link Resource}s in the cluster @@ -140,6 +142,19 @@ public abstract class SchedulingPolicy { public abstract void computeShares( Collection schedulables, Resource totalResources); + /** + * Computes and updates the steady shares of {@link FSQueue}s as per the + * {@link SchedulingPolicy}. The steady share does not differentiate + * between queues with and without running applications under them. 
The + * steady share is not used for scheduling, it is displayed on the Web UI + * for better visibility. + * + * @param queues {@link FSQueue}s whose shares are to be updated + * @param totalResources Total {@link Resource}s in the cluster + */ + public abstract void computeSteadyShares( + Collection queues, Resource totalResources); + /** * Check if the resource usage is over the fair share under this policy * diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java index 6363ec0218c..6836758019b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/ComputeFairShares.java @@ -22,6 +22,7 @@ import java.util.Collection; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; /** @@ -49,14 +50,29 @@ public class ComputeFairShares { ResourceType type) { Collection activeSchedulables = new ArrayList(); for (Schedulable sched : schedulables) { - if (sched.isActive()) { - activeSchedulables.add(sched); - } else { + if ((sched instanceof FSQueue) && !((FSQueue) sched).isActive()) { setResourceValue(0, sched.getFairShare(), type); + } else { + activeSchedulables.add(sched); } } - computeSharesInternal(activeSchedulables, totalResources, type); + computeSharesInternal(activeSchedulables, totalResources, type, false); + } + + /** + * Compute the steady fair share of the given queues. The steady fair + * share is an allocation of shares considering all queues, i.e., + * active and inactive. 
+ * + * @param queues + * @param totalResources + * @param type + */ + public static void computeSteadyShares( + Collection queues, Resource totalResources, + ResourceType type) { + computeSharesInternal(queues, totalResources, type, true); } /** @@ -102,7 +118,7 @@ public class ComputeFairShares { */ private static void computeSharesInternal( Collection schedulables, Resource totalResources, - ResourceType type) { + ResourceType type, boolean isSteadyShare) { if (schedulables.isEmpty()) { return; } @@ -145,7 +161,13 @@ public class ComputeFairShares { } // Set the fair shares based on the value of R we've converged to for (Schedulable sched : schedulables) { - setResourceValue(computeShare(sched, right, type), sched.getFairShare(), type); + if (isSteadyShare) { + setResourceValue(computeShare(sched, right, type), + ((FSQueue) sched).getSteadyFairShare(), type); + } else { + setResourceValue( + computeShare(sched, right, type), sched.getFairShare(), type); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java index af674b96056..42044bcaac1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/DominantResourceFairnessPolicy.java @@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceWeights; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy; import org.apache.hadoop.yarn.util.resource.Resources; @@ -68,6 +69,14 @@ public class DominantResourceFairnessPolicy extends SchedulingPolicy { ComputeFairShares.computeShares(schedulables, totalResources, type); } } + + @Override + public void computeSteadyShares(Collection queues, + Resource totalResources) { + for (ResourceType type : ResourceType.values()) { + ComputeFairShares.computeSteadyShares(queues, totalResources, type); + } + } @Override public boolean checkIfUsageOverFairShare(Resource usage, Resource fairShare) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java index c51852fa9d6..66bb88bf16c 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FairSharePolicy.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; @@ -119,6 +120,13 @@ public class FairSharePolicy extends SchedulingPolicy { ComputeFairShares.computeShares(schedulables, totalResources, ResourceType.MEMORY); } + @Override + public void computeSteadyShares(Collection queues, + Resource totalResources) { + ComputeFairShares.computeSteadyShares(queues, totalResources, + ResourceType.MEMORY); + } + @Override public boolean checkIfUsageOverFairShare(Resource usage, Resource fairShare) { return Resources.greaterThan(RESOURCE_CALCULATOR, null, usage, fairShare); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java index 0f4309759d4..591ee4936b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/policies/FifoPolicy.java @@ -24,6 +24,7 @@ import java.util.Comparator; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy; import org.apache.hadoop.yarn.util.resource.Resources; @@ -87,6 +88,13 @@ public class FifoPolicy extends SchedulingPolicy { earliest.setFairShare(Resources.clone(totalResources)); } + @Override + public void computeSteadyShares(Collection queues, + Resource totalResources) { + // Nothing needs to do, as leaf queue doesn't have to calculate steady + // fair shares for applications. 
+ } + @Override public boolean checkIfUsageOverFairShare(Resource usage, Resource fairShare) { throw new UnsupportedOperationException( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java index 5bd52ab7a07..5a170cf2c5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FakeSchedulable.java @@ -100,11 +100,6 @@ public class FakeSchedulable implements Schedulable { this.fairShare = fairShare; } - @Override - public boolean isActive() { - return true; - } - @Override public Resource getDemand() { return null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index a7b1738cda7..79e3184e79c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -292,14 +292,19 @@ public class TestFairScheduler extends FairSchedulerTestBase { createSchedulingRequest(10 * 1024, "root.default", "user1"); scheduler.update(); + scheduler.getQueueManager().getRootQueue() + .setSteadyFairShare(scheduler.getClusterResource()); + scheduler.getQueueManager().getRootQueue().recomputeSteadyShares(); Collection queues = scheduler.getQueueManager().getLeafQueues(); assertEquals(3, queues.size()); - // Divided three ways - betwen the two queues and the default queue + // Divided three ways - between the two queues and the default queue for (FSLeafQueue p : queues) { assertEquals(3414, p.getFairShare().getMemory()); assertEquals(3414, p.getMetrics().getFairShareMB()); + assertEquals(3414, p.getSteadyFairShare().getMemory()); + assertEquals(3414, p.getMetrics().getSteadyFairShareMB()); } } @@ -323,6 +328,9 @@ public class TestFairScheduler extends FairSchedulerTestBase { createSchedulingRequest(10 * 1024, "root.default", "user1"); scheduler.update(); + scheduler.getQueueManager().getRootQueue() + .setSteadyFairShare(scheduler.getClusterResource()); + scheduler.getQueueManager().getRootQueue().recomputeSteadyShares(); QueueManager queueManager = scheduler.getQueueManager(); Collection queues = queueManager.getLeafQueues(); @@ -333,10 +341,16 @@ public class TestFairScheduler extends FairSchedulerTestBase { FSLeafQueue queue3 = queueManager.getLeafQueue("parent.queue3", true); assertEquals(capacity / 2, queue1.getFairShare().getMemory()); assertEquals(capacity / 2, queue1.getMetrics().getFairShareMB()); + assertEquals(capacity / 2, 
queue1.getSteadyFairShare().getMemory()); + assertEquals(capacity / 2, queue1.getMetrics().getSteadyFairShareMB()); assertEquals(capacity / 4, queue2.getFairShare().getMemory()); assertEquals(capacity / 4, queue2.getMetrics().getFairShareMB()); + assertEquals(capacity / 4, queue2.getSteadyFairShare().getMemory()); + assertEquals(capacity / 4, queue2.getMetrics().getSteadyFairShareMB()); assertEquals(capacity / 4, queue3.getFairShare().getMemory()); assertEquals(capacity / 4, queue3.getMetrics().getFairShareMB()); + assertEquals(capacity / 4, queue3.getSteadyFairShare().getMemory()); + assertEquals(capacity / 4, queue3.getMetrics().getSteadyFairShareMB()); } @Test @@ -771,6 +785,9 @@ public class TestFairScheduler extends FairSchedulerTestBase { createSchedulingRequest(10 * 1024, "root.default", "user3"); scheduler.update(); + scheduler.getQueueManager().getRootQueue() + .setSteadyFairShare(scheduler.getClusterResource()); + scheduler.getQueueManager().getRootQueue().recomputeSteadyShares(); Collection leafQueues = scheduler.getQueueManager() .getLeafQueues(); @@ -780,12 +797,128 @@ public class TestFairScheduler extends FairSchedulerTestBase { || leaf.getName().equals("root.parentq.user2")) { // assert that the fair share is 1/4th node1's capacity assertEquals(capacity / 4, leaf.getFairShare().getMemory()); + // assert that the steady fair share is 1/4th node1's capacity + assertEquals(capacity / 4, leaf.getSteadyFairShare().getMemory()); // assert weights are equal for both the user queues assertEquals(1.0, leaf.getWeights().getWeight(ResourceType.MEMORY), 0); } } } - + + @Test + public void testSteadyFairShareWithReloadAndNodeAddRemove() throws Exception { + conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); + + PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println("fair"); + out.println(""); + out.println(" drf"); + out.println(" "); + out.println(" 1"); + out.println(" "); + out.println(" "); + out.println(" 1"); + out.println(" "); + out.println(""); + out.println(""); + out.close(); + + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // The steady fair share for all queues should be 0 + QueueManager queueManager = scheduler.getQueueManager(); + assertEquals(0, queueManager.getLeafQueue("child1", false) + .getSteadyFairShare().getMemory()); + assertEquals(0, queueManager.getLeafQueue("child2", false) + .getSteadyFairShare().getMemory()); + + // Add one node + RMNode node1 = + MockNodes + .newNodeInfo(1, Resources.createResource(6144), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + assertEquals(6144, scheduler.getClusterResource().getMemory()); + + // The steady fair shares for all queues should be updated + assertEquals(2048, queueManager.getLeafQueue("child1", false) + .getSteadyFairShare().getMemory()); + assertEquals(2048, queueManager.getLeafQueue("child2", false) + .getSteadyFairShare().getMemory()); + + // Reload the allocation configuration file + out = new PrintWriter(new FileWriter(ALLOC_FILE)); + out.println(""); + out.println(""); + out.println("fair"); + out.println(""); + out.println(" drf"); + out.println(" "); + out.println(" 1"); + out.println(" "); + out.println(" "); + out.println(" 2"); + out.println(" "); + out.println(" "); + out.println(" 2"); + out.println(" "); + out.println(""); + out.println(""); + out.close(); + scheduler.reinitialize(conf, 
resourceManager.getRMContext()); + + // The steady fair shares for all queues should be updated + assertEquals(1024, queueManager.getLeafQueue("child1", false) + .getSteadyFairShare().getMemory()); + assertEquals(2048, queueManager.getLeafQueue("child2", false) + .getSteadyFairShare().getMemory()); + assertEquals(2048, queueManager.getLeafQueue("child3", false) + .getSteadyFairShare().getMemory()); + + // Remove the node, steady fair shares should back to 0 + NodeRemovedSchedulerEvent nodeEvent2 = new NodeRemovedSchedulerEvent(node1); + scheduler.handle(nodeEvent2); + assertEquals(0, scheduler.getClusterResource().getMemory()); + assertEquals(0, queueManager.getLeafQueue("child1", false) + .getSteadyFairShare().getMemory()); + assertEquals(0, queueManager.getLeafQueue("child2", false) + .getSteadyFairShare().getMemory()); + } + + @Test + public void testSteadyFairShareWithQueueCreatedRuntime() throws Exception { + conf.setClass(CommonConfigurationKeys.HADOOP_SECURITY_GROUP_MAPPING, + SimpleGroupsMapping.class, GroupMappingServiceProvider.class); + conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "true"); + scheduler.init(conf); + scheduler.start(); + scheduler.reinitialize(conf, resourceManager.getRMContext()); + + // Add one node + RMNode node1 = + MockNodes + .newNodeInfo(1, Resources.createResource(6144), 1, "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + scheduler.handle(nodeEvent1); + assertEquals(6144, scheduler.getClusterResource().getMemory()); + assertEquals(6144, scheduler.getQueueManager().getRootQueue() + .getSteadyFairShare().getMemory()); + assertEquals(6144, scheduler.getQueueManager() + .getLeafQueue("default", false).getSteadyFairShare().getMemory()); + + // Submit one application + ApplicationAttemptId appAttemptId1 = createAppAttemptId(1, 1); + createApplicationWithAMResource(appAttemptId1, "default", "user1", null); + assertEquals(3072, scheduler.getQueueManager() + .getLeafQueue("default", false).getSteadyFairShare().getMemory()); + assertEquals(3072, scheduler.getQueueManager() + .getLeafQueue("user1", false).getSteadyFairShare().getMemory()); + } + /** * Make allocation requests and ensure they are reflected in queue demand. 
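For reference, an allocation file with weighted child queues, written the same way the steady-fair-share tests above write theirs, looks roughly as follows; this is a generic illustration of the Fair Scheduler allocation format, not a reconstruction of the exact test fixture.

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

public class WriteAllocFile {
  public static void main(String[] args) throws IOException {
    // Write a small fair-scheduler allocation file with two weighted queues.
    try (PrintWriter out = new PrintWriter(new FileWriter("fair-scheduler.xml"))) {
      out.println("<?xml version=\"1.0\"?>");
      out.println("<allocations>");
      out.println("  <queue name=\"root\">");
      out.println("    <schedulingPolicy>drf</schedulingPolicy>");
      out.println("    <queue name=\"child1\">");
      out.println("      <weight>1</weight>");
      out.println("    </queue>");
      out.println("    <queue name=\"child2\">");
      out.println("      <weight>1</weight>");
      out.println("    </queue>");
      out.println("  </queue>");
      out.println("</allocations>");
    }
  }
}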
*/ @@ -873,7 +1006,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { } @Test - public void testHierarchicalQueueAllocationFileParsing() throws IOException, SAXException, + public void testHierarchicalQueueAllocationFileParsing() throws IOException, SAXException, AllocationConfigurationException, ParserConfigurationException { conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java index 8b8ce93b506..ab8fcbc2b56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerFairShare.java @@ -109,13 +109,15 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { for (FSLeafQueue leaf : leafQueues) { if (leaf.getName().startsWith("root.parentA")) { - assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity - * 100, 0); + assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity, + 0); } else if (leaf.getName().startsWith("root.parentB")) { - assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity - * 100, 0.1); + assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity, + 0); } } + + verifySteadyFairShareMemory(leafQueues, nodeCapacity); } @Test @@ -135,14 +137,15 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { 100, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA1", false).getFairShare() - .getMemory() - / nodeCapacity * 100, 0.1); + .getMemory() / nodeCapacity * 100, 0.1); assertEquals( 0, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA2", false).getFairShare() - .getMemory() - / nodeCapacity * 100, 0.1); + .getMemory() / nodeCapacity, 0.1); + + verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), + nodeCapacity); } @Test @@ -167,6 +170,9 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { .getMemory() / nodeCapacity * 100, .9); } + + verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), + nodeCapacity); } @Test @@ -206,6 +212,9 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { .getLeafQueue("root.parentB.childB1", false).getFairShare() .getMemory() / nodeCapacity * 100, .9); + + verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), + nodeCapacity); } @Test @@ -253,6 +262,9 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { .getLeafQueue("root.parentA.childA2", false).getFairShare() .getMemory() / nodeCapacity * 100, 0.1); + + verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), + nodeCapacity); } @Test @@ -304,5 +316,45 @@ public class TestFairSchedulerFairShare extends FairSchedulerTestBase { .getLeafQueue("root.parentB.childB1", false).getFairShare() .getVirtualCores() / nodeVCores * 100, .9); + Collection leafQueues = scheduler.getQueueManager() + 
.getLeafQueues(); + + for (FSLeafQueue leaf : leafQueues) { + if (leaf.getName().startsWith("root.parentA")) { + assertEquals(0.2, + (double) leaf.getSteadyFairShare().getMemory() / nodeMem, 0.001); + assertEquals(0.2, + (double) leaf.getSteadyFairShare().getVirtualCores() / nodeVCores, + 0.001); + } else if (leaf.getName().startsWith("root.parentB")) { + assertEquals(0.05, + (double) leaf.getSteadyFairShare().getMemory() / nodeMem, 0.001); + assertEquals(0.1, + (double) leaf.getSteadyFairShare().getVirtualCores() / nodeVCores, + 0.001); + } + } + } + + /** + * Verify whether steady fair shares for all leaf queues still follow + * their weight, not related to active/inactive status. + * + * @param leafQueues + * @param nodeCapacity + */ + private void verifySteadyFairShareMemory(Collection leafQueues, + int nodeCapacity) { + for (FSLeafQueue leaf : leafQueues) { + if (leaf.getName().startsWith("root.parentA")) { + assertEquals(0.2, + (double) leaf.getSteadyFairShare().getMemory() / nodeCapacity, + 0.001); + } else if (leaf.getName().startsWith("root.parentB")) { + assertEquals(0.05, + (double) leaf.getSteadyFairShare().getMemory() / nodeCapacity, + 0.001); + } + } } } From 524a63e59ecfda99c66842462e1bac5d610e7997 Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Fri, 22 Aug 2014 17:06:23 +0000 Subject: [PATCH 13/28] MAPREDUCE-6044. Fully qualified intermediate done dir path breaks per-user dir creation on Windows. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619863 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../TestJobHistoryEventHandler.java | 30 ++++++++++++++++++- .../v2/jobhistory/JobHistoryUtils.java | 4 +-- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index dfef8e5b99e..ddf21ed838b 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -255,6 +255,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-6012. DBInputSplit creates invalid ranges on Oracle. (Wei Yan via kasha) + MAPREDUCE-6044. Fully qualified intermediate done dir path breaks per-user dir + creation on Windows. 
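The JobHistoryUtils hunk further down replaces string concatenation with File.separator by Path(parent, child). A small sketch of why that matters on Windows (the namenode address and user name here are invented):

import java.io.File;

import org.apache.hadoop.fs.Path;

public class DoneDirJoin {
  public static void main(String[] args) {
    String prefix = "hdfs://nn:8020/mapred/history/done_intermediate";
    String user = "alice";

    // Plain concatenation uses the platform separator; on Windows this yields
    // ".../done_intermediate\alice", which is not a valid path on HDFS.
    System.out.println(prefix + File.separator + user);

    // Path(parent, child) always joins with '/', regardless of the platform.
    System.out.println(new Path(prefix, user));
  }
}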
(zjshen) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java index d8a0cc7af52..7539e73ee63 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java @@ -25,7 +25,6 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import static org.mockito.Mockito.never; import java.io.File; import java.io.FileOutputStream; @@ -53,6 +52,8 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal; +import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -399,6 +400,33 @@ public class TestJobHistoryEventHandler { } } + @Test + public void testGetHistoryIntermediateDoneDirForUser() throws IOException { + // Test relative path + Configuration conf = new Configuration(); + conf.set(JHAdminConfig.MR_HISTORY_INTERMEDIATE_DONE_DIR, + "/mapred/history/done_intermediate"); + conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name")); + String pathStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); + Assert.assertEquals("/mapred/history/done_intermediate/" + + System.getProperty("user.name"), pathStr); + + // Test fully qualified path + // Create default configuration pointing to the minicluster + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, + dfsCluster.getURI().toString()); + FileOutputStream os = new FileOutputStream(coreSitePath); + conf.writeXml(os); + os.close(); + // Simulate execution under a non-default namenode + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, + "file:///"); + pathStr = JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf); + Assert.assertEquals(dfsCluster.getURI().toString() + + "/mapred/history/done_intermediate/" + System.getProperty("user.name"), + pathStr); + } + private void queueEvent(JHEvenHandlerForTest jheh, JobHistoryEvent event) { jheh.handle(event); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index 167ee20a22e..e279c03ac1a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -292,8 +292,8 @@ public class JobHistoryUtils { * @return the intermediate done directory for jobhistory files. */ public static String getHistoryIntermediateDoneDirForUser(Configuration conf) throws IOException { - return getConfiguredHistoryIntermediateDoneDirPrefix(conf) + File.separator - + UserGroupInformation.getCurrentUser().getShortUserName(); + return new Path(getConfiguredHistoryIntermediateDoneDirPrefix(conf), + UserGroupInformation.getCurrentUser().getShortUserName()).toString(); } public static boolean shouldCreateNonUserDirectory(Configuration conf) { From e6c36500705d3d756de82ee0ce9ff226f34b938f Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Fri, 22 Aug 2014 18:14:55 +0000 Subject: [PATCH 14/28] HDFS-6829. DFSAdmin refreshSuperUserGroupsConfiguration failed in security cluster. (Contributed by zhaoyunjiong) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619882 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8ff94827dad..bd70656732b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -630,6 +630,9 @@ Release 2.6.0 - UNRELEASED HDFS-6890. NFS readdirplus doesn't return dotdot attributes (brandonli) + HDFS-6829. DFSAdmin refreshSuperUserGroupsConfiguration failed in + security cluster (zhaoyunjiong via Arpit Agarwal) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index f4d39a830de..ad7be18a67c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -356,7 +356,7 @@ public class DFSAdmin extends FsShell { * Construct a DFSAdmin object. */ public DFSAdmin() { - this(null); + this(new HdfsConfiguration()); } /** From 10d267975ce1d7266e232f3615c6045d85142404 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Fri, 22 Aug 2014 18:25:09 +0000 Subject: [PATCH 15/28] HADOOP-10998. Fix bash tab completion code to work (Jim Hester via aw) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619887 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 ++ .../src/contrib/bash-tab-completion/hadoop.sh | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 52fc3e0eec7..50a6b82afd0 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -510,6 +510,8 @@ Release 2.6.0 - UNRELEASED HADOOP-8896. Javadoc points to Wrong Reader and Writer classes in SequenceFile (Ray Chiang via aw) + HADOOP-10998. Fix bash tab completion code to work (Jim Hester via aw) + OPTIMIZATIONS HADOOP-10838. Byte array native checksumming. 
(James Thomas via todd) diff --git a/hadoop-common-project/hadoop-common/src/contrib/bash-tab-completion/hadoop.sh b/hadoop-common-project/hadoop-common/src/contrib/bash-tab-completion/hadoop.sh index 725aee781bc..2a229d4bd86 100644 --- a/hadoop-common-project/hadoop-common/src/contrib/bash-tab-completion/hadoop.sh +++ b/hadoop-common-project/hadoop-common/src/contrib/bash-tab-completion/hadoop.sh @@ -26,7 +26,7 @@ _hadoop() { COMPREPLY=() cur=${COMP_WORDS[COMP_CWORD]} prev=${COMP_WORDS[COMP_CWORD-1]} - script=`which ${COMP_WORDS[0]}` + script=$(which ${COMP_WORDS[0]}) # Bash lets you tab complete things even if the script doesn't # exist (or isn't executable). Check to make sure it is, as we @@ -36,9 +36,9 @@ _hadoop() { 1) # Completing the first argument (the command). - temp=`$script | grep -n "^\s*or"`; - temp=`$script | head -n $((${temp%%:*} - 1)) | awk '/^ / {print $1}' | sort | uniq`; - COMPREPLY=(`compgen -W "${temp}" -- ${cur}`); + temp=$($script | grep -n "^\s*or"); + temp=$($script | head -n $((${temp%%:*} - 1)) | awk '/^ / {print $1}' | sort | uniq); + COMPREPLY=($(compgen -W "${temp}" -- ${cur})); return 0;; 2) @@ -51,21 +51,21 @@ _hadoop() { dfs | dfsadmin | fs | job | pipes) # One option per line, enclosed in square brackets - temp=`$script ${COMP_WORDS[1]} 2>&1 | awk '/^[ \t]*\[/ {gsub("[[\\]]", ""); print $1}'`; - COMPREPLY=(`compgen -W "${temp}" -- ${cur}`); + temp=$($script ${COMP_WORDS[1]} 2>&1 | awk '/^[ \t]*\[/ {gsub("[[\\]]", ""); print $1}'); + COMPREPLY=($(compgen -W "${temp}" -- ${cur})); return 0;; jar) # Any (jar) file - COMPREPLY=(`compgen -A file -- ${cur}`); + COMPREPLY=($(compgen -A file -- ${cur})); return 0;; namenode) # All options specified in one line, # enclosed in [] and separated with | - temp=`$script ${COMP_WORDS[1]} -help 2>&1 | grep Usage: | cut -d '[' -f 2- | awk '{gsub("] \\| \\[|]", " "); print $0}'`; - COMPREPLY=(`compgen -W "${temp}" -- ${cur}`); + temp=$($script ${COMP_WORDS[1]} -help 2>&1 | grep Usage: | cut -d '[' -f 2- | awk '{gsub("] \\| \\[|]", " "); print $0}'); + COMPREPLY=($(compgen -W "${temp}" -- ${cur})); return 0;; *) @@ -83,26 +83,24 @@ _hadoop() { # Pull the list of options, grep for the one the user is trying to use, # and then select the description of the relevant argument temp=$((${COMP_CWORD} - 1)); - temp=`$script ${COMP_WORDS[1]} 2>&1 | grep -- "${COMP_WORDS[2]} " | awk '{gsub("[[ \\]]", ""); print $0}' | cut -d '<' -f ${temp}`; + temp=$($script ${COMP_WORDS[1]} 2>&1 | grep -- "${COMP_WORDS[2]} " | awk '{gsub("[[ \\]]", ""); print $0}' | cut -d '<' -f ${temp} | cut -d '>' -f 1); if [ ${#temp} -lt 1 ]; then # No match return 1; fi; - temp=${temp:0:$((${#temp} - 1))}; - # Now do completion based on the argument case $temp in path | src | dst) # DFS path completion - temp=`$script ${COMP_WORDS[1]} -ls "${cur}*" 2>&1 | grep -vE '^Found ' | cut -f 1 | awk '{gsub("^.* ", ""); print $0;}'` - COMPREPLY=(`compgen -W "${temp}" -- ${cur}`); + temp=$($script ${COMP_WORDS[1]} -ls -d "${cur}*" 2>/dev/null | grep -vE '^Found ' | cut -f 1 | awk '{gsub("^.* ", ""); print $0;}'); + COMPREPLY=($(compgen -W "${temp}" -- ${cur})); return 0;; localsrc | localdst) # Local path completion - COMPREPLY=(`compgen -A file -- ${cur}`); + COMPREPLY=($(compgen -A file -- ${cur})); return 0;; *) From 4b3a6b87221076a6b5df2bf4243575018e5f1793 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Fri, 22 Aug 2014 22:16:15 +0000 Subject: [PATCH 16/28] HADOOP-10282. 
Create a FairCallQueue: a multi-level call queue which schedules incoming calls and multiplexes outgoing calls. (Contributed by Chris Li) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619938 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 4 + .../org/apache/hadoop/ipc/FairCallQueue.java | 449 ++++++++++++++++++ .../hadoop/ipc/FairCallQueueMXBean.java | 27 ++ .../org/apache/hadoop/ipc/RpcMultiplexer.java | 32 ++ .../ipc/WeightedRoundRobinMultiplexer.java | 2 +- .../apache/hadoop/ipc/TestFairCallQueue.java | 392 +++++++++++++++ 6 files changed, 905 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueueMXBean.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcMultiplexer.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestFairCallQueue.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 50a6b82afd0..0291c758e03 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -560,6 +560,10 @@ Release 2.6.0 - UNRELEASED HADOOP-10224. JavaKeyStoreProvider has to protect against corrupting underlying store. (asuresh via tucu) + HADOOP-10282. Create a FairCallQueue: a multi-level call queue which + schedules incoming calls and multiplexes outgoing calls. (Chris Li via + Arpit Agarwal) + BUG FIXES HADOOP-10781. Unportable getgrouplist() usage breaks FreeBSD (Dmitry diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java new file mode 100644 index 00000000000..0b56243db58 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueue.java @@ -0,0 +1,449 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.ipc; + +import java.lang.ref.WeakReference; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.AbstractQueue; +import java.util.HashMap; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.locks.ReentrantLock; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.metrics2.util.MBeans; + +/** + * A queue with multiple levels for each priority. + */ +public class FairCallQueue extends AbstractQueue + implements BlockingQueue { + // Configuration Keys + public static final int IPC_CALLQUEUE_PRIORITY_LEVELS_DEFAULT = 4; + public static final String IPC_CALLQUEUE_PRIORITY_LEVELS_KEY = + "faircallqueue.priority-levels"; + + public static final Log LOG = LogFactory.getLog(FairCallQueue.class); + + /* The queues */ + private final ArrayList> queues; + + /* Read locks */ + private final ReentrantLock takeLock = new ReentrantLock(); + private final Condition notEmpty = takeLock.newCondition(); + private void signalNotEmpty() { + takeLock.lock(); + try { + notEmpty.signal(); + } finally { + takeLock.unlock(); + } + } + + /* Scheduler picks which queue to place in */ + private RpcScheduler scheduler; + + /* Multiplexer picks which queue to draw from */ + private RpcMultiplexer multiplexer; + + /* Statistic tracking */ + private final ArrayList overflowedCalls; + + /** + * Create a FairCallQueue. + * @param capacity the maximum size of each sub-queue + * @param ns the prefix to use for configuration + * @param conf the configuration to read from + * Notes: the FairCallQueue has no fixed capacity. Rather, it has a minimum + * capacity of `capacity` and a maximum capacity of `capacity * number_queues` + */ + public FairCallQueue(int capacity, String ns, Configuration conf) { + int numQueues = parseNumQueues(ns, conf); + LOG.info("FairCallQueue is in use with " + numQueues + " queues."); + + this.queues = new ArrayList>(numQueues); + this.overflowedCalls = new ArrayList(numQueues); + + for(int i=0; i < numQueues; i++) { + this.queues.add(new LinkedBlockingQueue(capacity)); + this.overflowedCalls.add(new AtomicLong(0)); + } + + this.scheduler = new DecayRpcScheduler(numQueues, ns, conf); + this.multiplexer = new WeightedRoundRobinMultiplexer(numQueues, ns, conf); + + // Make this the active source of metrics + MetricsProxy mp = MetricsProxy.getInstance(ns); + mp.setDelegate(this); + } + + /** + * Read the number of queues from the configuration. + * This will affect the FairCallQueue's overall capacity. + * @throws IllegalArgumentException on invalid queue count + */ + private static int parseNumQueues(String ns, Configuration conf) { + int retval = conf.getInt(ns + "." + IPC_CALLQUEUE_PRIORITY_LEVELS_KEY, + IPC_CALLQUEUE_PRIORITY_LEVELS_DEFAULT); + if(retval < 1) { + throw new IllegalArgumentException("numQueues must be at least 1"); + } + return retval; + } + + /** + * Returns the first non-empty queue with equal or lesser priority + * than startIdx. Wraps around, searching a maximum of N + * queues, where N is this.queues.size(). 
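As a usage sketch of the constructor above, mirroring the unit test added later in this patch (it assumes this patch's hadoop-common on the classpath; generics are elided as in the test):

import java.util.concurrent.BlockingQueue;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ipc.FairCallQueue;

public class FairCallQueueUsage {
  @SuppressWarnings({"unchecked", "rawtypes"})
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Two priority levels under the "ns" prefix; each sub-queue holds 5 calls.
    conf.setInt("ns.faircallqueue.priority-levels", 2);

    BlockingQueue queue = new FairCallQueue(5, "ns", conf);

    // Minimum capacity is 5 (one sub-queue), maximum is 5 * 2 = 10.
    System.out.println("remaining capacity: " + queue.remainingCapacity());
  }
}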
+ * + * @param startIdx the queue number to start searching at + * @return the first non-empty queue with less priority, or null if + * everything was empty + */ + private BlockingQueue getFirstNonEmptyQueue(int startIdx) { + final int numQueues = this.queues.size(); + for(int i=0; i < numQueues; i++) { + int idx = (i + startIdx) % numQueues; // offset and wrap around + BlockingQueue queue = this.queues.get(idx); + if (queue.size() != 0) { + return queue; + } + } + + // All queues were empty + return null; + } + + /* AbstractQueue and BlockingQueue methods */ + + /** + * Put and offer follow the same pattern: + * 1. Get a priorityLevel from the scheduler + * 2. Get the nth sub-queue matching this priorityLevel + * 3. delegate the call to this sub-queue. + * + * But differ in how they handle overflow: + * - Put will move on to the next queue until it lands on the last queue + * - Offer does not attempt other queues on overflow + */ + @Override + public void put(E e) throws InterruptedException { + int priorityLevel = scheduler.getPriorityLevel(e); + + final int numLevels = this.queues.size(); + while (true) { + BlockingQueue q = this.queues.get(priorityLevel); + boolean res = q.offer(e); + if (!res) { + // Update stats + this.overflowedCalls.get(priorityLevel).getAndIncrement(); + + // If we failed to insert, try again on the next level + priorityLevel++; + + if (priorityLevel == numLevels) { + // That was the last one, we will block on put in the last queue + // Delete this line to drop the call + this.queues.get(priorityLevel-1).put(e); + break; + } + } else { + break; + } + } + + + signalNotEmpty(); + } + + @Override + public boolean offer(E e, long timeout, TimeUnit unit) + throws InterruptedException { + int priorityLevel = scheduler.getPriorityLevel(e); + BlockingQueue q = this.queues.get(priorityLevel); + boolean ret = q.offer(e, timeout, unit); + + signalNotEmpty(); + + return ret; + } + + @Override + public boolean offer(E e) { + int priorityLevel = scheduler.getPriorityLevel(e); + BlockingQueue q = this.queues.get(priorityLevel); + boolean ret = q.offer(e); + + signalNotEmpty(); + + return ret; + } + + @Override + public E take() throws InterruptedException { + int startIdx = this.multiplexer.getAndAdvanceCurrentIndex(); + + takeLock.lockInterruptibly(); + try { + // Wait while queue is empty + for (;;) { + BlockingQueue q = this.getFirstNonEmptyQueue(startIdx); + if (q != null) { + // Got queue, so return if we can poll out an object + E e = q.poll(); + if (e != null) { + return e; + } + } + + notEmpty.await(); + } + } finally { + takeLock.unlock(); + } + } + + @Override + public E poll(long timeout, TimeUnit unit) + throws InterruptedException { + + int startIdx = this.multiplexer.getAndAdvanceCurrentIndex(); + + long nanos = unit.toNanos(timeout); + takeLock.lockInterruptibly(); + try { + for (;;) { + BlockingQueue q = this.getFirstNonEmptyQueue(startIdx); + if (q != null) { + E e = q.poll(); + if (e != null) { + // Escape condition: there might be something available + return e; + } + } + + if (nanos <= 0) { + // Wait has elapsed + return null; + } + + try { + // Now wait on the condition for a bit. If we get + // spuriously awoken we'll re-loop + nanos = notEmpty.awaitNanos(nanos); + } catch (InterruptedException ie) { + notEmpty.signal(); // propagate to a non-interrupted thread + throw ie; + } + } + } finally { + takeLock.unlock(); + } + } + + /** + * poll() provides no strict consistency: it is possible for poll to return + * null even though an element is in the queue. 
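The overflow behaviour of put() described earlier in this file, falling through to lower-priority sub-queues and blocking only on the last one, can be pictured with plain JDK queues (illustration only, two levels of capacity 2):

import java.util.ArrayList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;

public class OverflowCascadeSketch {
  public static void main(String[] args) throws InterruptedException {
    ArrayList<BlockingQueue<String>> levels = new ArrayList<BlockingQueue<String>>();
    levels.add(new ArrayBlockingQueue<String>(2));
    levels.add(new ArrayBlockingQueue<String>(2));

    for (int i = 0; i < 4; i++) {
      putCascade(levels, 0, "call-" + i);
    }
    System.out.println("level 0: " + levels.get(0)); // [call-0, call-1]
    System.out.println("level 1: " + levels.get(1)); // [call-2, call-3]
  }

  static void putCascade(ArrayList<BlockingQueue<String>> levels, int start, String e)
      throws InterruptedException {
    for (int level = start; ; level++) {
      if (level == levels.size() - 1) {
        levels.get(level).put(e);   // last level: block rather than drop
        return;
      }
      if (levels.get(level).offer(e)) {
        return;                     // fit at the scheduled level
      }
      // overflow: fall through to the next (lower-priority) level
    }
  }
}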
+ */ + @Override + public E poll() { + int startIdx = this.multiplexer.getAndAdvanceCurrentIndex(); + + BlockingQueue q = this.getFirstNonEmptyQueue(startIdx); + if (q == null) { + return null; // everything is empty + } + + // Delegate to the sub-queue's poll, which could still return null + return q.poll(); + } + + /** + * Peek, like poll, provides no strict consistency. + */ + @Override + public E peek() { + BlockingQueue q = this.getFirstNonEmptyQueue(0); + if (q == null) { + return null; + } else { + return q.peek(); + } + } + + /** + * Size returns the sum of all sub-queue sizes, so it may be greater than + * capacity. + * Note: size provides no strict consistency, and should not be used to + * control queue IO. + */ + @Override + public int size() { + int size = 0; + for (BlockingQueue q : this.queues) { + size += q.size(); + } + return size; + } + + /** + * Iterator is not implemented, as it is not needed. + */ + @Override + public Iterator iterator() { + throw new NotImplementedException(); + } + + /** + * drainTo defers to each sub-queue. Note that draining from a FairCallQueue + * to another FairCallQueue will likely fail, since the incoming calls + * may be scheduled differently in the new FairCallQueue. Nonetheless this + * method is provided for completeness. + */ + @Override + public int drainTo(Collection c, int maxElements) { + int sum = 0; + for (BlockingQueue q : this.queues) { + sum += q.drainTo(c, maxElements); + } + return sum; + } + + @Override + public int drainTo(Collection c) { + int sum = 0; + for (BlockingQueue q : this.queues) { + sum += q.drainTo(c); + } + return sum; + } + + /** + * Returns maximum remaining capacity. This does not reflect how much you can + * ideally fit in this FairCallQueue, as that would depend on the scheduler's + * decisions. + */ + @Override + public int remainingCapacity() { + int sum = 0; + for (BlockingQueue q : this.queues) { + sum += q.remainingCapacity(); + } + return sum; + } + + /** + * MetricsProxy is a singleton because we may init multiple + * FairCallQueues, but the metrics system cannot unregister beans cleanly. 
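The weak-reference delegation used here can be sketched with plain JDK types: the proxy outlives any particular queue instance, keeps only a WeakReference to the current one, and degrades to an empty report once that instance is gone (illustrative names, not the Hadoop classes):

import java.lang.ref.WeakReference;
import java.util.Arrays;

public class WeakDelegateSketch {
  /** Long-lived handle; the object it reports on can be swapped and collected. */
  static final class Proxy {
    private WeakReference<int[]> delegate = new WeakReference<int[]>(null);

    void setDelegate(int[] sizes) {
      delegate = new WeakReference<int[]>(sizes);
    }

    int[] report() {
      int[] sizes = delegate.get();
      return sizes == null ? new int[0] : sizes;   // empty when the target is gone
    }
  }

  public static void main(String[] args) {
    Proxy proxy = new Proxy();                  // registered once, never unregistered
    proxy.setDelegate(new int[] {3, 1});
    System.out.println(Arrays.toString(proxy.report()));  // [3, 1]

    proxy.setDelegate(new int[] {0, 0});        // a new delegate replaces the old one,
    System.out.println(Arrays.toString(proxy.report()));  // [0, 0]; the old is collectable
  }
}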
+ */ + private static final class MetricsProxy implements FairCallQueueMXBean { + // One singleton per namespace + private static final HashMap INSTANCES = + new HashMap(); + + // Weakref for delegate, so we don't retain it forever if it can be GC'd + private WeakReference delegate; + + // Keep track of how many objects we registered + private int revisionNumber = 0; + + private MetricsProxy(String namespace) { + MBeans.register(namespace, "FairCallQueue", this); + } + + public static synchronized MetricsProxy getInstance(String namespace) { + MetricsProxy mp = INSTANCES.get(namespace); + if (mp == null) { + // We must create one + mp = new MetricsProxy(namespace); + INSTANCES.put(namespace, mp); + } + return mp; + } + + public void setDelegate(FairCallQueue obj) { + this.delegate = new WeakReference(obj); + this.revisionNumber++; + } + + @Override + public int[] getQueueSizes() { + FairCallQueue obj = this.delegate.get(); + if (obj == null) { + return new int[]{}; + } + + return obj.getQueueSizes(); + } + + @Override + public long[] getOverflowedCalls() { + FairCallQueue obj = this.delegate.get(); + if (obj == null) { + return new long[]{}; + } + + return obj.getOverflowedCalls(); + } + + @Override public int getRevision() { + return revisionNumber; + } + } + + // FairCallQueueMXBean + public int[] getQueueSizes() { + int numQueues = queues.size(); + int[] sizes = new int[numQueues]; + for (int i=0; i < numQueues; i++) { + sizes[i] = queues.get(i).size(); + } + return sizes; + } + + public long[] getOverflowedCalls() { + int numQueues = queues.size(); + long[] calls = new long[numQueues]; + for (int i=0; i < numQueues; i++) { + calls[i] = overflowedCalls.get(i).get(); + } + return calls; + } + + // For testing + @VisibleForTesting + public void setScheduler(RpcScheduler newScheduler) { + this.scheduler = newScheduler; + } + + @VisibleForTesting + public void setMultiplexer(RpcMultiplexer newMux) { + this.multiplexer = newMux; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueueMXBean.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueueMXBean.java new file mode 100644 index 00000000000..bd68ecb1ad3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/FairCallQueueMXBean.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +public interface FairCallQueueMXBean { + // Get the size of each subqueue, the index corrosponding to the priority + // level. 
+ int[] getQueueSizes(); + long[] getOverflowedCalls(); + int getRevision(); +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcMultiplexer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcMultiplexer.java new file mode 100644 index 00000000000..01eecc55cfa --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcMultiplexer.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +/** + * Implement this interface to make a pluggable multiplexer in the + * FairCallQueue. + */ +public interface RpcMultiplexer { + /** + * Should get current index and optionally perform whatever is needed + * to prepare the next index. + * @return current index + */ + int getAndAdvanceCurrentIndex(); +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java index 497ca757461..cfda94734cf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedRoundRobinMultiplexer.java @@ -38,7 +38,7 @@ import org.apache.hadoop.conf.Configuration; * There may be more reads than the minimum due to race conditions. This is * allowed by design for performance reasons. */ -public class WeightedRoundRobinMultiplexer { +public class WeightedRoundRobinMultiplexer implements RpcMultiplexer { // Config keys public static final String IPC_CALLQUEUE_WRRMUX_WEIGHTS_KEY = "faircallqueue.multiplexer.weights"; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestFairCallQueue.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestFairCallQueue.java new file mode 100644 index 00000000000..acbedc50f9f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestFairCallQueue.java @@ -0,0 +1,392 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ipc; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import junit.framework.TestCase; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.BlockingQueue; + +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.conf.Configuration; +import org.mockito.Matchers; + +import static org.apache.hadoop.ipc.FairCallQueue.IPC_CALLQUEUE_PRIORITY_LEVELS_KEY; + +public class TestFairCallQueue extends TestCase { + private FairCallQueue fcq; + + private Schedulable mockCall(String id) { + Schedulable mockCall = mock(Schedulable.class); + UserGroupInformation ugi = mock(UserGroupInformation.class); + + when(ugi.getUserName()).thenReturn(id); + when(mockCall.getUserGroupInformation()).thenReturn(ugi); + + return mockCall; + } + + // A scheduler which always schedules into priority zero + private RpcScheduler alwaysZeroScheduler; + { + RpcScheduler sched = mock(RpcScheduler.class); + when(sched.getPriorityLevel(Matchers.any())).thenReturn(0); // always queue 0 + alwaysZeroScheduler = sched; + } + + public void setUp() { + Configuration conf = new Configuration(); + conf.setInt("ns." 
+ IPC_CALLQUEUE_PRIORITY_LEVELS_KEY, 2); + + fcq = new FairCallQueue(5, "ns", conf); + } + + // + // Ensure that FairCallQueue properly implements BlockingQueue + // + public void testPollReturnsNullWhenEmpty() { + assertNull(fcq.poll()); + } + + public void testPollReturnsTopCallWhenNotEmpty() { + Schedulable call = mockCall("c"); + assertTrue(fcq.offer(call)); + + assertEquals(call, fcq.poll()); + + // Poll took it out so the fcq is empty + assertEquals(0, fcq.size()); + } + + public void testOfferSucceeds() { + fcq.setScheduler(alwaysZeroScheduler); + + for (int i = 0; i < 5; i++) { + // We can fit 10 calls + assertTrue(fcq.offer(mockCall("c"))); + } + + assertEquals(5, fcq.size()); + } + + public void testOfferFailsWhenFull() { + fcq.setScheduler(alwaysZeroScheduler); + for (int i = 0; i < 5; i++) { assertTrue(fcq.offer(mockCall("c"))); } + + assertFalse(fcq.offer(mockCall("c"))); // It's full + + assertEquals(5, fcq.size()); + } + + public void testOfferSucceedsWhenScheduledLowPriority() { + // Scheduler will schedule into queue 0 x 5, then queue 1 + RpcScheduler sched = mock(RpcScheduler.class); + when(sched.getPriorityLevel(Matchers.any())).thenReturn(0, 0, 0, 0, 0, 1, 0); + fcq.setScheduler(sched); + for (int i = 0; i < 5; i++) { assertTrue(fcq.offer(mockCall("c"))); } + + assertTrue(fcq.offer(mockCall("c"))); + + assertEquals(6, fcq.size()); + } + + public void testPeekNullWhenEmpty() { + assertNull(fcq.peek()); + } + + public void testPeekNonDestructive() { + Schedulable call = mockCall("c"); + assertTrue(fcq.offer(call)); + + assertEquals(call, fcq.peek()); + assertEquals(call, fcq.peek()); // Non-destructive + assertEquals(1, fcq.size()); + } + + public void testPeekPointsAtHead() { + Schedulable call = mockCall("c"); + Schedulable next = mockCall("b"); + fcq.offer(call); + fcq.offer(next); + + assertEquals(call, fcq.peek()); // Peek points at the head + } + + public void testPollTimeout() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + + assertNull(fcq.poll(10, TimeUnit.MILLISECONDS)); + } + + public void testPollSuccess() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + + Schedulable call = mockCall("c"); + assertTrue(fcq.offer(call)); + + assertEquals(call, fcq.poll(10, TimeUnit.MILLISECONDS)); + + assertEquals(0, fcq.size()); + } + + public void testOfferTimeout() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + for (int i = 0; i < 5; i++) { + assertTrue(fcq.offer(mockCall("c"), 10, TimeUnit.MILLISECONDS)); + } + + assertFalse(fcq.offer(mockCall("e"), 10, TimeUnit.MILLISECONDS)); // It's full + + assertEquals(5, fcq.size()); + } + + public void testDrainTo() { + Configuration conf = new Configuration(); + conf.setInt("ns." + IPC_CALLQUEUE_PRIORITY_LEVELS_KEY, 2); + FairCallQueue fcq2 = new FairCallQueue(10, "ns", conf); + + fcq.setScheduler(alwaysZeroScheduler); + fcq2.setScheduler(alwaysZeroScheduler); + + // Start with 3 in fcq, to be drained + for (int i = 0; i < 3; i++) { + fcq.offer(mockCall("c")); + } + + fcq.drainTo(fcq2); + + assertEquals(0, fcq.size()); + assertEquals(3, fcq2.size()); + } + + public void testDrainToWithLimit() { + Configuration conf = new Configuration(); + conf.setInt("ns." 
+ IPC_CALLQUEUE_PRIORITY_LEVELS_KEY, 2); + FairCallQueue fcq2 = new FairCallQueue(10, "ns", conf); + + fcq.setScheduler(alwaysZeroScheduler); + fcq2.setScheduler(alwaysZeroScheduler); + + // Start with 3 in fcq, to be drained + for (int i = 0; i < 3; i++) { + fcq.offer(mockCall("c")); + } + + fcq.drainTo(fcq2, 2); + + assertEquals(1, fcq.size()); + assertEquals(2, fcq2.size()); + } + + public void testInitialRemainingCapacity() { + assertEquals(10, fcq.remainingCapacity()); + } + + public void testFirstQueueFullRemainingCapacity() { + fcq.setScheduler(alwaysZeroScheduler); + while (fcq.offer(mockCall("c"))) ; // Queue 0 will fill up first, then queue 1 + + assertEquals(5, fcq.remainingCapacity()); + } + + public void testAllQueuesFullRemainingCapacity() { + RpcScheduler sched = mock(RpcScheduler.class); + when(sched.getPriorityLevel(Matchers.any())).thenReturn(0, 0, 0, 0, 0, 1, 1, 1, 1, 1); + fcq.setScheduler(sched); + while (fcq.offer(mockCall("c"))) ; + + assertEquals(0, fcq.remainingCapacity()); + assertEquals(10, fcq.size()); + } + + public void testQueuesPartialFilledRemainingCapacity() { + RpcScheduler sched = mock(RpcScheduler.class); + when(sched.getPriorityLevel(Matchers.any())).thenReturn(0, 1, 0, 1, 0); + fcq.setScheduler(sched); + for (int i = 0; i < 5; i++) { fcq.offer(mockCall("c")); } + + assertEquals(5, fcq.remainingCapacity()); + assertEquals(5, fcq.size()); + } + + /** + * Putter produces FakeCalls + */ + public class Putter implements Runnable { + private final BlockingQueue cq; + + public final String tag; + public volatile int callsAdded = 0; // How many calls we added, accurate unless interrupted + private final int maxCalls; + + public Putter(BlockingQueue aCq, int maxCalls, String tag) { + this.maxCalls = maxCalls; + this.cq = aCq; + this.tag = tag; + } + + private String getTag() { + if (this.tag != null) return this.tag; + return ""; + } + + @Override + public void run() { + try { + // Fill up to max (which is infinite if maxCalls < 0) + while (callsAdded < maxCalls || maxCalls < 0) { + cq.put(mockCall(getTag())); + callsAdded++; + } + } catch (InterruptedException e) { + return; + } + } + } + + /** + * Taker consumes FakeCalls + */ + public class Taker implements Runnable { + private final BlockingQueue cq; + + public final String tag; // if >= 0 means we will only take the matching tag, and put back + // anything else + public volatile int callsTaken = 0; // total calls taken, accurate if we aren't interrupted + public volatile Schedulable lastResult = null; // the last thing we took + private final int maxCalls; // maximum calls to take + + private IdentityProvider uip; + + public Taker(BlockingQueue aCq, int maxCalls, String tag) { + this.maxCalls = maxCalls; + this.cq = aCq; + this.tag = tag; + this.uip = new UserIdentityProvider(); + } + + @Override + public void run() { + try { + // Take while we don't exceed maxCalls, or if maxCalls is undefined (< 0) + while (callsTaken < maxCalls || maxCalls < 0) { + Schedulable res = cq.take(); + String identity = uip.makeIdentity(res); + + if (tag != null && this.tag.equals(identity)) { + // This call does not match our tag, we should put it back and try again + cq.put(res); + } else { + callsTaken++; + lastResult = res; + } + } + } catch (InterruptedException e) { + return; + } + } + } + + // Assert we can take exactly the numberOfTakes + public void assertCanTake(BlockingQueue cq, int numberOfTakes, + int takeAttempts) throws InterruptedException { + + Taker taker = new Taker(cq, takeAttempts, "default"); + Thread t 
= new Thread(taker); + t.start(); + t.join(100); + + assertEquals(numberOfTakes, taker.callsTaken); + t.interrupt(); + } + + // Assert we can put exactly the numberOfPuts + public void assertCanPut(BlockingQueue cq, int numberOfPuts, + int putAttempts) throws InterruptedException { + + Putter putter = new Putter(cq, putAttempts, null); + Thread t = new Thread(putter); + t.start(); + t.join(100); + + assertEquals(numberOfPuts, putter.callsAdded); + t.interrupt(); + } + + // Make sure put will overflow into lower queues when the top is full + public void testPutOverflows() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + + // We can fit more than 5, even though the scheduler suggests the top queue + assertCanPut(fcq, 8, 8); + assertEquals(8, fcq.size()); + } + + public void testPutBlocksWhenAllFull() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + + assertCanPut(fcq, 10, 10); // Fill up + assertEquals(10, fcq.size()); + + // Put more which causes overflow + assertCanPut(fcq, 0, 1); // Will block + } + + public void testTakeBlocksWhenEmpty() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + assertCanTake(fcq, 0, 1); + } + + public void testTakeRemovesCall() throws InterruptedException { + fcq.setScheduler(alwaysZeroScheduler); + Schedulable call = mockCall("c"); + fcq.offer(call); + + assertEquals(call, fcq.take()); + assertEquals(0, fcq.size()); + } + + public void testTakeTriesNextQueue() throws InterruptedException { + // Make a FCQ filled with calls in q 1 but empty in q 0 + RpcScheduler q1Scheduler = mock(RpcScheduler.class); + when(q1Scheduler.getPriorityLevel(Matchers.any())).thenReturn(1); + fcq.setScheduler(q1Scheduler); + + // A mux which only draws from q 0 + RpcMultiplexer q0mux = mock(RpcMultiplexer.class); + when(q0mux.getAndAdvanceCurrentIndex()).thenReturn(0); + fcq.setMultiplexer(q0mux); + + Schedulable call = mockCall("c"); + fcq.put(call); + + // Take from q1 even though mux said q0, since q0 empty + assertEquals(call, fcq.take()); + assertEquals(0, fcq.size()); + } +} \ No newline at end of file From a83d055f2552e84e2d8096a1a30237cd21fa879a Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Sat, 23 Aug 2014 05:30:21 +0000 Subject: [PATCH 17/28] HDFS-4852. libhdfs documentation is out of date. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619967 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../hadoop-hdfs/src/site/apt/LibHdfs.apt.vm | 29 ++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index bd70656732b..d0389ab7168 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -633,6 +633,8 @@ Release 2.6.0 - UNRELEASED HDFS-6829. DFSAdmin refreshSuperUserGroupsConfiguration failed in security cluster (zhaoyunjiong via Arpit Agarwal) + HDFS-4852. libhdfs documentation is out of date. (cnauroth) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/LibHdfs.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/LibHdfs.apt.vm index 5ad50ab6356..23ff678ba51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/LibHdfs.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/LibHdfs.apt.vm @@ -26,14 +26,17 @@ C API libhdfs (HDFS). 
It provides C APIs to a subset of the HDFS APIs to manipulate HDFS files and the filesystem. libhdfs is part of the Hadoop distribution and comes pre-compiled in - <<<${HADOOP_PREFIX}/libhdfs/libhdfs.so>>> . + <<<${HADOOP_HDFS_HOME}/lib/native/libhdfs.so>>> . libhdfs is compatible with + Windows and can be built on Windows by running <<>> within the + <<>> directory of the source tree. * The APIs - The libhdfs APIs are a subset of: {{{hadoop fs APIs}}}. + The libhdfs APIs are a subset of the + {{{../../api/org/apache/hadoop/fs/FileSystem.html}Hadoop FileSystem APIs}}. The header file for libhdfs describes each API in detail and is - available in <<<${HADOOP_PREFIX}/src/c++/libhdfs/hdfs.h>>> + available in <<<${HADOOP_HDFS_HOME}/include/hdfs.h>>>. * A Sample Program @@ -55,24 +58,28 @@ C API libhdfs fprintf(stderr, "Failed to 'flush' %s\n", writePath); exit(-1); } - hdfsCloseFile(fs, writeFile); + hdfsCloseFile(fs, writeFile); } ---- * How To Link With The Library - See the Makefile for <<>> in the libhdfs source directory - (<<<${HADOOP_PREFIX}/src/c++/libhdfs/Makefile>>>) or something like: - <<>> + See the CMake file for <<>> in the libhdfs source + directory (<<>>) or + something like: + <<>> * Common Problems The most common problem is the <<>> is not set properly when calling a program that uses libhdfs. Make sure you set it to all the - Hadoop jars needed to run Hadoop itself. Currently, there is no way to - programmatically generate the classpath, but a good bet is to include - all the jar files in <<<${HADOOP_PREFIX}>>> and <<<${HADOOP_PREFIX}/lib>>> as well - as the right configuration directory containing <<>> + Hadoop jars needed to run Hadoop itself as well as the right configuration + directory containing <<>>. It is not valid to use wildcard + syntax for specifying multiple jars. It may be useful to run + <<>> or <<>>> to + generate the correct classpath for your deployment. See + {{{../hadoop-common/CommandsManual.html#classpath}Hadoop Commands Reference}} + for more information on this command. * Thread Safe From e871955765a5a40707e866179945c5dc4fefd389 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Sat, 23 Aug 2014 06:01:17 +0000 Subject: [PATCH 18/28] HDFS-6899. Allow changing MiniDFSCluster volumes per DN and capacity per volume. (Arpit Agarwal) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1619970 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../datanode/fsdataset/impl/FsVolumeImpl.java | 45 +++++-- .../apache/hadoop/hdfs/MiniDFSCluster.java | 119 +++++++++++++++--- .../hdfs/MiniDFSClusterWithNodeGroup.java | 26 +++- .../org/apache/hadoop/hdfs/TestSafeMode.java | 2 +- .../TestBlockHasMultipleReplicasOnSameDN.java | 5 +- ...stDnRespectsBlockReportSplitThreshold.java | 6 +- .../namenode/metrics/TestNameNodeMetrics.java | 2 +- 8 files changed, 171 insertions(+), 37 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index d0389ab7168..31ed15c6114 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -507,6 +507,9 @@ Release 2.6.0 - UNRELEASED HDFS-6758. block writer should pass the expected block size to DataXceiverServer. (Arpit Agarwal) + HDFS-6899. Allow changing MiniDFSCluster volumes per DN and capacity + per volume. (Arpit Agarwal) + OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. 
(wang) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index adfc896f7f2..0b9fda83ae5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -29,6 +29,7 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DF; @@ -50,7 +51,8 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder; * It uses the {@link FsDatasetImpl} object for synchronization. */ @InterfaceAudience.Private -class FsVolumeImpl implements FsVolumeSpi { +@VisibleForTesting +public class FsVolumeImpl implements FsVolumeSpi { private final FsDatasetImpl dataset; private final String storageID; private final StorageType storageType; @@ -59,6 +61,12 @@ class FsVolumeImpl implements FsVolumeSpi { private final File currentDir; // /current private final DF usage; private final long reserved; + + // Capacity configured. This is useful when we want to + // limit the visible capacity for tests. If negative, then we just + // query from the filesystem. + protected long configuredCapacity; + /** * Per-volume worker pool that processes new blocks to cache. * The maximum number of workers per volume is bounded (configurable via @@ -78,20 +86,26 @@ class FsVolumeImpl implements FsVolumeSpi { File parent = currentDir.getParentFile(); this.usage = new DF(parent, conf); this.storageType = storageType; + this.configuredCapacity = -1; + cacheExecutor = initializeCacheExecutor(parent); + } + + protected ThreadPoolExecutor initializeCacheExecutor(File parent) { final int maxNumThreads = dataset.datanode.getConf().getInt( DFSConfigKeys.DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY, - DFSConfigKeys.DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT - ); + DFSConfigKeys.DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT); + ThreadFactory workerFactory = new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("FsVolumeImplWorker-" + parent.toString() + "-%d") .build(); - cacheExecutor = new ThreadPoolExecutor( + ThreadPoolExecutor executor = new ThreadPoolExecutor( 1, maxNumThreads, 60, TimeUnit.SECONDS, new LinkedBlockingQueue(), workerFactory); - cacheExecutor.allowCoreThreadTimeOut(true); + executor.allowCoreThreadTimeOut(true); + return executor; } File getCurrentDir() { @@ -130,9 +144,24 @@ class FsVolumeImpl implements FsVolumeSpi { * reserved capacity. * @return the unreserved number of bytes left in this filesystem. May be zero. */ - long getCapacity() { - long remaining = usage.getCapacity() - reserved; - return remaining > 0 ? remaining : 0; + @VisibleForTesting + public long getCapacity() { + if (configuredCapacity < 0) { + long remaining = usage.getCapacity() - reserved; + return remaining > 0 ? remaining : 0; + } + + return configuredCapacity; + } + + /** + * This function MUST NOT be used outside of tests. 
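For reference, a test that wants to cap the visible size of a single volume could use this hook roughly as follows. This is only an illustrative sketch: the running MiniDFSCluster held in 'cluster' is an assumption, while the getDataNodes()/getFSDataset()/getVolumes() calls and the FsVolumeImpl cast mirror the usage this patch adds to MiniDFSCluster itself.

    // Illustrative sketch only; 'cluster' is assumed to be a started MiniDFSCluster.
    DataNode dn = cluster.getDataNodes().get(0);
    FsVolumeImpl volume = (FsVolumeImpl) dn.getFSDataset().getVolumes().get(0);
    // After this call, getCapacity() reports 2 GB regardless of the real disk size.
    volume.setCapacityForTesting(2L * 1024 * 1024 * 1024);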
+ * + * @param capacity + */ + @VisibleForTesting + public void setCapacityForTesting(long capacity) { + this.configuredCapacity = capacity; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index fef5ff85227..98ca3160047 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -55,7 +55,6 @@ import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; import java.nio.channels.FileChannel; -import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -91,7 +90,9 @@ import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetUtil; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; @@ -131,11 +132,15 @@ public class MiniDFSCluster { public static final String DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY = DFS_NAMENODE_SAFEMODE_EXTENSION_KEY + ".testing"; - // Changing this value may break some tests that assume it is 2. - public static final int DIRS_PER_DATANODE = 2; + // Changing this default may break some tests that assume it is 2. + private static final int DEFAULT_STORAGES_PER_DATANODE = 2; static { DefaultMetricsSystem.setMiniClusterMode(true); } + public int getStoragesPerDatanode() { + return storagesPerDatanode; + } + /** * Class to construct instances of MiniDFSClusters with specific options. */ @@ -145,6 +150,8 @@ public class MiniDFSCluster { private final Configuration conf; private int numDataNodes = 1; private StorageType[][] storageTypes = null; + private StorageType[] storageTypes1D = null; + private int storagesPerDatanode = DEFAULT_STORAGES_PER_DATANODE; private boolean format = true; private boolean manageNameDfsDirs = true; private boolean manageNameDfsSharedDirs = true; @@ -155,6 +162,8 @@ public class MiniDFSCluster { private String[] racks = null; private String [] hosts = null; private long [] simulatedCapacities = null; + private long [][] storageCapacities = null; + private long [] storageCapacities1D = null; private String clusterId = null; private boolean waitSafeMode = true; private boolean setupHostsFile = false; @@ -192,17 +201,21 @@ public class MiniDFSCluster { return this; } + /** + * Default: DEFAULT_STORAGES_PER_DATANODE + */ + public Builder storagesPerDatanode(int numStorages) { + this.storagesPerDatanode = numStorages; + return this; + } + /** * Set the same storage type configuration for each datanode. * If storageTypes is uninitialized or passed null then * StorageType.DEFAULT is used. 
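To make these per-datanode storage knobs concrete, a caller wanting two 1 GB DISK volumes on each of two DataNodes might build the cluster roughly as sketched below. The values are illustrative; storageCapacities() is the companion setter introduced just below, and numDataNodes(), the Builder(conf) constructor, build() and waitActive() are pre-existing MiniDFSCluster APIs assumed here rather than part of this change.

    // Sketch under the assumptions above: 2 DNs x 2 DISK volumes, each capped at 1 GB.
    long gb = 1024L * 1024 * 1024;
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(2)
        .storagesPerDatanode(2)
        .storageTypes(new StorageType[] { StorageType.DISK, StorageType.DISK })
        .storageCapacities(new long[] { gb, gb })
        .build();
    cluster.waitActive();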
*/ public Builder storageTypes(StorageType[] types) { - assert types.length == DIRS_PER_DATANODE; - this.storageTypes = new StorageType[numDataNodes][types.length]; - for (int i = 0; i < numDataNodes; ++i) { - this.storageTypes[i] = types; - } + this.storageTypes1D = types; return this; } @@ -216,6 +229,26 @@ public class MiniDFSCluster { return this; } + /** + * Set the same storage capacity configuration for each datanode. + * If storageTypes is uninitialized or passed null then + * StorageType.DEFAULT is used. + */ + public Builder storageCapacities(long[] capacities) { + this.storageCapacities1D = capacities; + return this; + } + + /** + * Set custom storage capacity configuration for each datanode. + * If storageCapacities is uninitialized or passed null then + * capacity is limited by available disk space. + */ + public Builder storageCapacities(long[][] capacities) { + this.storageCapacities = capacities; + return this; + } + /** * Default: true */ @@ -289,6 +322,11 @@ public class MiniDFSCluster { } /** + * Use SimulatedFSDataset and limit the capacity of each DN per + * the values passed in val. + * + * For limiting the capacity of volumes with real storage, see + * {@link FsVolumeImpl#setCapacityForTesting} * Default: null */ public Builder simulatedCapacities(long[] val) { @@ -391,7 +429,28 @@ public class MiniDFSCluster { LOG.info("starting cluster: numNameNodes=" + numNameNodes + ", numDataNodes=" + builder.numDataNodes); nameNodes = new NameNodeInfo[numNameNodes]; + this.storagesPerDatanode = builder.storagesPerDatanode; + + // Duplicate the storageType setting for each DN. + if (builder.storageTypes == null && builder.storageTypes1D != null) { + assert builder.storageTypes1D.length == storagesPerDatanode; + builder.storageTypes = new StorageType[builder.numDataNodes][storagesPerDatanode]; + for (int i = 0; i < builder.numDataNodes; ++i) { + builder.storageTypes[i] = builder.storageTypes1D; + } + } + + // Duplicate the storageCapacity setting for each DN. + if (builder.storageCapacities == null && builder.storageCapacities1D != null) { + assert builder.storageCapacities1D.length == storagesPerDatanode; + builder.storageCapacities = new long[builder.numDataNodes][storagesPerDatanode]; + + for (int i = 0; i < builder.numDataNodes; ++i) { + builder.storageCapacities[i] = builder.storageCapacities1D; + } + } + initMiniDFSCluster(builder.conf, builder.numDataNodes, builder.storageTypes, @@ -404,6 +463,7 @@ public class MiniDFSCluster { builder.dnOption, builder.racks, builder.hosts, + builder.storageCapacities, builder.simulatedCapacities, builder.clusterId, builder.waitSafeMode, @@ -446,6 +506,7 @@ public class MiniDFSCluster { private boolean waitSafeMode = true; private boolean federation; private boolean checkExitOnShutdown = true; + protected final int storagesPerDatanode; /** * A unique instance identifier for the cluster. 
This @@ -484,6 +545,7 @@ public class MiniDFSCluster { */ public MiniDFSCluster() { nameNodes = new NameNodeInfo[0]; // No namenode in the cluster + storagesPerDatanode = DEFAULT_STORAGES_PER_DATANODE; synchronized (MiniDFSCluster.class) { instanceId = instanceCount++; } @@ -660,11 +722,12 @@ public class MiniDFSCluster { String[] racks, String hosts[], long[] simulatedCapacities) throws IOException { this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster + this.storagesPerDatanode = DEFAULT_STORAGES_PER_DATANODE; initMiniDFSCluster(conf, numDataNodes, null, format, - manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs, - operation, null, racks, hosts, - simulatedCapacities, null, true, false, - MiniDFSNNTopology.simpleSingleNN(nameNodePort, 0), true, false, false, null); + manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs, + operation, null, racks, hosts, + null, simulatedCapacities, null, true, false, + MiniDFSNNTopology.simpleSingleNN(nameNodePort, 0), true, false, false, null); } private void initMiniDFSCluster( @@ -673,7 +736,8 @@ public class MiniDFSCluster { boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy, boolean manageDataDfsDirs, StartupOption startOpt, StartupOption dnStartOpt, String[] racks, - String[] hosts, long[] simulatedCapacities, String clusterId, + String[] hosts, + long[][] storageCapacities, long[] simulatedCapacities, String clusterId, boolean waitSafeMode, boolean setupHostsFile, MiniDFSNNTopology nnTopology, boolean checkExitOnShutdown, boolean checkDataNodeAddrConfig, @@ -744,7 +808,7 @@ public class MiniDFSCluster { // Start the DataNodes startDataNodes(conf, numDataNodes, storageTypes, manageDataDfsDirs, dnStartOpt != null ? dnStartOpt : startOpt, - racks, hosts, simulatedCapacities, setupHostsFile, + racks, hosts, storageCapacities, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig, dnConfOverlays); waitClusterUp(); //make sure ProxyUsers uses the latest conf @@ -1119,8 +1183,8 @@ public class MiniDFSCluster { String makeDataNodeDirs(int dnIndex, StorageType[] storageTypes) throws IOException { StringBuilder sb = new StringBuilder(); - assert storageTypes == null || storageTypes.length == DIRS_PER_DATANODE; - for (int j = 0; j < DIRS_PER_DATANODE; ++j) { + assert storageTypes == null || storageTypes.length == storagesPerDatanode; + for (int j = 0; j < storagesPerDatanode; ++j) { File dir = getInstanceStorageDir(dnIndex, j); dir.mkdirs(); if (!dir.isDirectory()) { @@ -1196,7 +1260,7 @@ public class MiniDFSCluster { long[] simulatedCapacities, boolean setupHostsFile) throws IOException { startDataNodes(conf, numDataNodes, null, manageDfsDirs, operation, racks, hosts, - simulatedCapacities, setupHostsFile, false, false, null); + null, simulatedCapacities, setupHostsFile, false, false, null); } public synchronized void startDataNodes(Configuration conf, int numDataNodes, @@ -1206,7 +1270,7 @@ public class MiniDFSCluster { boolean setupHostsFile, boolean checkDataNodeAddrConfig) throws IOException { startDataNodes(conf, numDataNodes, null, manageDfsDirs, operation, racks, hosts, - simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false, null); + null, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false, null); } /** @@ -1240,12 +1304,15 @@ public class MiniDFSCluster { public synchronized void startDataNodes(Configuration conf, int numDataNodes, StorageType[][] storageTypes, boolean manageDfsDirs, StartupOption operation, String[] racks, 
String[] hosts, + long[][] storageCapacities, long[] simulatedCapacities, boolean setupHostsFile, boolean checkDataNodeAddrConfig, boolean checkDataNodeHostConfig, Configuration[] dnConfOverlays) throws IOException { + assert storageCapacities == null || simulatedCapacities == null; assert storageTypes == null || storageTypes.length == numDataNodes; + assert storageCapacities == null || storageCapacities.length == numDataNodes; if (operation == StartupOption.RECOVER) { return; @@ -1298,7 +1365,7 @@ public class MiniDFSCluster { operation != StartupOption.ROLLBACK) ? null : new String[] {operation.getName()}; - + DataNode[] dns = new DataNode[numDataNodes]; for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; i++) { Configuration dnConf = new HdfsConfiguration(conf); if (dnConfOverlays != null) { @@ -1389,10 +1456,24 @@ public class MiniDFSCluster { dn.runDatanodeDaemon(); dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs, secureResources, dn.getIpcPort())); + dns[i - curDatanodesNum] = dn; } curDatanodesNum += numDataNodes; this.numDataNodes += numDataNodes; waitActive(); + + if (storageCapacities != null) { + for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; ++i) { + List volumes = dns[i].getFSDataset().getVolumes(); + assert storageCapacities[i].length == storagesPerDatanode; + assert volumes.size() == storagesPerDatanode; + + for (int j = 0; j < volumes.size(); ++j) { + FsVolumeImpl volume = (FsVolumeImpl) volumes.get(j); + volume.setCapacityForTesting(storageCapacities[i][j]); + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java index 382bf368213..d5225a4f966 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java @@ -22,6 +22,7 @@ import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI; import java.io.File; import java.io.IOException; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,6 +32,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; +import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.StaticMapping; import org.apache.hadoop.security.UserGroupInformation; @@ -52,11 +55,15 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster { public synchronized void startDataNodes(Configuration conf, int numDataNodes, StorageType[][] storageTypes, boolean manageDfsDirs, StartupOption operation, String[] racks, String[] nodeGroups, String[] hosts, + long[][] storageCapacities, long[] simulatedCapacities, boolean setupHostsFile, boolean checkDataNodeAddrConfig, boolean checkDataNodeHostConfig) throws IOException { + + assert storageCapacities == null || simulatedCapacities == null; assert storageTypes == null || storageTypes.length == numDataNodes; + assert storageCapacities == null || storageCapacities.length == numDataNodes; if (operation == 
StartupOption.RECOVER) { return; @@ -109,6 +116,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster { operation != StartupOption.ROLLBACK) ? null : new String[] {operation.getName()}; + DataNode[] dns = new DataNode[numDataNodes]; for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; i++) { Configuration dnConf = new HdfsConfiguration(conf); // Set up datanode address @@ -181,10 +189,23 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster { } dn.runDatanodeDaemon(); dataNodes.add(new DataNodeProperties(dn, newconf, dnArgs, secureResources, dn.getIpcPort())); + dns[i - curDatanodesNum] = dn; } curDatanodesNum += numDataNodes; this.numDataNodes += numDataNodes; waitActive(); + + if (storageCapacities != null) { + for (int i = curDatanodesNum; i < curDatanodesNum+numDataNodes; ++i) { + List volumes = dns[i].getFSDataset().getVolumes(); + assert volumes.size() == storagesPerDatanode; + + for (int j = 0; j < volumes.size(); ++j) { + FsVolumeImpl volume = (FsVolumeImpl) volumes.get(j); + volume.setCapacityForTesting(storageCapacities[i][j]); + } + } + } } public synchronized void startDataNodes(Configuration conf, int numDataNodes, @@ -193,7 +214,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster { long[] simulatedCapacities, boolean setupHostsFile) throws IOException { startDataNodes(conf, numDataNodes, null, manageDfsDirs, operation, racks, nodeGroups, - hosts, simulatedCapacities, setupHostsFile, false, false); + hosts, null, simulatedCapacities, setupHostsFile, false, false); } public void startDataNodes(Configuration conf, int numDataNodes, @@ -209,13 +230,14 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster { public synchronized void startDataNodes(Configuration conf, int numDataNodes, StorageType[][] storageTypes, boolean manageDfsDirs, StartupOption operation, String[] racks, String[] hosts, + long[][] storageCapacities, long[] simulatedCapacities, boolean setupHostsFile, boolean checkDataNodeAddrConfig, boolean checkDataNodeHostConfig, Configuration[] dnConfOverlays) throws IOException { startDataNodes(conf, numDataNodes, storageTypes, manageDfsDirs, operation, racks, - NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile, + NODE_GROUPS, hosts, storageCapacities, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index bda95c07525..3db66f52c94 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -213,7 +213,7 @@ public class TestSafeMode { @Override public Boolean get() { return getLongCounter("StorageBlockReportOps", getMetrics(NN_METRICS)) == - MiniDFSCluster.DIRS_PER_DATANODE; + cluster.getStoragesPerDatanode(); } }, 10, 10000); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java index dfe4209ec23..e71c0ea982a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockHasMultipleReplicasOnSameDN.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.IOException; -import java.net.InetSocketAddress; import java.util.ArrayList; @@ -106,7 +105,7 @@ public class TestBlockHasMultipleReplicasOnSameDN { DataNode dn = cluster.getDataNodes().get(0); DatanodeRegistration dnReg = dn.getDNRegistrationForBP(bpid); StorageBlockReport reports[] = - new StorageBlockReport[MiniDFSCluster.DIRS_PER_DATANODE]; + new StorageBlockReport[cluster.getStoragesPerDatanode()]; ArrayList blocks = new ArrayList(); @@ -114,7 +113,7 @@ public class TestBlockHasMultipleReplicasOnSameDN { blocks.add(locatedBlock.getBlock().getLocalBlock()); } - for (int i = 0; i < MiniDFSCluster.DIRS_PER_DATANODE; ++i) { + for (int i = 0; i < cluster.getStoragesPerDatanode(); ++i) { BlockListAsLongs bll = new BlockListAsLongs(blocks, null); FsVolumeSpi v = dn.getFSDataset().getVolumes().get(i); DatanodeStorage dns = new DatanodeStorage(v.getStorageID()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java index 989c33d2f09..7058d71f2e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDnRespectsBlockReportSplitThreshold.java @@ -130,7 +130,7 @@ public class TestDnRespectsBlockReportSplitThreshold { ArgumentCaptor captor = ArgumentCaptor.forClass(StorageBlockReport[].class); - Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport( + Mockito.verify(nnSpy, times(cluster.getStoragesPerDatanode())).blockReport( any(DatanodeRegistration.class), anyString(), captor.capture()); @@ -167,7 +167,7 @@ public class TestDnRespectsBlockReportSplitThreshold { anyString(), captor.capture()); - verifyCapturedArguments(captor, MiniDFSCluster.DIRS_PER_DATANODE, BLOCKS_IN_FILE); + verifyCapturedArguments(captor, cluster.getStoragesPerDatanode(), BLOCKS_IN_FILE); } /** @@ -194,7 +194,7 @@ public class TestDnRespectsBlockReportSplitThreshold { ArgumentCaptor captor = ArgumentCaptor.forClass(StorageBlockReport[].class); - Mockito.verify(nnSpy, times(MiniDFSCluster.DIRS_PER_DATANODE)).blockReport( + Mockito.verify(nnSpy, times(cluster.getStoragesPerDatanode())).blockReport( any(DatanodeRegistration.class), anyString(), captor.capture()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index c893d3bb7af..c7828b1ca08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -443,7 +443,7 @@ public class TestNameNodeMetrics { assertCounter("SyncsNumOps", 1L, rb); // Each datanode reports in when the cluster comes up assertCounter("BlockReportNumOps", - (long)DATANODE_COUNT*MiniDFSCluster.DIRS_PER_DATANODE, rb); + (long)DATANODE_COUNT * cluster.getStoragesPerDatanode(), rb); // Sleep for 
an interval+slop to let the percentiles rollover Thread.sleep((PERCENTILES_INTERVAL+1)*1000); From dc154ab86d694e68f54c635043eb55d501d0e242 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 27 Aug 2014 00:55:19 -0700 Subject: [PATCH 19/28] Add a section for 2.5.1 in CHANGES.txt --- hadoop-common-project/hadoop-common/CHANGES.txt | 12 ++++++++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 12 ++++++++++++ hadoop-mapreduce-project/CHANGES.txt | 12 ++++++++++++ hadoop-yarn-project/CHANGES.txt | 12 ++++++++++++ 4 files changed, 48 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 0291c758e03..9242ca41240 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -695,6 +695,18 @@ Release 2.6.0 - UNRELEASED HADOOP-10989. Work around buggy getgrouplist() implementations on Linux that return 0 on failure. (cnauroth) +Release 2.5.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 31ed15c6114..4e60c46388c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -638,6 +638,18 @@ Release 2.6.0 - UNRELEASED HDFS-4852. libhdfs documentation is out of date. (cnauroth) +Release 2.5.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index ddf21ed838b..4cd71c0784d 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -258,6 +258,18 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-6044. Fully qualified intermediate done dir path breaks per-user dir creation on Windows. (zjshen) +Release 2.5.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5b61b4146c1..916816e7509 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -237,6 +237,18 @@ Release 2.6.0 - UNRELEASED YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled (Jian He via jlowe) +Release 2.5.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES From 2d9b77d6170fe38757f7f48a4492f17ac669cbc2 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 27 Aug 2014 01:16:52 -0700 Subject: [PATCH 20/28] Fix CHANGES.txt entry for MAPREDUCE-6033. --- hadoop-mapreduce-project/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 4cd71c0784d..a6d2981e6e8 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -270,6 +270,9 @@ Release 2.5.1 - UNRELEASED BUG FIXES + MAPREDUCE-6033. Updated access check for displaying job information + (Yu Gao via Eric Yang) + Release 2.5.0 - 2014-08-11 INCOMPATIBLE CHANGES @@ -351,9 +354,6 @@ Release 2.5.0 - 2014-08-11 BUG FIXES - MAPREDUCE-6033. 
Updated access check for displaying job information - (Yu Gao via Eric Yang) - MAPREDUCE-5759. Remove unnecessary conf load in Limits (Sandy Ryza) MAPREDUCE-5014. Extend Distcp to accept a custom CopyListing. From d16bfd1d0f7cd958e7041be40763cc9983a7b80a Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 27 Aug 2014 01:43:58 -0700 Subject: [PATCH 21/28] YARN-1326. RM should log using RMStore at startup time. (Tsuyoshi Ozawa via kasha) --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../recovery/RMStateStoreFactory.java | 13 ++++++++----- .../server/resourcemanager/webapp/AboutBlock.java | 1 + .../resourcemanager/webapp/dao/ClusterInfo.java | 8 ++++++++ .../resourcemanager/webapp/TestRMWebServices.java | 2 +- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 916816e7509..eefa5470bb2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -154,6 +154,9 @@ Release 2.6.0 - UNRELEASED YARN-2389. Added functionality for schedulers to kill all applications in a queue. (Subramaniam Venkatraman Krishnan via jianhe) + YARN-1326. RM should log using RMStore at startup time. + (Tsuyoshi Ozawa via kasha) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java index f9e2869d997..c09ddb8aa1d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreFactory.java @@ -17,17 +17,20 @@ */ package org.apache.hadoop.yarn.server.resourcemanager.recovery; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.yarn.conf.YarnConfiguration; public class RMStateStoreFactory { + private static final Log LOG = LogFactory.getLog(RMStateStoreFactory.class); public static RMStateStore getStore(Configuration conf) { - RMStateStore store = ReflectionUtils.newInstance( - conf.getClass(YarnConfiguration.RM_STORE, - MemoryRMStateStore.class, RMStateStore.class), - conf); - return store; + Class storeClass = + conf.getClass(YarnConfiguration.RM_STORE, + MemoryRMStateStore.class, RMStateStore.class); + LOG.info("Using RMStateStore implementation - " + storeClass); + return ReflectionUtils.newInstance(storeClass, conf); } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java index 91b5cc10194..ea5c48adff6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java @@ -44,6 +44,7 @@ public class AboutBlock extends HtmlBlock { _("Cluster ID:", cinfo.getClusterId()). _("ResourceManager state:", cinfo.getState()). _("ResourceManager HA state:", cinfo.getHAState()). + _("ResourceManager RMStateStore:", cinfo.getRMStateStore()). _("ResourceManager started on:", Times.format(cinfo.getStartedOn())). _("ResourceManager version:", cinfo.getRMBuildVersion() + " on " + cinfo.getRMVersionBuiltOn()). diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java index c96d73ed1dc..b529f211226 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java @@ -25,6 +25,7 @@ import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.service.Service.STATE; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.util.YarnVersionInfo; @XmlRootElement @@ -35,6 +36,7 @@ public class ClusterInfo { protected long startedOn; protected STATE state; protected HAServiceProtocol.HAServiceState haState; + protected String rmStateStoreName; protected String resourceManagerVersion; protected String resourceManagerBuildVersion; protected String resourceManagerVersionBuiltOn; @@ -51,6 +53,8 @@ public class ClusterInfo { this.id = ts; this.state = rm.getServiceState(); this.haState = rm.getRMContext().getHAServiceState(); + this.rmStateStoreName = rm.getRMContext().getStateStore().getClass() + .getName(); this.startedOn = ts; this.resourceManagerVersion = YarnVersionInfo.getVersion(); this.resourceManagerBuildVersion = YarnVersionInfo.getBuildVersion(); @@ -68,6 +72,10 @@ public class ClusterInfo { return this.haState.toString(); } + public String getRMStateStore() { + return this.rmStateStoreName; + } + public String getRMVersion() { return this.resourceManagerVersion; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java index 561b1478f74..ff0f6f63386 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java @@ -284,7 +284,7 @@ public class TestRMWebServices extends JerseyTest { Exception { assertEquals("incorrect number of 
elements", 1, json.length()); JSONObject info = json.getJSONObject("clusterInfo"); - assertEquals("incorrect number of elements", 10, info.length()); + assertEquals("incorrect number of elements", 11, info.length()); verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"), info.getString("state"), info.getString("haState"), info.getString("hadoopVersionBuiltOn"), From d778abf022b415c64223153814d4188c2b3dd797 Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Wed, 27 Aug 2014 02:01:00 -0700 Subject: [PATCH 22/28] YARN-2035. FileSystemApplicationHistoryStore should not make working dir when it already exists. Contributed by Jonathan Eagles. --- hadoop-yarn-project/CHANGES.txt | 3 + .../FileSystemApplicationHistoryStore.java | 14 ++++- ...TestFileSystemApplicationHistoryStore.java | 62 ++++++++++++++++++- 3 files changed, 75 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index eefa5470bb2..36d304c7876 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -240,6 +240,9 @@ Release 2.6.0 - UNRELEASED YARN-2434. RM should not recover containers from previously failed attempt when AM restart is not enabled (Jian He via jlowe) + YARN-2035. FileSystemApplicationHistoryStore should not make working dir + when it already exists. (Jonathan Eagles via zjshen) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/FileSystemApplicationHistoryStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/FileSystemApplicationHistoryStore.java index a5725ebfffe..a2d91406871 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/FileSystemApplicationHistoryStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/FileSystemApplicationHistoryStore.java @@ -110,15 +110,23 @@ public class FileSystemApplicationHistoryStore extends AbstractService super(FileSystemApplicationHistoryStore.class.getName()); } + protected FileSystem getFileSystem(Path path, Configuration conf) throws Exception { + return path.getFileSystem(conf); + } + @Override public void serviceInit(Configuration conf) throws Exception { Path fsWorkingPath = new Path(conf.get(YarnConfiguration.FS_APPLICATION_HISTORY_STORE_URI)); rootDirPath = new Path(fsWorkingPath, ROOT_DIR_NAME); try { - fs = fsWorkingPath.getFileSystem(conf); - fs.mkdirs(rootDirPath); - fs.setPermission(rootDirPath, ROOT_DIR_UMASK); + fs = getFileSystem(fsWorkingPath, conf); + + if (!fs.isDirectory(rootDirPath)) { + fs.mkdirs(rootDirPath); + fs.setPermission(rootDirPath, ROOT_DIR_UMASK); + } + } catch (IOException e) { LOG.error("Error when initializing FileSystemHistoryStorage", e); throw e; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestFileSystemApplicationHistoryStore.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestFileSystemApplicationHistoryStore.java index d31018c1181..552a5e50b0e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestFileSystemApplicationHistoryStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestFileSystemApplicationHistoryStore.java @@ -20,9 +20,17 @@ package org.apache.hadoop.yarn.server.applicationhistoryservice; import java.io.IOException; import java.net.URI; +import java.net.URISyntaxException; import org.junit.Assert; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -53,6 +61,11 @@ public class TestFileSystemApplicationHistoryStore extends @Before public void setup() throws Exception { fs = new RawLocalFileSystem(); + initStore(fs); + } + + private void initStore(final FileSystem fs) throws IOException, + URISyntaxException { Configuration conf = new Configuration(); fs.initialize(new URI("/"), conf); fsWorkingPath = @@ -61,7 +74,12 @@ public class TestFileSystemApplicationHistoryStore extends fs.delete(fsWorkingPath, true); conf.set(YarnConfiguration.FS_APPLICATION_HISTORY_STORE_URI, fsWorkingPath.toString()); - store = new FileSystemApplicationHistoryStore(); + store = new FileSystemApplicationHistoryStore() { + @Override + protected FileSystem getFileSystem(Path path, Configuration conf) { + return fs; + } + }; store.init(conf); store.start(); } @@ -243,4 +261,46 @@ public class TestFileSystemApplicationHistoryStore extends testWriteHistoryData(3, false, true); testReadHistoryData(3, false, true); } + + @Test + public void testInitExistingWorkingDirectoryInSafeMode() throws Exception { + LOG.info("Starting testInitExistingWorkingDirectoryInSafeMode"); + tearDown(); + + // Setup file system to inject startup conditions + FileSystem fs = spy(new RawLocalFileSystem()); + doReturn(true).when(fs).isDirectory(any(Path.class)); + + try { + initStore(fs); + } catch (Exception e) { + Assert.fail("Exception should not be thrown: " + e); + } + + // Make sure that directory creation was not attempted + verify(fs, times(1)).isDirectory(any(Path.class)); + verify(fs, times(0)).mkdirs(any(Path.class)); + } + + @Test + public void testInitNonExistingWorkingDirectoryInSafeMode() throws Exception { + LOG.info("Starting testInitNonExistingWorkingDirectoryInSafeMode"); + tearDown(); + + // Setup file system to inject startup conditions + FileSystem fs = spy(new RawLocalFileSystem()); + doReturn(false).when(fs).isDirectory(any(Path.class)); + doThrow(new IOException()).when(fs).mkdirs(any(Path.class)); + + try { + initStore(fs); + Assert.fail("Exception should have been thrown"); + } catch (Exception e) { + // Expected failure + } + + // Make sure that directory creation was attempted + verify(fs, times(1)).isDirectory(any(Path.class)); + verify(fs, times(1)).mkdirs(any(Path.class)); + } } From 
a1618a2a77ef241b23058809037f93ea00da9329 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Tue, 26 Aug 2014 14:40:46 -0700 Subject: [PATCH 23/28] HADOOP-11002. shell escapes are incompatible with previous releases (aw) --- .../hadoop-common/CHANGES.txt | 2 ++ .../src/main/bin/hadoop-functions.sh | 31 ++++++------------- 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9242ca41240..2270df3424d 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -319,6 +319,8 @@ Trunk (Unreleased) HADOOP-10925. Compilation fails in native link0 function on Windows. (cnauroth) + HADOOP-11002. shell escapes are incompatible with previous releases (aw) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index f2437fa2ff2..ab61b8483f6 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -59,8 +59,7 @@ function hadoop_bootstrap_init TOOL_PATH=${TOOL_PATH:-${HADOOP_PREFIX}/share/hadoop/tools/lib/*} export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)} - - + # defaults export HADOOP_OPTS=${HADOOP_OPTS:-"-Djava.net.preferIPv4Stack=true"} } @@ -94,7 +93,6 @@ function hadoop_exec_hadoopenv fi } - function hadoop_basic_init { # Some of these are also set in hadoop-env.sh. @@ -446,7 +444,6 @@ function hadoop_add_to_classpath_mapred hadoop_add_classpath "${HADOOP_MAPRED_HOME}/${MAPRED_DIR}"'/*' } - function hadoop_add_to_classpath_userpath { # Add the user-specified HADOOP_CLASSPATH to the @@ -551,7 +548,6 @@ function hadoop_java_setup fi } - function hadoop_finalize_libpaths { if [[ -n "${JAVA_LIBRARY_PATH}" ]]; then @@ -564,17 +560,14 @@ function hadoop_finalize_libpaths # # fill in any last minute options that might not have been defined yet # -# Note that we are replacing ' ' with '\ ' so that directories with -# spaces work correctly when run exec blah -# function hadoop_finalize_hadoop_opts { - hadoop_add_param HADOOP_OPTS hadoop.log.dir "-Dhadoop.log.dir=${HADOOP_LOG_DIR/ /\ }" - hadoop_add_param HADOOP_OPTS hadoop.log.file "-Dhadoop.log.file=${HADOOP_LOGFILE/ /\ }" - hadoop_add_param HADOOP_OPTS hadoop.home.dir "-Dhadoop.home.dir=${HADOOP_PREFIX/ /\ }" - hadoop_add_param HADOOP_OPTS hadoop.id.str "-Dhadoop.id.str=${HADOOP_IDENT_STRING/ /\ }" + hadoop_add_param HADOOP_OPTS hadoop.log.dir "-Dhadoop.log.dir=${HADOOP_LOG_DIR}" + hadoop_add_param HADOOP_OPTS hadoop.log.file "-Dhadoop.log.file=${HADOOP_LOGFILE}" + hadoop_add_param HADOOP_OPTS hadoop.home.dir "-Dhadoop.home.dir=${HADOOP_PREFIX}" + hadoop_add_param HADOOP_OPTS hadoop.id.str "-Dhadoop.id.str=${HADOOP_IDENT_STRING}" hadoop_add_param HADOOP_OPTS hadoop.root.logger "-Dhadoop.root.logger=${HADOOP_ROOT_LOGGER}" - hadoop_add_param HADOOP_OPTS hadoop.policy.file "-Dhadoop.policy.file=${HADOOP_POLICYFILE/ /\ }" + hadoop_add_param HADOOP_OPTS hadoop.policy.file "-Dhadoop.policy.file=${HADOOP_POLICYFILE}" hadoop_add_param HADOOP_OPTS hadoop.security.logger "-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER}" } @@ -724,10 +717,8 @@ function hadoop_java_exec local command=$1 local class=$2 shift 2 - # we eval this so that paths with spaces work #shellcheck disable=SC2086 - eval exec "$JAVA" 
"-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@" - + exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@" } function hadoop_start_daemon @@ -739,7 +730,7 @@ function hadoop_start_daemon local class=$2 shift 2 #shellcheck disable=SC2086 - eval exec "$JAVA" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@" + exec "${JAVA}" "-Dproc_${command}" ${HADOOP_OPTS} "${class}" "$@" } function hadoop_start_daemon_wrapper @@ -802,9 +793,7 @@ function hadoop_start_secure_daemon # where to send stderr. same thing, except &2 = stderr local daemonerrfile=$5 shift 5 - - - + hadoop_rotate_log "${daemonoutfile}" hadoop_rotate_log "${daemonerrfile}" @@ -925,7 +914,6 @@ function hadoop_stop_daemon fi } - function hadoop_stop_secure_daemon { local command=$1 @@ -984,7 +972,6 @@ function hadoop_daemon_handler esac } - function hadoop_secure_daemon_handler { local daemonmode=$1 From 9ec4a930f57ab17b969ab656a7d5b0c7364b1354 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Wed, 27 Aug 2014 07:00:31 -0700 Subject: [PATCH 24/28] HADOOP-10996. Stop violence in the *_HOME (aw) --- .../hadoop-common/CHANGES.txt | 2 ++ .../src/main/bin/hadoop-config.sh | 10 ++++++++-- .../src/main/bin/hadoop-functions.sh | 12 +++++------ .../hadoop-hdfs/src/main/bin/hdfs-config.sh | 15 +++++++------- hadoop-mapreduce-project/bin/mapred-config.sh | 20 ++++++++++--------- .../hadoop-yarn/bin/yarn-config.sh | 16 +++++++-------- 6 files changed, 42 insertions(+), 33 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 2270df3424d..45e38d392f9 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -321,6 +321,8 @@ Trunk (Unreleased) HADOOP-11002. shell escapes are incompatible with previous releases (aw) + HADOOP-10996. Stop violence in the *_HOME (aw) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh index b2fc4d341df..0cf8bcfc78e 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-config.sh @@ -53,7 +53,10 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then fi # get our functions defined for usage later -if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then +if [[ -n "${HADOOP_COMMON_HOME}" ]] && + [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-functions.sh" ]]; then + . "${HADOOP_COMMON_HOME}/libexec/hadoop-functions.sh" +elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" ]]; then . "${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh" else echo "ERROR: Unable to exec ${HADOOP_LIBEXEC_DIR}/hadoop-functions.sh." 1>&2 @@ -61,7 +64,10 @@ else fi # allow overrides of the above and pre-defines of the below -if [[ -f "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then +if [[ -n "${HADOOP_COMMON_HOME}" ]] && + [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-layout.sh" ]]; then + . "${HADOOP_COMMON_HOME}/libexec/hadoop-layout.sh" +elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" ]]; then . 
"${HADOOP_LIBEXEC_DIR}/hadoop-layout.sh" fi diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh index ab61b8483f6..800e024485e 100644 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop-functions.sh @@ -68,17 +68,18 @@ function hadoop_find_confdir { # NOTE: This function is not user replaceable. + local conf_dir # Look for the basic hadoop configuration area. # # # An attempt at compatibility with some Hadoop 1.x # installs. if [[ -e "${HADOOP_PREFIX}/conf/hadoop-env.sh" ]]; then - DEFAULT_CONF_DIR="conf" + conf_dir="conf" else - DEFAULT_CONF_DIR="etc/hadoop" + conf_dir="etc/hadoop" fi - export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_PREFIX}/${DEFAULT_CONF_DIR}}" + export HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-${HADOOP_PREFIX}/${conf_dir}}" } function hadoop_exec_hadoopenv @@ -573,10 +574,7 @@ function hadoop_finalize_hadoop_opts function hadoop_finalize_classpath { - - # we want the HADOOP_CONF_DIR at the end - # according to oom, it gives a 2% perf boost - hadoop_add_classpath "${HADOOP_CONF_DIR}" after + hadoop_add_classpath "${HADOOP_CONF_DIR}" before # user classpath gets added at the last minute. this allows # override of CONF dirs and more diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh index fb460d96d6a..68240287d97 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.sh @@ -20,7 +20,7 @@ function hadoop_subproject_init { - if [ -e "${HADOOP_CONF_DIR}/hdfs-env.sh" ]; then + if [[ -e "${HADOOP_CONF_DIR}/hdfs-env.sh" ]]; then . "${HADOOP_CONF_DIR}/hdfs-env.sh" fi @@ -49,7 +49,7 @@ function hadoop_subproject_init HADOOP_ROOT_LOGGER=${HADOOP_HDFS_ROOT_LOGGER:-$HADOOP_ROOT_LOGGER} HADOOP_HDFS_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}" - HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_HOME_DIR}" + HADOOP_HDFS_HOME="${HADOOP_HDFS_HOME:-$HADOOP_PREFIX}" HADOOP_IDENT_STRING="${HADOOP_HDFS_IDENT_STRING:-$HADOOP_IDENT_STRING}" HADOOP_HDFS_IDENT_STRING="${HADOOP_IDENT_STRING}" @@ -71,12 +71,13 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_hd_this}")" >/dev/null && pwd -P) fi -if [ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]; then - . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" -elif [ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]; then +if [[ -n "${HADOOP_COMMON_HOME}" ]] && + [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" -elif [ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]; then - . "${HADOOP_HOME}/libexec/hadoop-config.sh" +elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then + . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" +elif [ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]; then + . "${HADOOP_PREFIX}/libexec/hadoop-config.sh" else echo "ERROR: Hadoop common not found." 2>&1 exit 1 diff --git a/hadoop-mapreduce-project/bin/mapred-config.sh b/hadoop-mapreduce-project/bin/mapred-config.sh index c24d3509c48..c2681ac66b1 100644 --- a/hadoop-mapreduce-project/bin/mapred-config.sh +++ b/hadoop-mapreduce-project/bin/mapred-config.sh @@ -20,7 +20,7 @@ function hadoop_subproject_init { - if [ -e "${HADOOP_CONF_DIR}/mapred-env.sh" ]; then + if [[ -e "${HADOOP_CONF_DIR}/mapred-env.sh" ]]; then . 
"${HADOOP_CONF_DIR}/mapred-env.sh" fi @@ -49,7 +49,7 @@ function hadoop_subproject_init HADOOP_ROOT_LOGGER="${HADOOP_MAPRED_ROOT_LOGGER:-INFO,console}" HADOOP_MAPRED_ROOT_LOGGER="${HADOOP_ROOT_LOGGER}" - HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_HOME_DIR}" + HADOOP_MAPRED_HOME="${HADOOP_MAPRED_HOME:-$HADOOP_PREFIX}" HADOOP_IDENT_STRING="${HADOOP_MAPRED_IDENT_STRING:-$HADOOP_IDENT_STRING}" HADOOP_MAPRED_IDENT_STRING="${HADOOP_IDENT_STRING}" @@ -60,13 +60,15 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_mc_this}")" >/dev/null && pwd -P) fi -if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then - . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" -elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then +if [[ -n "${HADOOP_COMMON_HOME}" ]] && + [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" -elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then - . "${HADOOP_HOME}/libexec/hadoop-config.sh" +elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then + . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" +elif [ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]; then + . "${HADOOP_PREFIX}/libexec/hadoop-config.sh" else - echo "Hadoop common not found." - exit + echo "ERROR: Hadoop common not found." 2>&1 + exit 1 fi + diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh index 34d2d2d0a80..d83e9983d41 100644 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn-config.sh @@ -80,14 +80,14 @@ if [[ -z "${HADOOP_LIBEXEC_DIR}" ]]; then HADOOP_LIBEXEC_DIR=$(cd -P -- "$(dirname -- "${_yc_this}")" >/dev/null && pwd -P) fi -if [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then - . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" -elif [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then +if [[ -n "${HADOOP_COMMON_HOME}" ]] && + [[ -e "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" ]]; then . "${HADOOP_COMMON_HOME}/libexec/hadoop-config.sh" -elif [[ -e "${HADOOP_HOME}/libexec/hadoop-config.sh" ]]; then - . "${HADOOP_HOME}/libexec/hadoop-config.sh" +elif [[ -e "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" ]]; then + . "${HADOOP_LIBEXEC_DIR}/hadoop-config.sh" +elif [ -e "${HADOOP_PREFIX}/libexec/hadoop-config.sh" ]; then + . "${HADOOP_PREFIX}/libexec/hadoop-config.sh" else - echo "Hadoop common not found." - exit + echo "ERROR: Hadoop common not found." 2>&1 + exit 1 fi - From 812bd0c0e583fce925e3151510860ca9781b3e40 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 27 Aug 2014 15:14:54 +0000 Subject: [PATCH 25/28] MAPREDUCE-5885. build/test/test.mapred.spill causes release audit warnings. Contributed by Chen He --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../apache/hadoop/mapred/TestComparators.java | 61 ++++++++++++------ .../hadoop/mapred/TestMapOutputType.java | 62 ++++++++++++------- .../org/apache/hadoop/mapred/TestMapRed.java | 28 ++++++--- .../lib/TestKeyFieldBasedComparator.java | 25 +++++++- .../hadoop/mapreduce/TestMapReduce.java | 30 ++++++--- 6 files changed, 150 insertions(+), 59 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index a6d2981e6e8..de0767d2a45 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -258,6 +258,9 @@ Release 2.6.0 - UNRELEASED MAPREDUCE-6044. 
Fully qualified intermediate done dir path breaks per-user dir creation on Windows. (zjshen) + MAPREDUCE-5885. build/test/test.mapred.spill causes release audit warnings + (Chen He via jlowe) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestComparators.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestComparators.java index 1cef5cb42f2..f83dbe28578 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestComparators.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestComparators.java @@ -17,13 +17,30 @@ */ package org.apache.hadoop.mapred; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.io.*; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.Random; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.mapreduce.MRConfig; -import junit.framework.TestCase; -import java.io.*; -import java.util.*; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + /** * Two different types of comparators can be used in MapReduce. One is used @@ -37,8 +54,11 @@ import java.util.*; * 2. Test the common use case where values are grouped by keys but values * within each key are grouped by a secondary key (a timestamp, for example). */ -public class TestComparators extends TestCase -{ +public class TestComparators { + private static final File TEST_DIR = new File( + System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), "TestComparators-mapred"); + JobConf conf = new JobConf(TestMapOutputType.class); JobClient jc; static Random rng = new Random(); @@ -292,9 +312,9 @@ public class TestComparators extends TestCase } } - + @Before public void configure() throws Exception { - Path testdir = new Path("build/test/test.mapred.spill"); + Path testdir = new Path(TEST_DIR.getAbsolutePath()); Path inDir = new Path(testdir, "in"); Path outDir = new Path(testdir, "out"); FileSystem fs = FileSystem.get(conf); @@ -334,14 +354,18 @@ public class TestComparators extends TestCase jc = new JobClient(conf); } - + + @After + public void cleanup() { + FileUtil.fullyDelete(TEST_DIR); + } /** * Test the default comparator for Map/Reduce. * Use the identity mapper and see if the keys are sorted at the end * @throws Exception */ - public void testDefaultMRComparator() throws Exception { - configure(); + @Test + public void testDefaultMRComparator() throws Exception { conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(AscendingKeysReducer.class); @@ -361,8 +385,8 @@ public class TestComparators extends TestCase * comparator. Keys should be sorted in reverse order in the reducer. 
* @throws Exception */ - public void testUserMRComparator() throws Exception { - configure(); + @Test + public void testUserMRComparator() throws Exception { conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(DescendingKeysReducer.class); conf.setOutputKeyComparatorClass(DecreasingIntComparator.class); @@ -384,8 +408,8 @@ public class TestComparators extends TestCase * values for a key should be sorted by the 'timestamp'. * @throws Exception */ - public void testUserValueGroupingComparator() throws Exception { - configure(); + @Test + public void testUserValueGroupingComparator() throws Exception { conf.setMapperClass(RandomGenMapper.class); conf.setReducerClass(AscendingGroupReducer.class); conf.setOutputValueGroupingComparator(CompositeIntGroupFn.class); @@ -409,8 +433,8 @@ public class TestComparators extends TestCase * order. This lets us make sure that the right comparators are used. * @throws Exception */ - public void testAllUserComparators() throws Exception { - configure(); + @Test + public void testAllUserComparators() throws Exception { conf.setMapperClass(RandomGenMapper.class); // use a decreasing comparator so keys are sorted in reverse order conf.setOutputKeyComparatorClass(DecreasingIntComparator.class); @@ -430,6 +454,7 @@ public class TestComparators extends TestCase * Test a user comparator that relies on deserializing both arguments * for each compare. */ + @Test public void testBakedUserComparator() throws Exception { MyWritable a = new MyWritable(8, 8); MyWritable b = new MyWritable(7, 9); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapOutputType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapOutputType.java index d11d7bc30b4..e3860fd2e25 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapOutputType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapOutputType.java @@ -17,21 +17,36 @@ */ package org.apache.hadoop.mapred; -import org.apache.hadoop.fs.*; -import org.apache.hadoop.io.*; -import org.apache.hadoop.mapred.lib.*; +import java.io.File; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.MRConfig; -import junit.framework.TestCase; -import java.io.*; -import java.util.*; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.fail; + /** * TestMapOutputType checks whether the Map task handles type mismatch * between mapper output and the type specified in * JobConf.MapOutputKeyType and JobConf.MapOutputValueType. 
*/ -public class TestMapOutputType extends TestCase -{ +public class TestMapOutputType { + private static final File TEST_DIR = new File( + System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), "TestMapOutputType-mapred"); JobConf conf = new JobConf(TestMapOutputType.class); JobClient jc; /** @@ -75,9 +90,9 @@ public class TestMapOutputType extends TestCase } } - + @Before public void configure() throws Exception { - Path testdir = new Path("build/test/test.mapred.spill"); + Path testdir = new Path(TEST_DIR.getAbsolutePath()); Path inDir = new Path(testdir, "in"); Path outDir = new Path(testdir, "out"); FileSystem fs = FileSystem.get(conf); @@ -101,17 +116,21 @@ public class TestMapOutputType extends TestCase throw new IOException("Mkdirs failed to create " + inDir.toString()); } Path inFile = new Path(inDir, "part0"); - SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile, + SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile, Text.class, Text.class); writer.append(new Text("rec: 1"), new Text("Hello")); writer.close(); jc = new JobClient(conf); } - + + @After + public void cleanup() { + FileUtil.fullyDelete(TEST_DIR); + } + + @Test public void testKeyMismatch() throws Exception { - configure(); - // Set bad MapOutputKeyClass and MapOutputValueClass conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(IntWritable.class); @@ -125,11 +144,9 @@ public class TestMapOutputType extends TestCase fail("Oops! The job was supposed to break due to an exception"); } } - + + @Test public void testValueMismatch() throws Exception { - configure(); - - // Set good MapOutputKeyClass, bad MapOutputValueClass conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(IntWritable.class); @@ -142,11 +159,10 @@ public class TestMapOutputType extends TestCase fail("Oops! 
The job was supposed to break due to an exception"); } } - - public void testNoMismatch() throws Exception{ - configure(); - - // Set good MapOutputKeyClass and MapOutputValueClass + + @Test + public void testNoMismatch() throws Exception{ + // Set good MapOutputKeyClass and MapOutputValueClass conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java index 3f7a6f7e3b4..02a083b4f0a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java @@ -24,7 +24,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.util.Collections; +import java.io.File; import java.util.EnumSet; import java.util.HashSet; import java.util.Iterator; @@ -34,6 +34,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; @@ -46,11 +47,11 @@ import org.apache.hadoop.mapred.lib.IdentityReducer; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.junit.After; import org.junit.Test; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; /********************************************************** * MapredLoadTest generates a bunch of work that exercises @@ -110,6 +111,10 @@ public class TestMapRed extends Configured implements Tool { * of numbers in random order, but where each number appears * as many times as we were instructed. */ + private static final File TEST_DIR = new File( + System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), "TestMapRed-mapred"); + static class RandomGenMapper implements Mapper { @@ -248,6 +253,11 @@ public class TestMapRed extends Configured implements Tool { private static int counts = 100; private static Random r = new Random(); + @After + public void cleanup() { + FileUtil.fullyDelete(TEST_DIR); + } + /** public TestMapRed(int range, int counts, Configuration conf) throws IOException { this.range = range; @@ -372,7 +382,7 @@ public class TestMapRed extends Configured implements Tool { boolean includeCombine ) throws Exception { JobConf conf = new JobConf(TestMapRed.class); - Path testdir = new Path("build/test/test.mapred.compress"); + Path testdir = new Path(TEST_DIR.getAbsolutePath()); Path inDir = new Path(testdir, "in"); Path outDir = new Path(testdir, "out"); FileSystem fs = FileSystem.get(conf); @@ -440,7 +450,7 @@ public class TestMapRed extends Configured implements Tool { // // Generate distribution of ints. This is the answer key. // - JobConf conf = null; + JobConf conf; //Check to get configuration and check if it is configured thro' Configured //interface. 
This would happen when running testcase thro' command line. if(getConf() == null) { @@ -465,7 +475,7 @@ public class TestMapRed extends Configured implements Tool { // Write the answer key to a file. // FileSystem fs = FileSystem.get(conf); - Path testdir = new Path("mapred.loadtest"); + Path testdir = new Path(TEST_DIR.getAbsolutePath(), "mapred.loadtest"); if (!fs.mkdirs(testdir)) { throw new IOException("Mkdirs failed to create " + testdir.toString()); } @@ -635,8 +645,8 @@ public class TestMapRed extends Configured implements Tool { in.close(); } int originalTotal = 0; - for (int i = 0; i < dist.length; i++) { - originalTotal += dist[i]; + for (int aDist : dist) { + originalTotal += aDist; } System.out.println("Original sum: " + originalTotal); System.out.println("Recomputed sum: " + totalseen); @@ -727,7 +737,7 @@ public class TestMapRed extends Configured implements Tool { public void runJob(int items) { try { JobConf conf = new JobConf(TestMapRed.class); - Path testdir = new Path("build/test/test.mapred.spill"); + Path testdir = new Path(TEST_DIR.getAbsolutePath()); Path inDir = new Path(testdir, "in"); Path outDir = new Path(testdir, "out"); FileSystem fs = FileSystem.get(conf); @@ -777,7 +787,7 @@ public class TestMapRed extends Configured implements Tool { System.err.println("Usage: TestMapRed "); System.err.println(); System.err.println("Note: a good test will have a " + - " value that is substantially larger than the "); + " value that is substantially larger than the "); return -1; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/lib/TestKeyFieldBasedComparator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/lib/TestKeyFieldBasedComparator.java index 0bee2b564b9..34a4d2c6c92 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/lib/TestKeyFieldBasedComparator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/lib/TestKeyFieldBasedComparator.java @@ -18,7 +18,6 @@ package org.apache.hadoop.mapred.lib; -import java.io.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -35,9 +34,23 @@ import org.apache.hadoop.mapred.RunningJob; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.hadoop.mapred.Utils; +import org.junit.After; +import org.junit.Test; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; public class TestKeyFieldBasedComparator extends HadoopTestCase { + + private static final File TEST_DIR = new File( + System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), + "TestKeyFieldBasedComparator-lib"); JobConf conf; JobConf localConf; @@ -50,8 +63,9 @@ public class TestKeyFieldBasedComparator extends HadoopTestCase { localConf = createJobConf(); localConf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " "); } + public void configure(String keySpec, int expect) throws Exception { - Path testdir = new Path("build/test/test.mapred.spill"); + Path testdir = new Path(TEST_DIR.getAbsolutePath()); Path inDir = new Path(testdir, "in"); Path outDir = new Path(testdir, "out"); FileSystem fs = getFileSystem(); @@ 
-116,6 +130,13 @@ public class TestKeyFieldBasedComparator extends HadoopTestCase { reader.close(); } } + + @After + public void cleanup() { + FileUtil.fullyDelete(TEST_DIR); + } + + @Test public void testBasicUnixComparator() throws Exception { configure("-k1,1n", 1); configure("-k2,2n", 1); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduce.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduce.java index 01e1283dd23..48ad47af220 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduce.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestMapReduce.java @@ -23,14 +23,14 @@ import java.io.DataInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; +import java.io.File; import java.util.Iterator; import java.util.Random; -import junit.framework.TestCase; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; @@ -41,6 +41,10 @@ import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; +import org.junit.After; +import org.junit.Test; + +import static org.junit.Assert.assertTrue; /********************************************************** * MapredLoadTest generates a bunch of work that exercises @@ -75,8 +79,10 @@ import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; * 7) A mapred job integrates all the count files into a single one. * **********************************************************/ -public class TestMapReduce extends TestCase { - +public class TestMapReduce { + private static final File TEST_DIR = new File( + System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), "TestMapReduce-mapreduce"); private static FileSystem fs; static { @@ -215,6 +221,12 @@ public class TestMapReduce extends TestCase { private static int counts = 100; private static Random r = new Random(); + @After + public void cleanup() { + FileUtil.fullyDelete(TEST_DIR); + } + + @Test public void testMapred() throws Exception { launch(); } @@ -239,7 +251,7 @@ public class TestMapReduce extends TestCase { // // Write the answer key to a file. 
// - Path testdir = new Path("mapred.loadtest"); + Path testdir = new Path(TEST_DIR.getAbsolutePath()); if (!fs.mkdirs(testdir)) { throw new IOException("Mkdirs failed to create " + testdir.toString()); } @@ -488,13 +500,17 @@ public class TestMapReduce extends TestCase { System.err.println("Usage: TestMapReduce "); System.err.println(); System.err.println("Note: a good test will have a value" + - " that is substantially larger than the "); + " that is substantially larger than the "); return; } int i = 0; range = Integer.parseInt(argv[i++]); counts = Integer.parseInt(argv[i++]); - launch(); + try { + launch(); + } finally { + FileUtil.fullyDelete(TEST_DIR); + } } } From c5d9a4a91e4e0faae3a8530408da35b591396060 Mon Sep 17 00:00:00 2001 From: arp Date: Wed, 27 Aug 2014 09:52:33 -0700 Subject: [PATCH 26/28] HDFS-6694. TestPipelinesFailover.testPipelineRecoveryStress tests fail intermittently with various symptoms - debugging patch (Contributed by Yongjun Zhang) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 4 +++ .../namenode/ha/TestPipelinesFailover.java | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 4e60c46388c..fb3906afb5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -348,6 +348,10 @@ Trunk (Unreleased) HDFS-6905. fs-encryption merge triggered release audit failures. (clamb via tucu) + HDFS-6694. TestPipelinesFailover.testPipelineRecoveryStress tests fail + intermittently with various symptoms - debugging patch. (Yongjun Zhang via + Arpit Agarwal) + Release 2.6.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java index bba3dbb1196..08c652553e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java @@ -58,6 +58,7 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.log4j.Level; import org.junit.Test; import org.mockito.Mockito; @@ -420,6 +421,33 @@ public class TestPipelinesFailover { */ @Test(timeout=STRESS_RUNTIME*3) public void testPipelineRecoveryStress() throws Exception { + + // The following section of code is to help debug HDFS-6694 about + // this test that fails from time to time due to "too many open files". 
+    //
+    String[] scmd = new String[] {"/bin/sh", "-c", "ulimit -a"};
+    ShellCommandExecutor sce = new ShellCommandExecutor(scmd);
+    sce.execute();
+
+    System.out.println("HDFS-6694 Debug Data BEGIN===");
+    System.out.println("'ulimit -a' output:\n" + sce.getOutput());
+
+    scmd = new String[] {"hostname"};
+    sce = new ShellCommandExecutor(scmd);
+    sce.execute();
+    System.out.println("'hostname' output:\n" + sce.getOutput());
+
+    scmd = new String[] {"ifconfig"};
+    sce = new ShellCommandExecutor(scmd);
+    sce.execute();
+    System.out.println("'ifconfig' output:\n" + sce.getOutput());
+
+    scmd = new String[] {"whoami"};
+    sce = new ShellCommandExecutor(scmd);
+    sce.execute();
+    System.out.println("'whoami' output:\n" + sce.getOutput());
+    System.out.println("===HDFS-6694 Debug Data END");
+
     HAStressTestHarness harness = new HAStressTestHarness();
     // Disable permissions so that another user can recover the lease.
     harness.conf.setBoolean(

From e2d0ff364a84a4de10e7b11fe83cd3dab155a571 Mon Sep 17 00:00:00 2001
From: Jian He
Date: Wed, 27 Aug 2014 10:02:45 -0700
Subject: [PATCH 27/28] YARN-2182. Updated ContainerId#toString() to append RM Epoch number. Contributed by Tsuyoshi OZAWA

---
 hadoop-yarn-project/CHANGES.txt | 3 +++
 .../apache/hadoop/yarn/api/records/ContainerId.java | 10 +++++++---
 .../org/apache/hadoop/yarn/api/TestContainerId.java | 4 +++-
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 36d304c7876..871829ac369 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -157,6 +157,9 @@ Release 2.6.0 - UNRELEASED
     YARN-1326. RM should log using RMStore at startup time.
     (Tsuyoshi Ozawa via kasha)
 
+    YARN-2182. Updated ContainerId#toString() to append RM Epoch number.
+    (Tsuyoshi OZAWA via jianhe)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java
index 73e80859049..fc7f40488ec 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerId.java
@@ -83,7 +83,7 @@ public abstract class ContainerId implements Comparable<ContainerId>{
 
   // TODO: fail the app submission if attempts are more than 10 or something
-  private static final ThreadLocal<NumberFormat> appAttemptIdFormat =
+  private static final ThreadLocal<NumberFormat> appAttemptIdAndEpochFormat =
       new ThreadLocal<NumberFormat>() {
         @Override
         public NumberFormat initialValue() {
@@ -153,9 +153,13 @@ public abstract class ContainerId implements Comparable<ContainerId>{
     sb.append(ApplicationId.appIdFormat.get().format(appId.getId()))
         .append("_");
     sb.append(
-        appAttemptIdFormat.get().format(
+        appAttemptIdAndEpochFormat.get().format(
            getApplicationAttemptId().getAttemptId())).append("_");
-    sb.append(containerIdFormat.get().format(getId()));
+    sb.append(containerIdFormat.get().format(0x3fffff & getId()));
+    int epoch = getId() >> 22;
+    if (epoch > 0) {
+      sb.append("_").append(appAttemptIdAndEpochFormat.get().format(epoch));
+    }
     return sb.toString();
   }
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerId.java
index f92df8a8d0c..b23d0ed3fbc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerId.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerId.java
@@ -54,7 +54,9 @@ public class TestContainerId {
     long ts = System.currentTimeMillis();
     ContainerId c6 = newContainerId(36473, 4365472, ts, 25645811);
     Assert.assertEquals("container_10_0001_01_000001", c1.toString());
-    Assert.assertEquals("container_" + ts + "_36473_4365472_25645811",
+    Assert.assertEquals(479987, 0x003fffff & c6.getId());
+    Assert.assertEquals(6, c6.getId() >> 22);
+    Assert.assertEquals("container_" + ts + "_36473_4365472_479987_06",
         c6.toString());
   }
 
From 6b441d227a8806e87224106a81361bd61f0b3d0b Mon Sep 17 00:00:00 2001
From: Jing Zhao
Date: Wed, 27 Aug 2014 10:26:22 -0700
Subject: [PATCH 28/28] HDFS-6908. Incorrect snapshot directory diff generated by snapshot deletion. Contributed by Juan Yu and Jing Zhao.

---
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
 .../DirectoryWithSnapshotFeature.java | 10 ++-
 .../snapshot/TestSnapshotDeletion.java | 77 ++++++++++++++++++-
 3 files changed, 85 insertions(+), 5 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index fb3906afb5c..63c434d7085 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -642,6 +642,9 @@ Release 2.6.0 - UNRELEASED
 
     HDFS-4852. libhdfs documentation is out of date. (cnauroth)
 
+    HDFS-6908. Incorrect snapshot directory diff generated by snapshot deletion.
+ (Juan Yu and jing9 via jing9) + Release 2.5.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java index 9893bbaf2e2..9c9d435a34e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java @@ -722,6 +722,8 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { counts.add(lastDiff.diff.destroyCreatedList(currentINode, collectedBlocks, removedINodes)); } + counts.add(currentINode.cleanSubtreeRecursively(snapshot, prior, + collectedBlocks, removedINodes, priorDeleted, countDiffChange)); } else { // update prior prior = getDiffs().updatePrior(snapshot, prior); @@ -739,7 +741,9 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { counts.add(getDiffs().deleteSnapshotDiff(snapshot, prior, currentINode, collectedBlocks, removedINodes, countDiffChange)); - + counts.add(currentINode.cleanSubtreeRecursively(snapshot, prior, + collectedBlocks, removedINodes, priorDeleted, countDiffChange)); + // check priorDiff again since it may be created during the diff deletion if (prior != Snapshot.NO_SNAPSHOT_ID) { DirectoryDiff priorDiff = this.getDiffs().getDiffById(prior); @@ -778,9 +782,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { } } } - counts.add(currentINode.cleanSubtreeRecursively(snapshot, prior, - collectedBlocks, removedINodes, priorDeleted, countDiffChange)); - + if (currentINode.isQuotaSet()) { currentINode.getDirectoryWithQuotaFeature().addSpaceConsumed2Cache( -counts.get(Quota.NAMESPACE), -counts.get(Quota.DISKSPACE)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java index 77fa2a20cf2..1450a7d2321 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshotDeletion.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.snapshot; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -558,7 +559,81 @@ public class TestSnapshotDeletion { + toDeleteFileInSnapshot.toString(), e); } } - + + /** + * Delete a snapshot that is taken before a directory deletion, + * directory diff list should be combined correctly. 
+ */ + @Test (timeout=60000) + public void testDeleteSnapshot1() throws Exception { + final Path root = new Path("/"); + + Path dir = new Path("/dir1"); + Path file1 = new Path(dir, "file1"); + DFSTestUtil.createFile(hdfs, file1, BLOCKSIZE, REPLICATION, seed); + + hdfs.allowSnapshot(root); + hdfs.createSnapshot(root, "s1"); + + Path file2 = new Path(dir, "file2"); + DFSTestUtil.createFile(hdfs, file2, BLOCKSIZE, REPLICATION, seed); + + hdfs.createSnapshot(root, "s2"); + + // delete file + hdfs.delete(file1, true); + hdfs.delete(file2, true); + + // delete directory + assertTrue(hdfs.delete(dir, false)); + + // delete second snapshot + hdfs.deleteSnapshot(root, "s2"); + + NameNodeAdapter.enterSafeMode(cluster.getNameNode(), false); + NameNodeAdapter.saveNamespace(cluster.getNameNode()); + + // restart NN + cluster.restartNameNodes(); + } + + /** + * Delete a snapshot that is taken before a directory deletion (recursively), + * directory diff list should be combined correctly. + */ + @Test (timeout=60000) + public void testDeleteSnapshot2() throws Exception { + final Path root = new Path("/"); + + Path dir = new Path("/dir1"); + Path file1 = new Path(dir, "file1"); + DFSTestUtil.createFile(hdfs, file1, BLOCKSIZE, REPLICATION, seed); + + hdfs.allowSnapshot(root); + hdfs.createSnapshot(root, "s1"); + + Path file2 = new Path(dir, "file2"); + DFSTestUtil.createFile(hdfs, file2, BLOCKSIZE, REPLICATION, seed); + INodeFile file2Node = fsdir.getINode(file2.toString()).asFile(); + long file2NodeId = file2Node.getId(); + + hdfs.createSnapshot(root, "s2"); + + // delete directory recursively + assertTrue(hdfs.delete(dir, true)); + assertNotNull(fsdir.getInode(file2NodeId)); + + // delete second snapshot + hdfs.deleteSnapshot(root, "s2"); + assertTrue(fsdir.getInode(file2NodeId) == null); + + NameNodeAdapter.enterSafeMode(cluster.getNameNode(), false); + NameNodeAdapter.saveNamespace(cluster.getNameNode()); + + // restart NN + cluster.restartNameNodes(); + } + /** * Test deleting snapshots in a more complicated scenario: need to combine * snapshot diffs, but no need to handle diffs distributed in a dir tree