diff --git a/hadoop-client/pom.xml b/hadoop-client/pom.xml index 5d8c7fad4d6..8dea4325899 100644 --- a/hadoop-client/pom.xml +++ b/hadoop-client/pom.xml @@ -143,10 +143,6 @@ org.mortbay.jetty jetty - - org.mortbay.jetty - jetty-util - com.sun.jersey jersey-core diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 82d7ad8a021..224c7fbe192 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -165,6 +165,9 @@ Trunk (Unreleased) BUG FIXES + HADOOP-9451. Fault single-layer config if node group topology is enabled. + (Junping Du via llu) + HADOOP-8419. Fixed GzipCode NPE reset for IBM JDK. (Yu Li via eyang) HADOOP-9041. FsUrlStreamHandlerFactory could cause an infinite loop in @@ -502,6 +505,10 @@ Trunk (Unreleased) HADOOP-9353. Activate native-win maven profile by default on Windows. (Arpit Agarwal via szetszwo) + + HADOOP-9437. TestNativeIO#testRenameTo fails on Windows due to assumption + that POSIX errno is embedded in NativeIOException. (Chris Nauroth via + suresh) Release 2.0.5-beta - UNRELEASED @@ -536,6 +543,9 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9358. "Auth failed" log should include exception string (todd) + HADOOP-9401. CodecPool: Add counters for number of (de)compressors + leased out. (kkambatl via tucu) + OPTIMIZATIONS HADOOP-9150. Avoid unnecessary DNS resolution attempts for logical URIs @@ -604,8 +614,17 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9125. LdapGroupsMapping threw CommunicationException after some idle time. (Kai Zheng via atm) - HADOOP-9357. Fallback to default authority if not specified in FileContext. - (Andrew Wang via eli) + HADOOP-9429. TestConfiguration fails with IBM JAVA. (Amir Sanjar via + suresh) + + HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim + Bondarev via jlowe) + + HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit + tests (Vadim Bondarev via jlowe) + + HADOOP-9211. Set default max heap size in HADOOP_CLIENT_OPTS to 512m + in order to avoid OOME. (Plamen Jeliazkov via shv) Release 2.0.4-alpha - UNRELEASED @@ -619,6 +638,9 @@ Release 2.0.4-alpha - UNRELEASED BUG FIXES + HADOOP-9467. Metrics2 record filter should check name as well as tags. + (Ganeshan Iyler via llu) + HADOOP-9406. hadoop-client leaks dependency on JDK tools jar. (tucu) HADOOP-9301. hadoop client servlet/jsp/jetty/tomcat JARs creating @@ -630,6 +652,9 @@ Release 2.0.4-alpha - UNRELEASED HADOOP-9444. Modify hadoop-policy.xml to replace unexpanded variables to a default value of '*'. (Roman Shaposhnik via vinodkv) + HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR, + breaking webhdfs. (tucu) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES @@ -1593,6 +1618,24 @@ Release 2.0.0-alpha - 05-23-2012 HADOOP-8655. Fix TextInputFormat for large deliminators. (Gelesh via bobby) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim + Bondarev via jlowe) + + HADOOP-9233. 
Cover package org.apache.hadoop.io.compress.zlib with unit + tests (Vadim Bondarev via jlowe) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index 42a0d05aaad..41289a9acda 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -62,7 +62,7 @@ export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANOD export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) -export HADOOP_CLIENT_OPTS="-Xmx128m $HADOOP_CLIENT_OPTS" +export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" # On secure datanodes, user to run the datanode as after dropping privileges diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index 3ba48d3b4cd..26f50503fef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -244,33 +244,17 @@ public final class FileContext { } /* - * Resolve a relative path passed from the user. - * - * Relative paths are resolved against the current working directory - * (e.g. "foo/bar" becomes "//foo/bar"). - * Fully-qualified URIs (e.g. "hdfs://nn:p/foo/bar") and slash-relative paths + * Remove relative part - return "absolute": + * If input is relative path ("foo/bar") add wd: ie "//foo/bar" + * A fully qualified uri ("hdfs://nn:p/foo/bar") or a slash-relative path * ("/foo/bar") are returned unchanged. * - * Additionally, we fix malformed URIs that specify a scheme but not an - * authority (e.g. "hdfs:///foo/bar"). Per RFC 2395, we remove the scheme - * if it matches the default FS, and let the default FS add in the default - * scheme and authority later (see {@link #AbstractFileSystem#checkPath}). - * * Applications that use FileContext should use #makeQualified() since - * they really want a fully-qualified URI. + * they really want a fully qualified URI. * Hence this method is not called makeAbsolute() and * has been deliberately declared private. */ private Path fixRelativePart(Path p) { - // Per RFC 2396 5.2, drop schema if there is a scheme but no authority. - if (p.hasSchemeAndNoAuthority()) { - String scheme = p.toUri().getScheme(); - if (scheme.equalsIgnoreCase(defaultFS.getUri().getScheme())) { - p = new Path(p.toUri().getSchemeSpecificPart()); - } - } - // Absolute paths are unchanged. Relative paths are resolved against the - // current working directory. 
if (p.isUriPathAbsolute()) { return p; } else { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java index 8299aa3be89..feef1c7bab2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java @@ -256,10 +256,6 @@ public class Path implements Comparable { return (isUriPathAbsolute() && uri.getScheme() == null && uri.getAuthority() == null); } - - public boolean hasSchemeAndNoAuthority() { - return uri.getScheme() != null && uri.getAuthority() == null; - } /** * True if the path component (i.e. directory) of this URI is absolute. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java index ed76012f808..63e3a09c307 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/CodecPool.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -29,6 +30,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ReflectionUtils; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; + /** * A global compressor/decompressor pool used to save and reuse * (possibly native) compression/decompression codecs. @@ -52,6 +57,29 @@ public class CodecPool { private static final Map, List> decompressorPool = new HashMap, List>(); + private static LoadingCache, AtomicInteger> createCache( + Class klass) { + return CacheBuilder.newBuilder().build( + new CacheLoader, AtomicInteger>() { + @Override + public AtomicInteger load(Class key) throws Exception { + return new AtomicInteger(); + } + }); + } + + /** + * Map to track the number of leased compressors + */ + private static final LoadingCache, AtomicInteger> compressorCounts = + createCache(Compressor.class); + + /** + * Map to tracks the number of leased decompressors + */ + private static final LoadingCache, AtomicInteger> decompressorCounts = + createCache(Decompressor.class); + private static T borrow(Map, List> pool, Class codecClass) { T codec = null; @@ -90,6 +118,21 @@ public class CodecPool { } } + @SuppressWarnings("unchecked") + private static int getLeaseCount( + LoadingCache, AtomicInteger> usageCounts, + Class codecClass) { + return usageCounts.getUnchecked((Class) codecClass).get(); + } + + private static void updateLeaseCount( + LoadingCache, AtomicInteger> usageCounts, T codec, int delta) { + if (codec != null) { + Class codecClass = ReflectionUtils.getClass(codec); + usageCounts.getUnchecked(codecClass).addAndGet(delta); + } + } + /** * Get a {@link Compressor} for the given {@link CompressionCodec} from the * pool or a new one. 
@@ -111,6 +154,7 @@ public class CodecPool { LOG.debug("Got recycled compressor"); } } + updateLeaseCount(compressorCounts, compressor, 1); return compressor; } @@ -137,6 +181,7 @@ public class CodecPool { LOG.debug("Got recycled decompressor"); } } + updateLeaseCount(decompressorCounts, decompressor, 1); return decompressor; } @@ -155,6 +200,7 @@ public class CodecPool { } compressor.reset(); payback(compressorPool, compressor); + updateLeaseCount(compressorCounts, compressor, -1); } /** @@ -173,5 +219,24 @@ public class CodecPool { } decompressor.reset(); payback(decompressorPool, decompressor); + updateLeaseCount(decompressorCounts, decompressor, -1); + } + + /** + * Return the number of leased {@link Compressor}s for this + * {@link CompressionCodec} + */ + public static int getLeasedCompressorsCount(CompressionCodec codec) { + return (codec == null) ? 0 : getLeaseCount(compressorCounts, + codec.getCompressorType()); + } + + /** + * Return the number of leased {@link Decompressor}s for this + * {@link CompressionCodec} + */ + public static int getLeasedDecompressorsCount(CompressionCodec codec) { + return (codec == null) ? 0 : getLeaseCount(decompressorCounts, + codec.getDecompressorType()); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsFilter.java index 47cbfba94a2..3ba8140f6da 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsFilter.java @@ -57,7 +57,7 @@ public abstract class MetricsFilter implements MetricsPlugin { * @return true to accept; false otherwise. */ public boolean accepts(MetricsRecord record) { - return accepts(record.tags()); + return accepts(record.name()) && accepts(record.tags()); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java index 1bc21450e60..86920f08b83 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopologyWithNodeGroup.java @@ -191,7 +191,12 @@ public class NetworkTopologyWithNodeGroup extends NetworkTopology { } rack = getNode(nodeGroup.getNetworkLocation()); - if (rack != null && !(rack instanceof InnerNode)) { + // rack should be an innerNode and with parent. + // note: rack's null parent case is: node's topology only has one layer, + // so rack is recognized as "/" and no parent. + // This will be recognized as a node with fault topology. 
+ if (rack != null && + (!(rack instanceof InnerNode) || rack.getParent() == null)) { throw new IllegalArgumentException("Unexpected data node " + node.toString() + " at an illegal network location"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index aa4fa28ad1f..fde11e72cf4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -27,8 +27,10 @@ import java.io.DataInputStream; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; +import java.util.Set; import javax.crypto.SecretKey; @@ -144,6 +146,10 @@ extends AbstractDelegationTokenIdentifier> return; } + protected void logExpireToken(TokenIdent ident) throws IOException { + return; + } + /** * Update the current master key * This is called once by startThreads before tokenRemoverThread is created, @@ -363,15 +369,25 @@ extends AbstractDelegationTokenIdentifier> } /** Remove expired delegation tokens from cache */ - private synchronized void removeExpiredToken() { + private void removeExpiredToken() throws IOException { long now = Time.now(); - Iterator i = currentTokens.values().iterator(); - while (i.hasNext()) { - long renewDate = i.next().getRenewDate(); - if (now > renewDate) { - i.remove(); + Set expiredTokens = new HashSet(); + synchronized (this) { + Iterator> i = + currentTokens.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry entry = i.next(); + long renewDate = entry.getValue().getRenewDate(); + if (renewDate < now) { + expiredTokens.add(entry.getKey()); + i.remove(); + } } } + // don't hold lock on 'this' to avoid edit log updates blocking token ops + for (TokenIdent ident : expiredTokens) { + logExpireToken(ident); + } } public void stopThreads() { diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c index 47f8dc1c9df..cd9b2a4d8b3 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/nativeio/NativeIO.c @@ -816,6 +816,7 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_nativeio_NativeIO_renameTo0(JNIEnv *env, jclass clazz, jstring jsrc, jstring jdst) { +#ifdef UNIX const char *src = NULL, *dst = NULL; src = (*env)->GetStringUTFChars(env, jsrc, NULL); @@ -829,6 +830,23 @@ jclass clazz, jstring jsrc, jstring jdst) done: if (src) (*env)->ReleaseStringUTFChars(env, jsrc, src); if (dst) (*env)->ReleaseStringUTFChars(env, jdst, dst); +#endif + +#ifdef WINDOWS + LPCWSTR src = NULL, dst = NULL; + + src = (LPCWSTR) (*env)->GetStringChars(env, jsrc, NULL); + if (!src) goto done; // exception was thrown + dst = (LPCWSTR) (*env)->GetStringChars(env, jdst, NULL); + if (!dst) goto done; // exception was thrown + if (!MoveFile(src, dst)) { + throw_ioe(env, GetLastError()); + } + +done: + if (src) (*env)->ReleaseStringChars(env, jsrc, src); + if (dst) 
(*env)->ReleaseStringChars(env, jdst, dst); +#endif } /** diff --git a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm index da69b303b37..7ac6755b5a4 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/ClusterSetup.apt.vm @@ -249,7 +249,7 @@ Hadoop MapReduce Next Generation - Cluster Setup *-------------------------+-------------------------+------------------------+ | <<>> | | | | | <<>> Scheduler class. | | -| | | <<>> (recommended) or <<>> | +| | | <<>> (recommended), <<>> (also recommended), or <<>> | *-------------------------+-------------------------+------------------------+ | <<>> | | | | | Minimum limit of memory to allocate to each container request at the <<>>. | | diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 8f5963c22cb..19ff6c6c994 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -51,6 +51,10 @@ public class TestConfiguration extends TestCase { final static String CONFIG = new File("./test-config.xml").getAbsolutePath(); final static String CONFIG2 = new File("./test-config2.xml").getAbsolutePath(); final static Random RAN = new Random(); + final static boolean IBMJAVA = System.getProperty("java.vendor").contains("IBM"); + final static String XMLHEADER = + IBMJAVA?"": + ""; @Override protected void setUp() throws Exception { @@ -327,8 +331,8 @@ public class TestConfiguration extends TestCase { ByteArrayOutputStream baos = new ByteArrayOutputStream(); conf.writeXml(baos); String result = baos.toString(); - assertTrue("Result has proper header", result.startsWith( - "")); + assertTrue("Result has proper header", result.startsWith(XMLHEADER)); + assertTrue("Result has proper footer", result.endsWith("")); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java index df2f7eb9597..d6efc52c42a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextMainOperationsBaseTest.java @@ -21,8 +21,6 @@ package org.apache.hadoop.fs; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; import java.util.EnumSet; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -1166,40 +1164,6 @@ public abstract class FileContextMainOperationsBaseTest { Assert.assertEquals(fc.getFileStatus(file), fc.getFileLinkStatus(file)); } } - - /** - * Test that URIs with a scheme, no authority, and absolute path component - * resolve with the authority of the default FS. 
- */ - @Test(timeout=30000) - public void testAbsolutePathSchemeNoAuthority() throws IOException, - URISyntaxException { - Path file = getTestRootPath(fc, "test/file"); - createFile(file); - URI uri = file.toUri(); - URI noAuthorityUri = new URI(uri.getScheme(), null, uri.getPath(), - uri.getQuery(), uri.getFragment()); - Path noAuthority = new Path(noAuthorityUri); - Assert.assertEquals(fc.getFileStatus(file), fc.getFileStatus(noAuthority)); - } - - /** - * Test that URIs with a scheme, no authority, and relative path component - * resolve with the authority of the default FS. - */ - @Test(timeout=30000) - public void testRelativePathSchemeNoAuthority() throws IOException, - URISyntaxException { - Path workDir = new Path(getAbsoluteTestRootPath(fc), new Path("test")); - fc.setWorkingDirectory(workDir); - Path file = new Path(workDir, "file"); - createFile(file); - URI uri = file.toUri(); - URI noAuthorityUri = new URI(uri.getScheme() + ":file"); - System.out.println(noAuthorityUri); - Path noAuthority = new Path(noAuthorityUri); - Assert.assertEquals(fc.getFileStatus(file), fc.getFileStatus(noAuthority)); - } protected void createFile(Path path) throws IOException { FSDataOutputStream out = fc.create(path, EnumSet.of(CREATE), diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java new file mode 100644 index 00000000000..35f84b950e4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -0,0 +1,524 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; +import org.apache.hadoop.io.compress.snappy.SnappyCompressor; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater; +import org.apache.hadoop.io.compress.zlib.ZlibCompressor; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.log4j.Logger; +import org.junit.Assert; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import static org.junit.Assert.*; + +public class CompressDecompressTester { + + private static final Logger logger = Logger + .getLogger(CompressDecompressTester.class); + + private final byte[] originalRawData; + + private ImmutableList> pairs = ImmutableList.of(); + private ImmutableList.Builder> builder = ImmutableList.builder(); + + private ImmutableSet stateges = ImmutableSet.of(); + + private PreAssertionTester assertionDelegate; + + public CompressDecompressTester(byte[] originalRawData) { + this.originalRawData = Arrays.copyOf(originalRawData, + originalRawData.length); + this.assertionDelegate = new PreAssertionTester() { + + @Override + public ImmutableList> filterOnAssumeWhat( + ImmutableList> pairs) { + ImmutableList.Builder> builder = ImmutableList + .builder(); + + for (TesterPair pair : pairs) { + if (isAvailable(pair)) + builder.add(pair); + } + return builder.build(); + } + }; + } + + private static boolean isNativeSnappyLoadable() { + boolean snappyAvailable = false; + boolean loaded = false; + try { + System.loadLibrary("snappy"); + logger.warn("Snappy native library is available"); + snappyAvailable = true; + boolean hadoopNativeAvailable = NativeCodeLoader.isNativeCodeLoaded(); + loaded = snappyAvailable && hadoopNativeAvailable; + if (loaded) { + logger.info("Snappy native library loaded"); + } else { + logger.warn("Snappy native library not loaded"); + } + } catch (Throwable t) { + logger.warn("Failed to load snappy: ", t); + return false; + } + return loaded; + } + + public static CompressDecompressTester of( + byte[] rawData) { + return new CompressDecompressTester(rawData); + } + + + public CompressDecompressTester withCompressDecompressPair( + T compressor, E decompressor) { + addPair( + compressor, + decompressor, + Joiner.on("_").join(compressor.getClass().getCanonicalName(), + decompressor.getClass().getCanonicalName())); + return this; + } + + public CompressDecompressTester withTestCases( + ImmutableSet stateges) { + this.stateges = ImmutableSet.copyOf(stateges); + return this; + } + + private void addPair(T compressor, E decompressor, String name) { + builder.add(new TesterPair(name, compressor, decompressor)); + } + + public void test() throws InstantiationException, IllegalAccessException { + pairs = builder.build(); + pairs = assertionDelegate.filterOnAssumeWhat(pairs); + + for (TesterPair pair : pairs) { + for (CompressionTestStrategy strategy : stateges) { + 
strategy.getTesterStrategy().assertCompression(pair.getName(), + pair.getCompressor(), pair.getDecompressor(), + Arrays.copyOf(originalRawData, originalRawData.length)); + } + } + endAll(pairs); + } + + private void endAll(ImmutableList> pairs) { + for (TesterPair pair : pairs) + pair.end(); + } + + interface PreAssertionTester { + ImmutableList> filterOnAssumeWhat( + ImmutableList> pairs); + } + + public enum CompressionTestStrategy { + + COMPRESS_DECOMPRESS_ERRORS(new TesterCompressionStrategy() { + private final Joiner joiner = Joiner.on("- "); + + @Override + public void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] rawData) { + assertTrue(checkSetInputNullPointerException(compressor)); + assertTrue(checkSetInputNullPointerException(decompressor)); + + assertTrue(checkCompressArrayIndexOutOfBoundsException(compressor, + rawData)); + assertTrue(checkCompressArrayIndexOutOfBoundsException(decompressor, + rawData)); + + assertTrue(checkCompressNullPointerException(compressor, rawData)); + assertTrue(checkCompressNullPointerException(decompressor, rawData)); + + assertTrue(checkSetInputArrayIndexOutOfBoundsException(compressor)); + assertTrue(checkSetInputArrayIndexOutOfBoundsException(decompressor)); + } + + private boolean checkSetInputNullPointerException(Compressor compressor) { + try { + compressor.setInput(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkSetInputNullPointerException error !!!")); + } + return false; + } + + private boolean checkCompressNullPointerException(Compressor compressor, + byte[] rawData) { + try { + compressor.setInput(rawData, 0, rawData.length); + compressor.compress(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkCompressNullPointerException error !!!")); + } + return false; + } + + private boolean checkCompressNullPointerException( + Decompressor decompressor, byte[] rawData) { + try { + decompressor.setInput(rawData, 0, rawData.length); + decompressor.decompress(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkCompressNullPointerException error !!!")); + } + return false; + } + + private boolean checkSetInputNullPointerException( + Decompressor decompressor) { + try { + decompressor.setInput(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkSetInputNullPointerException error !!!")); + } + return false; + } + + private boolean checkSetInputArrayIndexOutOfBoundsException( + Compressor compressor) { + try { + compressor.setInput(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkSetInputArrayIndexOutOfBoundsException error !!!")); + } + return false; + } + + private boolean checkCompressArrayIndexOutOfBoundsException( + Compressor compressor, byte[] rawData) { + try { + compressor.setInput(rawData, 0, rawData.length); + compressor.compress(new byte[rawData.length], 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + 
logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkCompressArrayIndexOutOfBoundsException error !!!")); + } + return false; + } + + private boolean checkCompressArrayIndexOutOfBoundsException( + Decompressor decompressor, byte[] rawData) { + try { + decompressor.setInput(rawData, 0, rawData.length); + decompressor.decompress(new byte[rawData.length], 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkCompressArrayIndexOutOfBoundsException error !!!")); + } + return false; + } + + private boolean checkSetInputArrayIndexOutOfBoundsException( + Decompressor decompressor) { + try { + decompressor.setInput(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkNullPointerException error !!!")); + } + return false; + } + + }), + + COMPRESS_DECOMPRESS_SINGLE_BLOCK(new TesterCompressionStrategy() { + final Joiner joiner = Joiner.on("- "); + + @Override + public void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] rawData) { + + int cSize = 0; + int decompressedSize = 0; + byte[] compressedResult = new byte[rawData.length]; + byte[] decompressedBytes = new byte[rawData.length]; + try { + assertTrue( + joiner.join(name, "compressor.needsInput before error !!!"), + compressor.needsInput()); + assertTrue( + joiner.join(name, "compressor.getBytesWritten before error !!!"), + compressor.getBytesWritten() == 0); + compressor.setInput(rawData, 0, rawData.length); + compressor.finish(); + while (!compressor.finished()) { + cSize += compressor.compress(compressedResult, 0, + compressedResult.length); + } + compressor.reset(); + + assertTrue( + joiner.join(name, "decompressor.needsInput() before error !!!"), + decompressor.needsInput()); + decompressor.setInput(compressedResult, 0, cSize); + assertFalse( + joiner.join(name, "decompressor.needsInput() after error !!!"), + decompressor.needsInput()); + while (!decompressor.finished()) { + decompressedSize = decompressor.decompress(decompressedBytes, 0, + decompressedBytes.length); + } + decompressor.reset(); + assertTrue(joiner.join(name, " byte size not equals error !!!"), + decompressedSize == rawData.length); + assertArrayEquals( + joiner.join(name, " byte arrays not equals error !!!"), rawData, + decompressedBytes); + } catch (Exception ex) { + fail(joiner.join(name, ex.getMessage())); + } + } + }), + + COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM(new TesterCompressionStrategy() { + final Joiner joiner = Joiner.on("- "); + final ImmutableMap, Integer> emptySize = ImmutableMap + .of(Lz4Compressor.class, 4, ZlibCompressor.class, 16, + SnappyCompressor.class, 4, BuiltInZlibDeflater.class, 16); + + @Override + void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] originalRawData) { + byte[] buf = null; + ByteArrayInputStream bytesIn = null; + BlockDecompressorStream blockDecompressorStream = null; + ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); + // close without write + try { + compressor.reset(); + // decompressor.end(); + BlockCompressorStream blockCompressorStream = new BlockCompressorStream( + bytesOut, compressor, 1024, 0); + blockCompressorStream.close(); + // check compressed output + buf = bytesOut.toByteArray(); + int emSize = emptySize.get(compressor.getClass()); + 
Assert.assertEquals( + joiner.join(name, "empty stream compressed output size != " + + emSize), emSize, buf.length); + // use compressed output as input for decompression + bytesIn = new ByteArrayInputStream(buf); + // create decompression stream + blockDecompressorStream = new BlockDecompressorStream(bytesIn, + decompressor, 1024); + // no byte is available because stream was closed + assertEquals(joiner.join(name, " return value is not -1"), -1, + blockDecompressorStream.read()); + } catch (IOException e) { + fail(joiner.join(name, e.getMessage())); + } finally { + if (blockDecompressorStream != null) + try { + bytesOut.close(); + blockDecompressorStream.close(); + bytesIn.close(); + blockDecompressorStream.close(); + } catch (IOException e) { + } + } + } + + }), + + COMPRESS_DECOMPRESS_BLOCK(new TesterCompressionStrategy() { + private final Joiner joiner = Joiner.on("- "); + private static final int BLOCK_SIZE = 512; + private final byte[] operationBlock = new byte[BLOCK_SIZE]; + // Use default of 512 as bufferSize and compressionOverhead of + // (1% of bufferSize + 12 bytes) = 18 bytes (zlib algorithm). + private static final int overheadSpace = BLOCK_SIZE / 100 + 12; + + @Override + public void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] originalRawData) { + int off = 0; + int len = originalRawData.length; + int maxSize = BLOCK_SIZE - overheadSpace; + int compresSize = 0; + List blockLabels = new ArrayList(); + ByteArrayOutputStream compressedOut = new ByteArrayOutputStream(); + ByteArrayOutputStream decompressOut = new ByteArrayOutputStream(); + try { + if (originalRawData.length > maxSize) { + do { + int bufLen = Math.min(len, maxSize); + compressor.setInput(originalRawData, off, bufLen); + compressor.finish(); + while (!compressor.finished()) { + compresSize = compressor.compress(operationBlock, 0, + operationBlock.length); + compressedOut.write(operationBlock, 0, compresSize); + blockLabels.add(compresSize); + } + compressor.reset(); + off += bufLen; + len -= bufLen; + } while (len > 0); + } + + off = 0; + // compressed bytes + byte[] compressedBytes = compressedOut.toByteArray(); + for (Integer step : blockLabels) { + decompressor.setInput(compressedBytes, off, step); + while (!decompressor.finished()) { + int dSize = decompressor.decompress(operationBlock, 0, + operationBlock.length); + decompressOut.write(operationBlock, 0, dSize); + } + decompressor.reset(); + off = off + step; + } + assertArrayEquals( + joiner.join(name, "byte arrays not equals error !!!"), + originalRawData, decompressOut.toByteArray()); + } catch (Exception ex) { + fail(joiner.join(name, ex.getMessage())); + } finally { + try { + compressedOut.close(); + } catch (IOException e) { + } + try { + decompressOut.close(); + } catch (IOException e) { + } + } + } + }); + + private final TesterCompressionStrategy testerStrategy; + + CompressionTestStrategy(TesterCompressionStrategy testStrategy) { + this.testerStrategy = testStrategy; + } + + public TesterCompressionStrategy getTesterStrategy() { + return testerStrategy; + } + } + + static final class TesterPair { + private final T compressor; + private final E decompressor; + private final String name; + + TesterPair(String name, T compressor, E decompressor) { + this.compressor = compressor; + this.decompressor = decompressor; + this.name = name; + } + + public void end() { + Configuration cfg = new Configuration(); + compressor.reinit(cfg); + compressor.end(); + decompressor.end(); + } + + public T getCompressor() { + 
return compressor; + } + + public E getDecompressor() { + return decompressor; + } + + public String getName() { + return name; + } + } + + /** + * Method for compressor availability check + */ + private static boolean isAvailable(TesterPair pair) { + Compressor compressor = pair.compressor; + + if (compressor.getClass().isAssignableFrom(Lz4Compressor.class) + && (NativeCodeLoader.isNativeCodeLoaded())) + return true; + + else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) + && NativeCodeLoader.isNativeCodeLoaded()) + return true; + + else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { + return ZlibFactory.isNativeZlibLoaded(new Configuration()); + } + else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class) + && isNativeSnappyLoadable()) + return true; + + return false; + } + + abstract static class TesterCompressionStrategy { + + protected final Logger logger = Logger.getLogger(getClass()); + + abstract void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] originalRawData); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java new file mode 100644 index 00000000000..551f282889e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodecPool.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress; + +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Before; +import org.junit.Test; + +public class TestCodecPool { + private final String LEASE_COUNT_ERR = + "Incorrect number of leased (de)compressors"; + DefaultCodec codec; + + @Before + public void setup() { + this.codec = new DefaultCodec(); + this.codec.setConf(new Configuration()); + } + + @Test(timeout = 1000) + public void testCompressorPoolCounts() { + // Get two compressors and return them + Compressor comp1 = CodecPool.getCompressor(codec); + Compressor comp2 = CodecPool.getCompressor(codec); + assertEquals(LEASE_COUNT_ERR, 2, + CodecPool.getLeasedCompressorsCount(codec)); + + CodecPool.returnCompressor(comp2); + assertEquals(LEASE_COUNT_ERR, 1, + CodecPool.getLeasedCompressorsCount(codec)); + + CodecPool.returnCompressor(comp1); + assertEquals(LEASE_COUNT_ERR, 0, + CodecPool.getLeasedCompressorsCount(codec)); + } + + @Test(timeout = 1000) + public void testDecompressorPoolCounts() { + // Get two decompressors and return them + Decompressor decomp1 = CodecPool.getDecompressor(codec); + Decompressor decomp2 = CodecPool.getDecompressor(codec); + assertEquals(LEASE_COUNT_ERR, 2, + CodecPool.getLeasedDecompressorsCount(codec)); + + CodecPool.returnDecompressor(decomp2); + assertEquals(LEASE_COUNT_ERR, 1, + CodecPool.getLeasedDecompressorsCount(codec)); + + CodecPool.returnDecompressor(decomp1); + assertEquals(LEASE_COUNT_ERR, 0, + CodecPool.getLeasedDecompressorsCount(codec)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java new file mode 100644 index 00000000000..a8ac993c47b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import static org.junit.Assert.fail; +import java.util.Random; +import org.apache.hadoop.io.compress.CompressDecompressTester.CompressionTestStrategy; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; +import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; +import org.apache.hadoop.io.compress.snappy.SnappyCompressor; +import org.apache.hadoop.io.compress.snappy.SnappyDecompressor; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater; +import org.junit.Test; +import com.google.common.collect.ImmutableSet; + +/** + * Test for pairs: + *
+ * SnappyCompressor/SnappyDecompressor
+ * Lz4Compressor/Lz4Decompressor
+ * BuiltInZlibDeflater/BuiltInZlibInflater
+ *
+ *
+ * Note: we can't use ZlibCompressor/ZlibDecompressor here
+ * because their constructors can throw an exception if the native libraries are not found.
+ * The ZlibCompressor/ZlibDecompressor pair is instead covered by {@code TestZlibCompressorDecompressor}.
+ *
+ * 
+ * + */ +public class TestCompressorDecompressor { + + private static final Random rnd = new Random(12345L); + + @Test + public void testCompressorDecompressor() { + // no more for this data + int SIZE = 44 * 1024; + + byte[] rawData = generate(SIZE); + try { + CompressDecompressTester.of(rawData) + .withCompressDecompressPair(new SnappyCompressor(), new SnappyDecompressor()) + .withCompressDecompressPair(new Lz4Compressor(), new Lz4Decompressor()) + .withCompressDecompressPair(new BuiltInZlibDeflater(), new BuiltInZlibInflater()) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + + } catch (Exception ex) { + fail("testCompressorDecompressor error !!!" + ex); + } + } + + @Test + public void testCompressorDecompressorWithExeedBufferLimit() { + int BYTE_SIZE = 100 * 1024; + byte[] rawData = generate(BYTE_SIZE); + try { + CompressDecompressTester.of(rawData) + .withCompressDecompressPair( + new SnappyCompressor(BYTE_SIZE + BYTE_SIZE / 2), + new SnappyDecompressor(BYTE_SIZE + BYTE_SIZE / 2)) + .withCompressDecompressPair(new Lz4Compressor(BYTE_SIZE), + new Lz4Decompressor(BYTE_SIZE)) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + + } catch (Exception ex) { + fail("testCompressorDecompressorWithExeedBufferLimit error !!!" + ex); + } + } + + public static byte[] generate(int size) { + byte[] array = new byte[size]; + for (int i = 0; i < size; i++) + array[i] = (byte) rnd.nextInt(16); + return array; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java new file mode 100644 index 00000000000..e8555b23887 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java @@ -0,0 +1,316 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress.lz4; + +import static org.junit.Assert.*; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.compress.BlockCompressorStream; +import org.apache.hadoop.io.compress.BlockDecompressorStream; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Lz4Codec; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; +import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; +import org.junit.Before; +import org.junit.Test; +import static org.junit.Assume.*; + +public class TestLz4CompressorDecompressor { + + private static final Random rnd = new Random(12345l); + + @Before + public void before() { + assumeTrue(Lz4Codec.isNativeCodeLoaded()); + } + + //test on NullPointerException in {@code compressor.setInput()} + @Test + public void testCompressorSetInputNullPointerException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + compressor.setInput(null, 0, 10); + fail("testCompressorSetInputNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testCompressorSetInputNullPointerException ex error !!!"); + } + } + + //test on NullPointerException in {@code decompressor.setInput()} + @Test + public void testDecompressorSetInputNullPointerException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + decompressor.setInput(null, 0, 10); + fail("testDecompressorSetInputNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorSetInputNullPointerException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in {@code compressor.setInput()} + @Test + public void testCompressorSetInputAIOBException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + compressor.setInput(new byte[] {}, -5, 10); + fail("testCompressorSetInputAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception ex) { + fail("testCompressorSetInputAIOBException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in {@code decompressor.setInput()} + @Test + public void testDecompressorSetInputAIOUBException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + decompressor.setInput(new byte[] {}, -5, 10); + fail("testDecompressorSetInputAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorSetInputAIOBException ex error !!!"); + } + } + + //test on NullPointerException in {@code compressor.compress()} + @Test + public void testCompressorCompressNullPointerException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + byte[] bytes = generate(1024 * 6); + compressor.setInput(bytes, 0, bytes.length); + compressor.compress(null, 0, 0); + fail("testCompressorCompressNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testCompressorCompressNullPointerException ex error !!!"); + } + } + + //test on NullPointerException 
in {@code decompressor.decompress()} + @Test + public void testDecompressorCompressNullPointerException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + byte[] bytes = generate(1024 * 6); + decompressor.setInput(bytes, 0, bytes.length); + decompressor.decompress(null, 0, 0); + fail("testDecompressorCompressNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorCompressNullPointerException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in {@code compressor.compress()} + @Test + public void testCompressorCompressAIOBException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + byte[] bytes = generate(1024 * 6); + compressor.setInput(bytes, 0, bytes.length); + compressor.compress(new byte[] {}, 0, -1); + fail("testCompressorCompressAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testCompressorCompressAIOBException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in decompressor.decompress() + @Test + public void testDecompressorCompressAIOBException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + byte[] bytes = generate(1024 * 6); + decompressor.setInput(bytes, 0, bytes.length); + decompressor.decompress(new byte[] {}, 0, -1); + fail("testDecompressorCompressAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorCompressAIOBException ex error !!!"); + } + } + + // test Lz4Compressor compressor.compress() + @Test + public void testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize() { + int BYTES_SIZE = 1024 * 64 + 1; + try { + Lz4Compressor compressor = new Lz4Compressor(); + byte[] bytes = generate(BYTES_SIZE); + assertTrue("needsInput error !!!", compressor.needsInput()); + compressor.setInput(bytes, 0, bytes.length); + byte[] emptyBytes = new byte[BYTES_SIZE]; + int csize = compressor.compress(emptyBytes, 0, bytes.length); + assertTrue( + "testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize error !!!", + csize != 0); + } catch (Exception ex) { + fail("testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize ex error !!!"); + } + } + + // test compress/decompress process + @Test + public void testCompressDecompress() { + int BYTE_SIZE = 1024 * 54; + byte[] bytes = generate(BYTE_SIZE); + Lz4Compressor compressor = new Lz4Compressor(); + try { + compressor.setInput(bytes, 0, bytes.length); + assertTrue("Lz4CompressDecompress getBytesRead error !!!", + compressor.getBytesRead() > 0); + assertTrue( + "Lz4CompressDecompress getBytesWritten before compress error !!!", + compressor.getBytesWritten() == 0); + + byte[] compressed = new byte[BYTE_SIZE]; + int cSize = compressor.compress(compressed, 0, compressed.length); + assertTrue( + "Lz4CompressDecompress getBytesWritten after compress error !!!", + compressor.getBytesWritten() > 0); + Lz4Decompressor decompressor = new Lz4Decompressor(); + // set as input for decompressor only compressed data indicated with cSize + decompressor.setInput(compressed, 0, cSize); + byte[] decompressed = new byte[BYTE_SIZE]; + decompressor.decompress(decompressed, 0, decompressed.length); + + assertTrue("testLz4CompressDecompress finished error !!!", decompressor.finished()); + assertArrayEquals(bytes, decompressed); + compressor.reset(); + decompressor.reset(); + assertTrue("decompressor getRemaining error 
!!!",decompressor.getRemaining() == 0); + } catch (Exception e) { + fail("testLz4CompressDecompress ex error!!!"); + } + } + + // test compress/decompress with empty stream + @Test + public void testCompressorDecompressorEmptyStreamLogic() { + ByteArrayInputStream bytesIn = null; + ByteArrayOutputStream bytesOut = null; + byte[] buf = null; + BlockDecompressorStream blockDecompressorStream = null; + try { + // compress empty stream + bytesOut = new ByteArrayOutputStream(); + BlockCompressorStream blockCompressorStream = new BlockCompressorStream( + bytesOut, new Lz4Compressor(), 1024, 0); + // close without write + blockCompressorStream.close(); + // check compressed output + buf = bytesOut.toByteArray(); + assertEquals("empty stream compressed output size != 4", 4, buf.length); + // use compressed output as input for decompression + bytesIn = new ByteArrayInputStream(buf); + // create decompression stream + blockDecompressorStream = new BlockDecompressorStream(bytesIn, + new Lz4Decompressor(), 1024); + // no byte is available because stream was closed + assertEquals("return value is not -1", -1, blockDecompressorStream.read()); + } catch (Exception e) { + fail("testCompressorDecompressorEmptyStreamLogic ex error !!!" + + e.getMessage()); + } finally { + if (blockDecompressorStream != null) + try { + bytesIn.close(); + bytesOut.close(); + blockDecompressorStream.close(); + } catch (IOException e) { + } + } + } + + // test compress/decompress process through CompressionOutputStream/CompressionInputStream api + @Test + public void testCompressorDecopressorLogicWithCompressionStreams() { + DataOutputStream deflateOut = null; + DataInputStream inflateIn = null; + int BYTE_SIZE = 1024 * 100; + byte[] bytes = generate(BYTE_SIZE); + int bufferSize = 262144; + int compressionOverhead = (bufferSize / 6) + 32; + try { + DataOutputBuffer compressedDataBuffer = new DataOutputBuffer(); + CompressionOutputStream deflateFilter = new BlockCompressorStream( + compressedDataBuffer, new Lz4Compressor(bufferSize), bufferSize, + compressionOverhead); + deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter)); + deflateOut.write(bytes, 0, bytes.length); + deflateOut.flush(); + deflateFilter.finish(); + + DataInputBuffer deCompressedDataBuffer = new DataInputBuffer(); + deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, + compressedDataBuffer.getLength()); + + CompressionInputStream inflateFilter = new BlockDecompressorStream( + deCompressedDataBuffer, new Lz4Decompressor(bufferSize), bufferSize); + + inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter)); + + byte[] result = new byte[BYTE_SIZE]; + inflateIn.read(result); + + assertArrayEquals("original array not equals compress/decompressed array", result, + bytes); + } catch (IOException e) { + fail("testLz4CompressorDecopressorLogicWithCompressionStreams ex error !!!"); + } finally { + try { + if (deflateOut != null) + deflateOut.close(); + if (inflateIn != null) + inflateIn.close(); + } catch (Exception e) { + } + } + } + + public static byte[] generate(int size) { + byte[] array = new byte[size]; + for (int i = 0; i < size; i++) + array[i] = (byte)rnd.nextInt(16); + return array; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java new file mode 100644 index 00000000000..6e792d1e4ea --- /dev/null +++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java @@ -0,0 +1,362 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress.zlib; + +import static org.junit.Assert.*; +import static org.junit.Assume.*; +import java.io.IOException; +import java.io.InputStream; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.compress.CompressDecompressTester; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DecompressorStream; +import org.apache.hadoop.io.compress.CompressDecompressTester.CompressionTestStrategy; +import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel; +import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy; +import org.junit.Before; +import org.junit.Test; +import com.google.common.collect.ImmutableSet; + +public class TestZlibCompressorDecompressor { + + private static final Random random = new Random(12345L); + + @Before + public void before() { + assumeTrue(ZlibFactory.isNativeZlibLoaded(new Configuration())); + } + + @Test + public void testZlibCompressorDecompressor() { + try { + int SIZE = 44 * 1024; + byte[] rawData = generate(SIZE); + + CompressDecompressTester.of(rawData) + .withCompressDecompressPair(new ZlibCompressor(), new ZlibDecompressor()) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + } catch (Exception ex) { + fail("testCompressorDecompressor error !!!" 
+ ex); + } + } + + @Test + public void testCompressorDecompressorWithExceedBufferLimit() { + int BYTE_SIZE = 100 * 1024; + byte[] rawData = generate(BYTE_SIZE); + try { + CompressDecompressTester.of(rawData) + .withCompressDecompressPair( + new ZlibCompressor( + org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel.BEST_COMPRESSION, + CompressionStrategy.DEFAULT_STRATEGY, + org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionHeader.DEFAULT_HEADER, + BYTE_SIZE), + new ZlibDecompressor( + org.apache.hadoop.io.compress.zlib.ZlibDecompressor.CompressionHeader.DEFAULT_HEADER, + BYTE_SIZE)) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + } catch (Exception ex) { + fail("testCompressorDecompressorWithExceedBufferLimit error !!!" + ex); + } + } + + + @Test + public void testZlibCompressorDecompressorWithConfiguration() { + Configuration conf = new Configuration(); + conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true); + if (ZlibFactory.isNativeZlibLoaded(conf)) { + byte[] rawData; + int tryNumber = 5; + int BYTE_SIZE = 10 * 1024; + Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf); + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + rawData = generate(BYTE_SIZE); + try { + for (int i = 0; i < tryNumber; i++) + compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor, + (ZlibDecompressor) zlibDecompressor); + zlibCompressor.reinit(conf); + } catch (Exception ex) { + fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex); + } + } else { + assertTrue("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + } + } + + @Test + public void testZlibCompressDecompress() { + byte[] rawData = null; + int rawDataSize = 0; + rawDataSize = 1024 * 64; + rawData = generate(rawDataSize); + try { + ZlibCompressor compressor = new ZlibCompressor(); + ZlibDecompressor decompressor = new ZlibDecompressor(); + assertFalse("testZlibCompressDecompress finished error", + compressor.finished()); + compressor.setInput(rawData, 0, rawData.length); + assertTrue("testZlibCompressDecompress getBytesRead before error", + compressor.getBytesRead() == 0); + compressor.finish(); + + byte[] compressedResult = new byte[rawDataSize]; + int cSize = compressor.compress(compressedResult, 0, rawDataSize); + assertTrue("testZlibCompressDecompress getBytesRead after error", + compressor.getBytesRead() == rawDataSize); + assertTrue( + "testZlibCompressDecompress compressed size not less than original size", + cSize < rawDataSize); + decompressor.setInput(compressedResult, 0, cSize); + byte[] decompressedBytes = new byte[rawDataSize]; + decompressor.decompress(decompressedBytes, 0, decompressedBytes.length); + assertArrayEquals("testZlibCompressDecompress arrays not equal ", + rawData, decompressedBytes); + compressor.reset(); + decompressor.reset(); + } catch (IOException ex) { + fail("testZlibCompressDecompress ex !!!" 
+ ex); + } + } + + @Test + public void testZlibCompressorDecompressorSetDictionary() { + Configuration conf = new Configuration(); + conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true); + if (ZlibFactory.isNativeZlibLoaded(conf)) { + Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf); + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + + checkSetDictionaryNullPointerException(zlibCompressor); + checkSetDictionaryNullPointerException(zlibDecompressor); + + checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor); + checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor); + } else { + assertTrue("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + } + } + + @Test + public void testZlibFactory() { + Configuration cfg = new Configuration(); + + assertTrue("testZlibFactory compression level error !!!", + CompressionLevel.DEFAULT_COMPRESSION == ZlibFactory + .getCompressionLevel(cfg)); + + assertTrue("testZlibFactory compression strategy error !!!", + CompressionStrategy.DEFAULT_STRATEGY == ZlibFactory + .getCompressionStrategy(cfg)); + + ZlibFactory.setCompressionLevel(cfg, CompressionLevel.BEST_COMPRESSION); + assertTrue("testZlibFactory compression strategy error !!!", + CompressionLevel.BEST_COMPRESSION == ZlibFactory + .getCompressionLevel(cfg)); + + ZlibFactory.setCompressionStrategy(cfg, CompressionStrategy.FILTERED); + assertTrue("testZlibFactory compression strategy error !!!", + CompressionStrategy.FILTERED == ZlibFactory.getCompressionStrategy(cfg)); + } + + + private boolean checkSetDictionaryNullPointerException( + Decompressor decompressor) { + try { + decompressor.setDictionary(null, 0, 1); + } catch (NullPointerException ex) { + return true; + } catch (Exception ex) { + } + return false; + } + + private boolean checkSetDictionaryNullPointerException(Compressor compressor) { + try { + compressor.setDictionary(null, 0, 1); + } catch (NullPointerException ex) { + return true; + } catch (Exception ex) { + } + return false; + } + + private boolean checkSetDictionaryArrayIndexOutOfBoundsException( + Compressor compressor) { + try { + compressor.setDictionary(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + } + return false; + } + + private boolean checkSetDictionaryArrayIndexOutOfBoundsException( + Decompressor decompressor) { + try { + decompressor.setDictionary(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + } + return false; + } + + private byte[] compressDecompressZlib(byte[] rawData, + ZlibCompressor zlibCompressor, ZlibDecompressor zlibDecompressor) + throws IOException { + int cSize = 0; + byte[] compressedByte = new byte[rawData.length]; + byte[] decompressedRawData = new byte[rawData.length]; + zlibCompressor.setInput(rawData, 0, rawData.length); + zlibCompressor.finish(); + while (!zlibCompressor.finished()) { + cSize = zlibCompressor.compress(compressedByte, 0, compressedByte.length); + } + zlibCompressor.reset(); + + assertTrue(zlibDecompressor.getBytesWritten() == 0); + assertTrue(zlibDecompressor.getBytesRead() == 0); + assertTrue(zlibDecompressor.needsInput()); + zlibDecompressor.setInput(compressedByte, 0, cSize); + assertFalse(zlibDecompressor.needsInput()); + while (!zlibDecompressor.finished()) { + zlibDecompressor.decompress(decompressedRawData, 0, + decompressedRawData.length); + } + 
assertTrue(zlibDecompressor.getBytesWritten() == rawData.length); + assertTrue(zlibDecompressor.getBytesRead() == cSize); + zlibDecompressor.reset(); + assertTrue(zlibDecompressor.getRemaining() == 0); + assertArrayEquals( + "testZlibCompressorDecompressorWithConfiguration array equals error", + rawData, decompressedRawData); + + return decompressedRawData; + } + + @Test + public void testBuiltInGzipDecompressorExceptions() { + BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor(); + try { + decompresser.setInput(null, 0, 1); + } catch (NullPointerException ex) { + // expected + } catch (Exception ex) { + fail("testBuiltInGzipDecompressorExceptions npe error " + ex); + } + + try { + decompresser.setInput(new byte[] { 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception ex) { + fail("testBuiltInGzipDecompressorExceptions aioob error" + ex); + } + + assertTrue("decompresser.getBytesRead error", + decompresser.getBytesRead() == 0); + assertTrue("decompresser.getRemaining error", + decompresser.getRemaining() == 0); + decompresser.reset(); + decompresser.end(); + + InputStream decompStream = null; + try { + // invalid 0 and 1 bytes , must be 31, -117 + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 0 and 1 byte in gzip stream" + ex); + } + + // invalid 2 byte, must be 8 + try { + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 2 byte in gzip stream" + ex); + } + + try { + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 3 byte in gzip stream" + ex); + } + try { + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 3 byte make hasExtraField" + ex); + } + } + + public static byte[] generate(int size) { + byte[] data = new byte[size]; + for (int i = 0; i < size; i++) + data[i] = (byte)random.nextInt(16); + return data; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIO.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIO.java index 
0602d302720..f5fc49dbde6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIO.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/nativeio/TestNativeIO.java @@ -446,7 +446,13 @@ public class TestNativeIO { NativeIO.renameTo(nonExistentFile, targetFile); Assert.fail(); } catch (NativeIOException e) { - Assert.assertEquals(e.getErrno(), Errno.ENOENT); + if (Path.WINDOWS) { + Assert.assertEquals( + String.format("The system cannot find the file specified.%n"), + e.getMessage()); + } else { + Assert.assertEquals(Errno.ENOENT, e.getErrno()); + } } // Test renaming a file to itself. It should succeed and do nothing. @@ -465,7 +471,13 @@ public class TestNativeIO { NativeIO.renameTo(sourceFile, badTarget); Assert.fail(); } catch (NativeIOException e) { - Assert.assertEquals(e.getErrno(), Errno.ENOTDIR); + if (Path.WINDOWS) { + Assert.assertEquals( + String.format("The parameter is incorrect.%n"), + e.getMessage()); + } else { + Assert.assertEquals(Errno.ENOTDIR, e.getErrno()); + } } FileUtils.deleteQuietly(TEST_DIR); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/filter/TestPatternFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/filter/TestPatternFilter.java index 29849605e0f..2bdfdb978a9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/filter/TestPatternFilter.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/filter/TestPatternFilter.java @@ -24,7 +24,9 @@ import java.util.List; import org.apache.commons.configuration.SubsetConfiguration; import org.junit.Test; import static org.junit.Assert.*; +import static org.mockito.Mockito.*; +import org.apache.hadoop.metrics2.MetricsRecord; import org.apache.hadoop.metrics2.MetricsTag; import org.apache.hadoop.metrics2.impl.ConfigBuilder; import static org.apache.hadoop.metrics2.lib.Interns.*; @@ -38,6 +40,8 @@ public class TestPatternFilter { SubsetConfiguration empty = new ConfigBuilder().subset(""); shouldAccept(empty, "anything"); shouldAccept(empty, Arrays.asList(tag("key", "desc", "value"))); + shouldAccept(empty, mockMetricsRecord("anything", Arrays.asList( + tag("key", "desc", "value")))); } /** @@ -50,9 +54,15 @@ public class TestPatternFilter { shouldAccept(wl, "foo"); shouldAccept(wl, Arrays.asList(tag("bar", "", ""), tag("foo", "", "f"))); + shouldAccept(wl, mockMetricsRecord("foo", Arrays.asList( + tag("bar", "", ""), tag("foo", "", "f")))); shouldReject(wl, "bar"); shouldReject(wl, Arrays.asList(tag("bar", "", ""))); shouldReject(wl, Arrays.asList(tag("foo", "", "boo"))); + shouldReject(wl, mockMetricsRecord("bar", Arrays.asList( + tag("foo", "", "f")))); + shouldReject(wl, mockMetricsRecord("foo", Arrays.asList( + tag("bar", "", "")))); } /** @@ -64,9 +74,15 @@ public class TestPatternFilter { .add("p.exclude.tags", "foo:f").subset("p"); shouldAccept(bl, "bar"); shouldAccept(bl, Arrays.asList(tag("bar", "", ""))); + shouldAccept(bl, mockMetricsRecord("bar", Arrays.asList( + tag("bar", "", "")))); shouldReject(bl, "foo"); shouldReject(bl, Arrays.asList(tag("bar", "", ""), tag("foo", "", "f"))); + shouldReject(bl, mockMetricsRecord("foo", Arrays.asList( + tag("bar", "", "")))); + shouldReject(bl, mockMetricsRecord("bar", Arrays.asList( + tag("bar", "", ""), tag("foo", "", "f")))); } /** @@ -81,10 +97,18 @@ public class TestPatternFilter { .add("p.exclude.tags", "bar:b").subset("p"); shouldAccept(c, 
"foo"); shouldAccept(c, Arrays.asList(tag("foo", "", "f"))); + shouldAccept(c, mockMetricsRecord("foo", Arrays.asList( + tag("foo", "", "f")))); shouldReject(c, "bar"); shouldReject(c, Arrays.asList(tag("bar", "", "b"))); + shouldReject(c, mockMetricsRecord("bar", Arrays.asList( + tag("foo", "", "f")))); + shouldReject(c, mockMetricsRecord("foo", Arrays.asList( + tag("bar", "", "b")))); shouldAccept(c, "foobar"); shouldAccept(c, Arrays.asList(tag("foobar", "", ""))); + shouldAccept(c, mockMetricsRecord("foobar", Arrays.asList( + tag("foobar", "", "")))); } /** @@ -98,6 +122,8 @@ public class TestPatternFilter { .add("p.exclude.tags", "foo:f").subset("p"); shouldAccept(c, "foo"); shouldAccept(c, Arrays.asList(tag("foo", "", "f"))); + shouldAccept(c, mockMetricsRecord("foo", Arrays.asList( + tag("foo", "", "f")))); } static void shouldAccept(SubsetConfiguration conf, String s) { @@ -110,6 +136,17 @@ public class TestPatternFilter { assertTrue("accepts "+ tags, newRegexFilter(conf).accepts(tags)); } + /** + * Asserts that filters with the given configuration accept the given record. + * + * @param conf SubsetConfiguration containing filter configuration + * @param record MetricsRecord to check + */ + static void shouldAccept(SubsetConfiguration conf, MetricsRecord record) { + assertTrue("accepts " + record, newGlobFilter(conf).accepts(record)); + assertTrue("accepts " + record, newRegexFilter(conf).accepts(record)); + } + static void shouldReject(SubsetConfiguration conf, String s) { assertTrue("rejects "+ s, !newGlobFilter(conf).accepts(s)); assertTrue("rejects "+ s, !newRegexFilter(conf).accepts(s)); @@ -120,6 +157,17 @@ public class TestPatternFilter { assertTrue("rejects "+ tags, !newRegexFilter(conf).accepts(tags)); } + /** + * Asserts that filters with the given configuration reject the given record. + * + * @param conf SubsetConfiguration containing filter configuration + * @param record MetricsRecord to check + */ + static void shouldReject(SubsetConfiguration conf, MetricsRecord record) { + assertTrue("rejects " + record, !newGlobFilter(conf).accepts(record)); + assertTrue("rejects " + record, !newRegexFilter(conf).accepts(record)); + } + /** * Create a new glob filter with a config object * @param conf the config object @@ -141,4 +189,19 @@ public class TestPatternFilter { f.init(conf); return f; } + + /** + * Creates a mock MetricsRecord with the given name and tags. 
+ * + * @param name String name + * @param tags List tags + * @return MetricsRecord newly created mock + */ + private static MetricsRecord mockMetricsRecord(String name, + List tags) { + MetricsRecord record = mock(MetricsRecord.class); + when(record.name()).thenReturn(name); + when(record.tags()).thenReturn(tags); + return record; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java similarity index 79% rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java rename to hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java index a3b63c7430c..2b6ce622cf8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetworkTopologyWithNodeGroup.java @@ -20,30 +20,31 @@ package org.apache.hadoop.net; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.util.HashMap; import java.util.Map; -import org.apache.hadoop.hdfs.DFSTestUtil; -import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.junit.Test; public class TestNetworkTopologyWithNodeGroup { private final static NetworkTopologyWithNodeGroup cluster = new NetworkTopologyWithNodeGroup(); - private final static DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] { - DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/d1/r1/s1"), - DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/d1/r1/s1"), - DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/d1/r1/s2"), - DFSTestUtil.getDatanodeDescriptor("4.4.4.4", "/d1/r2/s3"), - DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2/s3"), - DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d1/r2/s4"), - DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/d2/r3/s5"), - DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3/s6") + private final static NodeBase dataNodes[] = new NodeBase[] { + new NodeBase("h1", "/d1/r1/s1"), + new NodeBase("h2", "/d1/r1/s1"), + new NodeBase("h3", "/d1/r1/s2"), + new NodeBase("h4", "/d1/r2/s3"), + new NodeBase("h5", "/d1/r2/s3"), + new NodeBase("h6", "/d1/r2/s4"), + new NodeBase("h7", "/d2/r3/s5"), + new NodeBase("h8", "/d2/r3/s6") }; private final static NodeBase computeNode = new NodeBase("/d1/r1/s1/h9"); + + private final static NodeBase rackOnlyNode = new NodeBase("h10", "/r2"); static { for(int i=0; i pickNodesAtRandom(int numNodes, String excludedScope) { Map frequency = new HashMap(); - for (DatanodeDescriptor dnd : dataNodes) { + for (NodeBase dnd : dataNodes) { frequency.put(dnd, 0); } @@ -161,6 +162,12 @@ public class TestNetworkTopologyWithNodeGroup { /** * This test checks that chooseRandom works for an excluded node. */ + /** + * Test replica placement policy in case last node is invalid. + * We create 6 nodes but the last node is in fault topology (with rack info), + * so cannot be added to cluster. We should test proper exception is thrown in + * adding node but shouldn't affect the cluster. 
+ */ @Test public void testChooseRandomExcludedNode() { String scope = "~" + NodeBase.getPath(dataNodes[0]); @@ -171,5 +178,23 @@ public class TestNetworkTopologyWithNodeGroup { assertTrue(frequency.get(key) > 0 || key == dataNodes[0]); } } + + /** + * This test checks that adding a node with an invalid topology fails + * with an exception indicating that the topology is invalid. + */ + @Test + public void testAddNodeWithInvalidTopology() { + // rackOnlyNode has a rack-only location with no node group, so adding it must fail + try { + cluster.add(rackOnlyNode); + fail("Exception should be thrown, so we should not have reached here."); + } catch (Exception e) { + if (!(e instanceof IllegalArgumentException)) { + fail("Expecting IllegalArgumentException, but caught: " + e); + } + assertTrue(e.getMessage().contains("illegal network location")); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 5b08058d081..1625625241d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -20,6 +20,9 @@ package org.apache.hadoop.test; import java.io.File; import java.io.IOException; import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.ThreadInfo; +import java.lang.management.ThreadMXBean; import java.lang.reflect.InvocationTargetException; import java.util.Arrays; import java.util.Random; @@ -330,4 +333,33 @@ public abstract class GenericTestUtils { " but got:\n" + output, Pattern.compile(pattern).matcher(output).find()); } + + public static void assertValueNear(long expected, long actual, long allowedError) { + assertValueWithinRange(expected - allowedError, expected + allowedError, actual); + } + + public static void assertValueWithinRange(long expectedMin, long expectedMax, + long actual) { + Assert.assertTrue("Expected " + actual + " to be in range (" + expectedMin + "," + + expectedMax + ")", expectedMin <= actual && actual <= expectedMax); + } + + /** + * Assert that there are no threads running whose name matches the + * given regular expression. 
+ * @param regex the regex to match against + */ + public static void assertNoThreadsMatching(String regex) { + Pattern pattern = Pattern.compile(regex); + ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); + + ThreadInfo[] infos = threadBean.getThreadInfo(threadBean.getAllThreadIds(), 20); + for (ThreadInfo info : infos) { + if (info == null) continue; + if (pattern.matcher(info.getThreadName()).matches()) { + Assert.fail("Leaked thread: " + info + "\n" + + Joiner.on("\n").join(info.getStackTrace())); + } + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MockitoUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MockitoUtil.java index 82abcadf279..32305b5ee78 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MockitoUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/MockitoUtil.java @@ -20,6 +20,9 @@ package org.apache.hadoop.test; import java.io.Closeable; import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.mockito.stubbing.Stubber; public abstract class MockitoUtil { @@ -33,4 +36,29 @@ public abstract class MockitoUtil { return Mockito.mock(clazz, Mockito.withSettings().extraInterfaces(Closeable.class)); } + + /** + * Throw an exception from the mock/spy only in the case that the + * call stack at the time the method has a line which matches the given + * pattern. + * + * @param t the Throwable to throw + * @param pattern the pattern against which to match the call stack trace + * @return the stub in progress + */ + public static Stubber doThrowWhenCallStackMatches( + final Throwable t, final String pattern) { + return Mockito.doAnswer(new Answer() { + @Override + public Object answer(InvocationOnMock invocation) throws Throwable { + t.setStackTrace(Thread.currentThread().getStackTrace()); + for (StackTraceElement elem : t.getStackTrace()) { + if (elem.toString().matches(pattern)) { + throw t; + } + } + return invocation.callRealMethod(); + } + }); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7a9e504c434..bde5d8285c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -28,6 +28,9 @@ Trunk (Unreleased) IMPROVEMENTS + HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common. + (Junping Du via llu) + HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants -> HdfsConstants. (Harsh J Chouraria via atm) @@ -315,6 +318,9 @@ Trunk (Unreleased) HDFS-4586. TestDataDirs.testGetDataDirsFromURIs fails with all directories in dfs.datanode.data.dir are invalid. (Ivan Mitic via atm) + HDFS-4646. createNNProxyWithClientProtocol ignores configured timeout + value (Jagane Sundar via cos) + BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS HDFS-4145. Merge hdfs cmd line scripts from branch-1-win. (David Lao, @@ -343,12 +349,21 @@ Trunk (Unreleased) HDFS-4604. TestJournalNode fails on Windows. (Ivan Mitic via suresh) + HDFS-4625. Make TestNNWithQJM#testNewNamenodeTakesOverWriter work on + Windows. (Ivan Mitic via suresh) + + HDFS-4674. TestBPOfferService fails on Windows due to failure parsing + datanode data directory as URI. (Chris Nauroth via suresh) + Release 2.0.5-beta - UNRELEASED INCOMPATIBLE CHANGES NEW FEATURES + HDFS-1804. Add a new block-volume device choosing policy that looks at + free space. 
(atm) + IMPROVEMENTS HDFS-4222. NN is unresponsive and loses heartbeats from DNs when @@ -378,6 +393,15 @@ Release 2.0.5-beta - UNRELEASED HDFS-4618. Default transaction interval for checkpoints is too low. (todd) + HDFS-4525. Provide an API for knowing that whether file is closed or not. + (SreeHari via umamahesh) + + HDFS-3940. Add Gset#clear method and clear the block map when namenode is + shutdown. (suresh) + + HDFS-4679. Namenode operation checks should be done in a consistent + manner. (suresh) + OPTIMIZATIONS BUG FIXES @@ -460,6 +484,25 @@ Release 2.0.5-beta - UNRELEASED HDFS-4598. Fix the default value of ConcatSourcesParam and the WebHDFS doc. (szetszwo) + HDFS-4655. DNA_FINALIZE is logged as being an unknown command by the DN + when received from the standby NN. (atm) + + HDFS-4656. DN heartbeat loop can be briefly tight. (atm) + + HDFS-4658. Standby NN will log that it has received a block report "after + becoming active" (atm) + + HDFS-3981. Fix handling of FSN lock in getBlockLocations. (Xiaobo Peng + and todd via todd) + + HDFS-4676. TestHDFSFileSystemContract should set MiniDFSCluster variable + to null to free up memory. (suresh) + + HDFS-4669. TestBlockPoolManager fails using IBM java. (Tian Hong Wang via + suresh) + + HDFS-4643. Fix flakiness in TestQuorumJournalManager. (todd) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -2433,6 +2476,20 @@ Release 2.0.0-alpha - 05-23-2012 HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + HDFS-4477. Secondary namenode may retain old tokens (daryn via kihwal) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES @@ -2477,6 +2534,10 @@ Release 0.23.7 - UNRELEASED HDFS-4581. checkDiskError should not be called on network errors (Rohit Kochar via kihwal) + HDFS-4649. Webhdfs cannot list large directories (daryn via kihwal) + + HDFS-4548. Webhdfs doesn't renegotiate SPNEGO token (daryn via kihwal) + Release 0.23.6 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 016405a714a..14f11463236 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -1546,7 +1546,22 @@ public class DFSClient implements java.io.Closeable { UnresolvedPathException.class); } } - + + /** + * Close status of a file + * @return true if file is already closed + */ + public boolean isFileClosed(String src) throws IOException{ + checkOpen(); + try { + return namenode.isFileClosed(src); + } catch(RemoteException re) { + throw re.unwrapRemoteException(AccessControlException.class, + FileNotFoundException.class, + UnresolvedPathException.class); + } + } + /** * Get the file info for a specific file or directory. If src * refers to a symlink then the FileStatus of the link is returned. 
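The DFSClient change above wires the new isFileClosed RPC through to client code; the CHANGES entry HDFS-4525 describes it as an API for asking whether a file is closed. As a rough illustration only, not part of the patch: the helper name, timeout, and one-second retry interval below are invented, and the sketch relies on the DistributedFileSystem#isFileClosed(Path) wrapper that appears further down in this patch.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class WaitForFileClose {
  // Polls the NameNode via the new isFileClosed() API, e.g. while waiting for
  // lease recovery to finish on a file whose writer died without closing it.
  public static void waitForClosed(FileSystem fs, Path path, long timeoutMs)
      throws IOException, InterruptedException {
    if (!(fs instanceof DistributedFileSystem)) {
      throw new IOException(path + " is not stored in HDFS");
    }
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (!dfs.isFileClosed(path)) {      // API added by this patch
      if (System.currentTimeMillis() > deadline) {
        throw new IOException("Timed out waiting for " + path + " to close");
      }
      Thread.sleep(1000);                  // arbitrary retry interval
    }
  }
}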
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index a90b28abfd1..2c5a40074c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -377,6 +377,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATANODE_PLUGINS_KEY = "dfs.datanode.plugins"; public static final String DFS_DATANODE_FSDATASET_FACTORY_KEY = "dfs.datanode.fsdataset.factory"; public static final String DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_POLICY_KEY = "dfs.datanode.fsdataset.volume.choosing.policy"; + public static final String DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY = "dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold"; + public static final long DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_DEFAULT = 1024L * 1024L * 1024L * 10L; // 10 GB + public static final String DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY = "dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-percent"; + public static final float DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_DEFAULT = 0.75f; public static final String DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY = "dfs.datanode.socket.write.timeout"; public static final String DFS_DATANODE_STARTUP_KEY = "dfs.datanode.startup"; public static final String DFS_NAMENODE_PLUGINS_KEY = "dfs.namenode.plugins"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 44d1fda7919..fabb013b5f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -460,7 +460,7 @@ public class DFSUtil { // Look for configurations of the form [.][.] // across all of the configured nameservices and namenodes. - Map> ret = Maps.newHashMap(); + Map> ret = Maps.newLinkedHashMap(); for (String nsId : emptyAsSingletonNull(nameserviceIds)) { Map isas = getAddressesForNameserviceId(conf, nsId, defaultAddress, keys); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index e772859b8ee..2fda8b8e8ef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -917,4 +917,17 @@ public class DistributedFileSystem extends FileSystem { public boolean isInSafeMode() throws IOException { return setSafeMode(SafeModeAction.SAFEMODE_GET, true); } + + /** + * Get the close status of a file + * @param src The path to the file + * + * @return return true if file is closed + * @throws FileNotFoundException if the file does not exist. 
+ * @throws IOException If an I/O error occurred + */ + public boolean isFileClosed(Path src) throws IOException { + return dfs.isFileClosed(getPathName(src)); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index 5ae2ee424f6..e630271bd31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -260,7 +260,9 @@ public class NameNodeProxies { final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class); ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy( ClientNamenodeProtocolPB.class, version, address, ugi, conf, - NetUtils.getDefaultSocketFactory(conf), 0, defaultPolicy).getProxy(); + NetUtils.getDefaultSocketFactory(conf), + org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy) + .getProxy(); if (withRetries) { // create the proxy with retries diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 9621c979473..cbfc2f2759a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -757,7 +757,21 @@ public interface ClientProtocol { @Idempotent public HdfsFileStatus getFileInfo(String src) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; - + + /** + * Get the close status of a file + * @param src The string representation of the path to the file + * + * @return return true if file is closed + * @throws AccessControlException permission denied + * @throws FileNotFoundException file src is not found + * @throws UnresolvedLinkException if the path contains a symlink. + * @throws IOException If an I/O error occurred + */ + @Idempotent + public boolean isFileClosed(String src) throws AccessControlException, + FileNotFoundException, UnresolvedLinkException, IOException; + /** * Get the file info for a specific file or directory. If the path * refers to a symlink then the FileStatus of the symlink is returned. 
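Alongside the isFileClosed plumbing, the DFSConfigKeys hunk above adds two tunables for the new available-space volume choosing policy (HDFS-1804): a balanced-space threshold (default 10 GB) and a balanced-space preference percent (default 0.75). The following is a hedged sketch of how a deployment might opt into the policy programmatically; the 5 GB threshold and 0.85 preference fraction are arbitrary example values, and the fully qualified class name is that of the AvailableSpaceVolumeChoosingPolicy introduced later in this patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class AvailableSpacePolicyConfigExample {
  // Builds a DataNode configuration that picks volumes by available space
  // rather than pure round-robin, using only keys declared in DFSConfigKeys.
  public static Configuration build() {
    Configuration conf = new Configuration();
    conf.set(DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_POLICY_KEY,
        "org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy");
    // Treat volumes whose free space differs by less than 5 GB as balanced
    // (the default threshold is 10 GB).
    conf.setLong(
        DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY,
        5L * 1024 * 1024 * 1024);
    // When volumes are imbalanced, send roughly 85% of new replicas to the
    // volumes with more free space (the default preference is 0.75).
    conf.setFloat(
        DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY,
        0.85f);
    return conf;
  }
}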
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index 83ae28238d0..438d87ce8cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -96,7 +96,8 @@ public class LayoutVersion { OPTIMIZE_PERSIST_BLOCKS(-40, "Serialize block lists with delta-encoded variable length ints, " + "add OP_UPDATE_BLOCKS"), - RESERVED_REL1_2_0(-41, -32, "Reserved for release 1.2.0", true, CONCAT); + RESERVED_REL1_2_0(-41, -32, "Reserved for release 1.2.0", true, CONCAT), + ADD_INODE_ID(-42, -40, "Assign a unique inode id for each inode", false); final int lv; final int ancestorLV; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index af422155508..d3e931587cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -76,6 +76,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetPre import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetPreferredBlockSizeResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.IsFileClosedRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.IsFileClosedResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ListCorruptFileBlocksRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ListCorruptFileBlocksResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.MetaSaveRequestProto; @@ -864,4 +866,17 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements throw new ServiceException(e); } } + + @Override + public IsFileClosedResponseProto isFileClosed( + RpcController controller, IsFileClosedRequestProto request) + throws ServiceException { + try { + boolean result = server.isFileClosed(request.getSrc()); + return IsFileClosedResponseProto.newBuilder().setResult(result).build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 8e510e6d3ec..cd9c8111b49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -77,6 +77,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLis import 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetListingResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetPreferredBlockSizeRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.IsFileClosedRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ListCorruptFileBlocksRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.MetaSaveRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.MkdirsRequestProto; @@ -850,6 +851,19 @@ public class ClientNamenodeProtocolTranslatorPB implements throw ProtobufHelper.getRemoteException(e); } } + + + @Override + public boolean isFileClosed(String src) throws AccessControlException, + FileNotFoundException, UnresolvedLinkException, IOException { + IsFileClosedRequestProto req = IsFileClosedRequestProto.newBuilder() + .setSrc(src).build(); + try { + return rpcProxy.isFileClosed(null, req).getResult(); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } @Override public Object getUnderlyingProxyObject() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index d18ac9f456d..896c20c85b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -310,6 +310,23 @@ public class DelegationTokenSecretManager namesystem.logUpdateMasterKey(key); } } + + @Override //AbstractDelegationTokenManager + protected void logExpireToken(final DelegationTokenIdentifier dtId) + throws IOException { + synchronized (noInterruptsLock) { + // The edit logging code will fail catastrophically if it + // is interrupted during a logSync, since the interrupt + // closes the edit log files. Doing this inside the + // above lock and then checking interruption status + // prevents this bug. + if (Thread.interrupted()) { + throw new InterruptedIOException( + "Interrupted before expiring delegation token"); + } + namesystem.logExpireDelegationToken(dtId); + } + } /** A utility method for creating credentials. */ public static Credentials createCredentials(final NameNode namenode, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 8a22c55f389..51eced86dda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1569,8 +1569,8 @@ public class BlockManager { node.receivedBlockReport(); if (staleBefore && !node.areBlockContentsStale()) { LOG.info("BLOCK* processReport: Received first block report from " - + node + " after becoming active. Its block contents are no longer" - + " considered stale"); + + node + " after starting up or becoming active. 
Its block " + + "contents are no longer considered stale"); rescanPostponedMisreplicatedBlocks(); } @@ -3180,4 +3180,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block OK } + public void shutdown() { + blocksMap.close(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java index dbfcaa70138..2f1c06bed6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java @@ -67,7 +67,7 @@ class BlocksMap { void close() { - // Empty blocks once GSet#clear is implemented (HDFS-3940) + blocks.clear(); } BlockCollection getBlockCollection(Block b) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 69fcdd971be..d5e026b33b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -612,6 +612,7 @@ class BPOfferService { case DatanodeProtocol.DNA_TRANSFER: case DatanodeProtocol.DNA_INVALIDATE: case DatanodeProtocol.DNA_SHUTDOWN: + case DatanodeProtocol.DNA_FINALIZE: case DatanodeProtocol.DNA_RECOVERBLOCK: case DatanodeProtocol.DNA_BALANCERBANDWIDTHUPDATE: LOG.warn("Got a command from standby NN - ignoring command:" + cmd.getAction()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index ad4a78e30fe..75f42f959d7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -512,7 +512,7 @@ class BPServiceActor implements Runnable { // // Every so often, send heartbeat or block-report // - if (startTime - lastHeartbeat > dnConf.heartBeatInterval) { + if (startTime - lastHeartbeat >= dnConf.heartBeatInterval) { // // All heartbeat messages include following info: // -- Datanode name diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java index 54cbb184bf6..73cc3c48d1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java @@ -160,8 +160,8 @@ class BlockPoolManager { Map> addrMap) throws IOException { assert Thread.holdsLock(refreshNamenodesLock); - Set toRefresh = Sets.newHashSet(); - Set toAdd = Sets.newHashSet(); + Set toRefresh = Sets.newLinkedHashSet(); + Set toAdd = Sets.newLinkedHashSet(); Set toRemove; synchronized (this) { @@ -239,4 +239,4 @@ class BlockPoolManager { protected BPOfferService createBPOS(List nnAddrs) { return new BPOfferService(nnAddrs, dn); } -} \ No newline at end of file +} diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java new file mode 100644 index 00000000000..d474567072f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/AvailableSpaceVolumeChoosingPolicy.java @@ -0,0 +1,259 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.datanode.fsdataset; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; + +/** + * A DN volume choosing policy which takes into account the amount of free + * space on each of the available volumes when considering where to assign a + * new replica allocation. By default this policy prefers assigning replicas to + * those volumes with more available free space, so as to over time balance the + * available space of all the volumes within a DN. 
+ */ +public class AvailableSpaceVolumeChoosingPolicy + implements VolumeChoosingPolicy, Configurable { + + private static final Log LOG = LogFactory.getLog(AvailableSpaceVolumeChoosingPolicy.class); + + private static final Random RAND = new Random(); + + private long balancedSpaceThreshold = DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_DEFAULT; + private float balancedPreferencePercent = DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_DEFAULT; + + @Override + public synchronized void setConf(Configuration conf) { + balancedSpaceThreshold = conf.getLong( + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY, + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_DEFAULT); + balancedPreferencePercent = conf.getFloat( + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY, + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_DEFAULT); + + LOG.info("Available space volume choosing policy initialized: " + + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY + + " = " + balancedSpaceThreshold + ", " + + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY + + " = " + balancedPreferencePercent); + } + + @Override + public synchronized Configuration getConf() { + // Nothing to do. Only added to fulfill the Configurable contract. + return null; + } + + private VolumeChoosingPolicy roundRobinPolicyBalanced = + new RoundRobinVolumeChoosingPolicy(); + private VolumeChoosingPolicy roundRobinPolicyHighAvailable = + new RoundRobinVolumeChoosingPolicy(); + private VolumeChoosingPolicy roundRobinPolicyLowAvailable = + new RoundRobinVolumeChoosingPolicy(); + + @Override + public synchronized V chooseVolume(List volumes, + final long replicaSize) throws IOException { + if (volumes.size() < 1) { + throw new DiskOutOfSpaceException("No more available volumes"); + } + + AvailableSpaceVolumeList volumesWithSpaces = + new AvailableSpaceVolumeList(volumes); + + if (volumesWithSpaces.areAllVolumesWithinFreeSpaceThreshold()) { + // If they're actually not too far out of whack, fall back on pure round + // robin. + V volume = roundRobinPolicyBalanced.chooseVolume(volumes, replicaSize); + if (LOG.isDebugEnabled()) { + LOG.debug("All volumes are within the configured free space balance " + + "threshold. Selecting " + volume + " for write of block size " + + replicaSize); + } + return volume; + } else { + V volume = null; + // If none of the volumes with low free space have enough space for the + // replica, always try to choose a volume with a lot of free space. + long mostAvailableAmongLowVolumes = volumesWithSpaces + .getMostAvailableSpaceAmongVolumesWithLowAvailableSpace(); + + List highAvailableVolumes = extractVolumesFromPairs( + volumesWithSpaces.getVolumesWithHighAvailableSpace()); + List lowAvailableVolumes = extractVolumesFromPairs( + volumesWithSpaces.getVolumesWithLowAvailableSpace()); + + float preferencePercentScaler = + (highAvailableVolumes.size() * balancedPreferencePercent) + + (lowAvailableVolumes.size() * (1 - balancedPreferencePercent)); + float scaledPreferencePercent = + (highAvailableVolumes.size() * balancedPreferencePercent) / + preferencePercentScaler; + if (mostAvailableAmongLowVolumes < replicaSize || + RAND.nextFloat() < scaledPreferencePercent) { + volume = roundRobinPolicyHighAvailable.chooseVolume( + highAvailableVolumes, + replicaSize); + if (LOG.isDebugEnabled()) { + LOG.debug("Volumes are imbalanced. 
Selecting " + volume + + " from high available space volumes for write of block size " + + replicaSize); + } + } else { + volume = roundRobinPolicyLowAvailable.chooseVolume( + lowAvailableVolumes, + replicaSize); + if (LOG.isDebugEnabled()) { + LOG.debug("Volumes are imbalanced. Selecting " + volume + + " from low available space volumes for write of block size " + + replicaSize); + } + } + return volume; + } + } + + /** + * Used to keep track of the list of volumes we're choosing from. + */ + private class AvailableSpaceVolumeList { + private final List volumes; + + public AvailableSpaceVolumeList(List volumes) throws IOException { + this.volumes = new ArrayList(); + for (V volume : volumes) { + this.volumes.add(new AvailableSpaceVolumePair(volume)); + } + } + + /** + * Check if the available space on all the volumes is roughly equal. + * + * @param volumes the volumes to check + * @return true if all volumes' free space is within the configured threshold, + * false otherwise. + * @throws IOException + * in the event of error checking amount of available space + */ + public boolean areAllVolumesWithinFreeSpaceThreshold() { + long leastAvailable = Long.MAX_VALUE; + long mostAvailable = 0; + for (AvailableSpaceVolumePair volume : volumes) { + leastAvailable = Math.min(leastAvailable, volume.getAvailable()); + mostAvailable = Math.max(mostAvailable, volume.getAvailable()); + } + return (mostAvailable - leastAvailable) < balancedSpaceThreshold; + } + + /** + * @return the minimum amount of space available on a single volume, + * across all volumes. + */ + private long getLeastAvailableSpace() { + long leastAvailable = Long.MAX_VALUE; + for (AvailableSpaceVolumePair volume : volumes) { + leastAvailable = Math.min(leastAvailable, volume.getAvailable()); + } + return leastAvailable; + } + + /** + * @return the maximum amount of space available across volumes with low space. + */ + public long getMostAvailableSpaceAmongVolumesWithLowAvailableSpace() { + long mostAvailable = Long.MIN_VALUE; + for (AvailableSpaceVolumePair volume : getVolumesWithLowAvailableSpace()) { + mostAvailable = Math.max(mostAvailable, volume.getAvailable()); + } + return mostAvailable; + } + + /** + * @return the list of volumes with relatively low available space. + */ + public List getVolumesWithLowAvailableSpace() { + long leastAvailable = getLeastAvailableSpace(); + List ret = new ArrayList(); + for (AvailableSpaceVolumePair volume : volumes) { + if (volume.getAvailable() <= leastAvailable + balancedSpaceThreshold) { + ret.add(volume); + } + } + return ret; + } + + /** + * @return the list of volumes with a lot of available space. + */ + public List getVolumesWithHighAvailableSpace() { + long leastAvailable = getLeastAvailableSpace(); + List ret = new ArrayList(); + for (AvailableSpaceVolumePair volume : volumes) { + if (volume.getAvailable() > leastAvailable + balancedSpaceThreshold) { + ret.add(volume); + } + } + return ret; + } + + } + + /** + * Used so that we only check the available space on a given volume once, at + * the beginning of {@link AvailableSpaceVolumeChoosingPolicy#chooseVolume(List, long)}. 
+ */ + private class AvailableSpaceVolumePair { + private final V volume; + private final long availableSpace; + + public AvailableSpaceVolumePair(V volume) throws IOException { + this.volume = volume; + this.availableSpace = volume.getAvailable(); + } + + public long getAvailable() { + return availableSpace; + } + + public V getVolume() { + return volume; + } + } + + private List extractVolumesFromPairs(List volumes) { + List ret = new ArrayList(); + for (AvailableSpaceVolumePair volume : volumes) { + ret.add(volume.getVolume()); + } + return ret; + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index b11059a4bb4..07679b2eb7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -2098,4 +2098,8 @@ public class FSDirectory implements Closeable { inode.setLocalName(name.getBytes()); } } + + void shutdown() { + nameCache.reset(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 754b3030522..3f4772e55a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -656,6 +656,7 @@ public class FSEditLog implements LogsPurgeable { */ public void logOpenFile(String path, INodeFileUnderConstruction newNode) { AddOp op = AddOp.getInstance(cache.get()) + .setInodeId(newNode.getId()) .setPath(path) .setReplication(newNode.getBlockReplication()) .setModificationTime(newNode.getModificationTime()) @@ -697,6 +698,7 @@ public class FSEditLog implements LogsPurgeable { */ public void logMkDir(String path, INode newNode) { MkdirOp op = MkdirOp.getInstance(cache.get()) + .setInodeId(newNode.getId()) .setPath(path) .setTimestamp(newNode.getModificationTime()) .setPermissionStatus(newNode.getPermissionStatus()); @@ -814,6 +816,7 @@ public class FSEditLog implements LogsPurgeable { void logSymlink(String path, String value, long mtime, long atime, INodeSymlink node) { SymlinkOp op = SymlinkOp.getInstance(cache.get()) + .setId(node.getId()) .setPath(path) .setValue(value) .setModificationTime(mtime) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 133a82821f2..297e7f0da59 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -32,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import 
org.apache.hadoop.hdfs.server.common.Storage; @@ -167,7 +168,7 @@ public class FSEditLogLoader { } } try { - long inodeId = applyEditLogOp(op, fsDir, in.getVersion()); + long inodeId = applyEditLogOp(op, fsDir, in.getVersion(), lastInodeId); if (lastInodeId < inodeId) { lastInodeId = inodeId; } @@ -223,9 +224,30 @@ public class FSEditLogLoader { return numEdits; } + // allocate and update last allocated inode id + private long getAndUpdateLastInodeId(long inodeIdFromOp, int logVersion, + long lastInodeId) throws IOException { + long inodeId = inodeIdFromOp; + + if (inodeId == INodeId.GRANDFATHER_INODE_ID) { + if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { + throw new IOException("The layout version " + logVersion + + " supports inodeId but gave bogus inodeId"); + } + inodeId = fsNamesys.allocateNewInodeId(); + } else { + // need to reset lastInodeId. fsnamesys gets lastInodeId firstly from + // fsimage but editlog captures more recent inodeId allocations + if (inodeId > lastInodeId) { + fsNamesys.resetLastInodeId(inodeId); + } + } + return inodeId; + } + @SuppressWarnings("deprecation") private long applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, - int logVersion) throws IOException { + int logVersion, long lastInodeId) throws IOException { long inodeId = INodeId.GRANDFATHER_INODE_ID; if (LOG.isTraceEnabled()) { LOG.trace("replaying edit log: " + op); @@ -256,7 +278,8 @@ public class FSEditLogLoader { assert addCloseOp.blocks.length == 0; // add to the file tree - inodeId = fsNamesys.allocateNewInodeId(); + inodeId = getAndUpdateLastInodeId(addCloseOp.inodeId, logVersion, + lastInodeId); newFile = (INodeFile) fsDir.unprotectedAddFile(inodeId, addCloseOp.path, addCloseOp.permissions, replication, addCloseOp.mtime, addCloseOp.atime, addCloseOp.blockSize, true, @@ -371,7 +394,8 @@ public class FSEditLogLoader { } case OP_MKDIR: { MkdirOp mkdirOp = (MkdirOp)op; - inodeId = fsNamesys.allocateNewInodeId(); + inodeId = getAndUpdateLastInodeId(mkdirOp.inodeId, logVersion, + lastInodeId); fsDir.unprotectedMkdir(inodeId, mkdirOp.path, mkdirOp.permissions, mkdirOp.timestamp); break; @@ -425,7 +449,8 @@ public class FSEditLogLoader { } case OP_SYMLINK: { SymlinkOp symlinkOp = (SymlinkOp)op; - inodeId = fsNamesys.allocateNewInodeId(); + inodeId = getAndUpdateLastInodeId(symlinkOp.inodeId, logVersion, + lastInodeId); fsDir.unprotectedAddSymlink(inodeId, symlinkOp.path, symlinkOp.value, symlinkOp.mtime, symlinkOp.atime, symlinkOp.permissionStatus); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index fb74adf6a11..0dd54e75de9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -158,6 +158,7 @@ public abstract class FSEditLogOp { @SuppressWarnings("unchecked") static abstract class AddCloseOp extends FSEditLogOp implements BlockListUpdatingOp { int length; + long inodeId; String path; short replication; long mtime; @@ -172,6 +173,11 @@ public abstract class FSEditLogOp { super(opCode); assert(opCode == OP_ADD || opCode == OP_CLOSE); } + + T setInodeId(long inodeId) { + this.inodeId = inodeId; + return (T)this; + } T setPath(String path) { this.path = path; @@ -235,6 +241,7 @@ public abstract class FSEditLogOp { @Override public void 
writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeLong(inodeId, out); FSImageSerialization.writeString(path, out); FSImageSerialization.writeShort(replication, out); FSImageSerialization.writeLong(mtime, out); @@ -255,6 +262,12 @@ public abstract class FSEditLogOp { if (!LayoutVersion.supports(Feature.EDITLOG_OP_OPTIMIZATION, logVersion)) { this.length = in.readInt(); } + if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { + this.inodeId = in.readLong(); + } else { + // The inodeId should be updated when this editLogOp is applied + this.inodeId = INodeId.GRANDFATHER_INODE_ID; + } if ((-17 < logVersion && length != 4) || (logVersion <= -17 && length != 5 && !LayoutVersion.supports( Feature.EDITLOG_OP_OPTIMIZATION, logVersion))) { @@ -327,6 +340,8 @@ public abstract class FSEditLogOp { StringBuilder builder = new StringBuilder(); builder.append("[length="); builder.append(length); + builder.append(", inodeId="); + builder.append(inodeId); builder.append(", path="); builder.append(path); builder.append(", replication="); @@ -357,6 +372,8 @@ public abstract class FSEditLogOp { protected void toXml(ContentHandler contentHandler) throws SAXException { XMLUtils.addSaxString(contentHandler, "LENGTH", Integer.valueOf(length).toString()); + XMLUtils.addSaxString(contentHandler, "INODEID", + Long.valueOf(inodeId).toString()); XMLUtils.addSaxString(contentHandler, "PATH", path); XMLUtils.addSaxString(contentHandler, "REPLICATION", Short.valueOf(replication).toString()); @@ -376,6 +393,7 @@ public abstract class FSEditLogOp { @Override void fromXml(Stanza st) throws InvalidXmlException { this.length = Integer.valueOf(st.getValue("LENGTH")); + this.inodeId = Long.valueOf(st.getValue("INODEID")); this.path = st.getValue("PATH"); this.replication = Short.valueOf(st.getValue("REPLICATION")); this.mtime = Long.valueOf(st.getValue("MTIME")); @@ -907,6 +925,7 @@ public abstract class FSEditLogOp { static class MkdirOp extends FSEditLogOp { int length; + long inodeId; String path; long timestamp; PermissionStatus permissions; @@ -919,6 +938,11 @@ public abstract class FSEditLogOp { return (MkdirOp)cache.get(OP_MKDIR); } + MkdirOp setInodeId(long inodeId) { + this.inodeId = inodeId; + return this; + } + MkdirOp setPath(String path) { this.path = path; return this; @@ -937,6 +961,7 @@ public abstract class FSEditLogOp { @Override public void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeLong(inodeId, out); FSImageSerialization.writeString(path, out); FSImageSerialization.writeLong(timestamp, out); // mtime FSImageSerialization.writeLong(timestamp, out); // atime, unused at this @@ -953,6 +978,12 @@ public abstract class FSEditLogOp { && !LayoutVersion.supports(Feature.EDITLOG_OP_OPTIMIZATION, logVersion)) { throw new IOException("Incorrect data format. 
Mkdir operation."); } + if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { + this.inodeId = FSImageSerialization.readLong(in); + } else { + // This id should be updated when this editLogOp is applied + this.inodeId = INodeId.GRANDFATHER_INODE_ID; + } this.path = FSImageSerialization.readString(in); if (LayoutVersion.supports(Feature.EDITLOG_OP_OPTIMIZATION, logVersion)) { this.timestamp = FSImageSerialization.readLong(in); @@ -979,6 +1010,8 @@ public abstract class FSEditLogOp { StringBuilder builder = new StringBuilder(); builder.append("MkdirOp [length="); builder.append(length); + builder.append(", inodeId="); + builder.append(inodeId); builder.append(", path="); builder.append(path); builder.append(", timestamp="); @@ -997,6 +1030,8 @@ public abstract class FSEditLogOp { protected void toXml(ContentHandler contentHandler) throws SAXException { XMLUtils.addSaxString(contentHandler, "LENGTH", Integer.valueOf(length).toString()); + XMLUtils.addSaxString(contentHandler, "INODEID", + Long.valueOf(inodeId).toString()); XMLUtils.addSaxString(contentHandler, "PATH", path); XMLUtils.addSaxString(contentHandler, "TIMESTAMP", Long.valueOf(timestamp).toString()); @@ -1005,6 +1040,7 @@ public abstract class FSEditLogOp { @Override void fromXml(Stanza st) throws InvalidXmlException { this.length = Integer.valueOf(st.getValue("LENGTH")); + this.inodeId = Long.valueOf(st.getValue("INODEID")); this.path = st.getValue("PATH"); this.timestamp = Long.valueOf(st.getValue("TIMESTAMP")); this.permissions = @@ -1483,6 +1519,7 @@ public abstract class FSEditLogOp { static class SymlinkOp extends FSEditLogOp { int length; + long inodeId; String path; String value; long mtime; @@ -1497,6 +1534,11 @@ public abstract class FSEditLogOp { return (SymlinkOp)cache.get(OP_SYMLINK); } + SymlinkOp setId(long inodeId) { + this.inodeId = inodeId; + return this; + } + SymlinkOp setPath(String path) { this.path = path; return this; @@ -1525,6 +1567,7 @@ public abstract class FSEditLogOp { @Override public void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeLong(inodeId, out); FSImageSerialization.writeString(path, out); FSImageSerialization.writeString(value, out); FSImageSerialization.writeLong(mtime, out); @@ -1542,6 +1585,12 @@ public abstract class FSEditLogOp { + "symlink operation."); } } + if (LayoutVersion.supports(Feature.ADD_INODE_ID, logVersion)) { + this.inodeId = FSImageSerialization.readLong(in); + } else { + // This id should be updated when the editLogOp is applied + this.inodeId = INodeId.GRANDFATHER_INODE_ID; + } this.path = FSImageSerialization.readString(in); this.value = FSImageSerialization.readString(in); @@ -1560,6 +1609,8 @@ public abstract class FSEditLogOp { StringBuilder builder = new StringBuilder(); builder.append("SymlinkOp [length="); builder.append(length); + builder.append(", inodeId="); + builder.append(inodeId); builder.append(", path="); builder.append(path); builder.append(", value="); @@ -1582,6 +1633,8 @@ public abstract class FSEditLogOp { protected void toXml(ContentHandler contentHandler) throws SAXException { XMLUtils.addSaxString(contentHandler, "LENGTH", Integer.valueOf(length).toString()); + XMLUtils.addSaxString(contentHandler, "INODEID", + Long.valueOf(inodeId).toString()); XMLUtils.addSaxString(contentHandler, "PATH", path); XMLUtils.addSaxString(contentHandler, "VALUE", value); XMLUtils.addSaxString(contentHandler, "MTIME", @@ -1593,6 +1646,7 @@ public abstract class FSEditLogOp { @Override void fromXml(Stanza st) throws 
InvalidXmlException { this.length = Integer.valueOf(st.getValue("LENGTH")); + this.inodeId = Long.valueOf(st.getValue("INODEID")); this.path = st.getValue("PATH"); this.value = st.getValue("VALUE"); this.mtime = Long.valueOf(st.getValue("MTIME")); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 02d7d4400c6..22e7fd5b507 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -206,6 +206,20 @@ class FSImageFormat { imgTxId = 0; } + // read the last allocated inode id in the fsimage + if (LayoutVersion.supports(Feature.ADD_INODE_ID, imgVersion)) { + long lastInodeId = in.readLong(); + namesystem.resetLastInodeId(lastInodeId); + if (LOG.isDebugEnabled()) { + LOG.debug("load last allocated InodeId from fsimage:" + lastInodeId); + } + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Old layout version doesn't have inode id." + + " Will assign new id for each inode."); + } + } + // read compression related info FSImageCompression compression; if (LayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imgVersion)) { @@ -216,8 +230,7 @@ class FSImageFormat { in = compression.unwrapInputStream(fin); LOG.info("Loading image file " + curFile + " using " + compression); - // reset INodeId. TODO: remove this after inodeId is persisted in fsimage - namesystem.resetLastInodeIdWithoutChecking(INodeId.LAST_RESERVED_ID); + // load all inodes LOG.info("Number of files = " + numFiles); if (LayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, @@ -264,8 +277,8 @@ class FSImageFormat { * @param in image input stream * @throws IOException */ - private void loadLocalNameINodes(long numFiles, DataInputStream in) - throws IOException { + private void loadLocalNameINodes(long numFiles, DataInputStream in) + throws IOException { assert LayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, getLayoutVersion()); assert numFiles > 0; @@ -385,7 +398,8 @@ class FSImageFormat { long blockSize = 0; int imgVersion = getLayoutVersion(); - long inodeId = namesystem.allocateNewInodeId(); + long inodeId = LayoutVersion.supports(Feature.ADD_INODE_ID, imgVersion) ? 
+ in.readLong() : namesystem.allocateNewInodeId(); short replication = in.readShort(); replication = namesystem.getBlockManager().adjustReplication(replication); @@ -436,8 +450,8 @@ class FSImageFormat { LOG.info("Number of files under construction = " + size); for (int i = 0; i < size; i++) { - INodeFileUnderConstruction cons = - FSImageSerialization.readINodeUnderConstruction(in); + INodeFileUnderConstruction cons = FSImageSerialization + .readINodeUnderConstruction(in, namesystem, getLayoutVersion()); // verify that file exists in namespace String path = cons.getLocalName(); @@ -566,7 +580,8 @@ class FSImageFormat { out.writeLong(fsDir.rootDir.numItemsInTree()); out.writeLong(sourceNamesystem.getGenerationStamp()); out.writeLong(context.getTxId()); - + out.writeLong(sourceNamesystem.getLastInodeId()); + // write compression info and set up compressed stream out = compression.writeHeaderAndWrapStream(fos); LOG.info("Saving image file " + newFile + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index 200313ed49f..370ba86b5da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -29,6 +29,8 @@ import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DeprecatedUTF8; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; @@ -79,8 +81,11 @@ public class FSImageSerialization { // from the input stream // static INodeFileUnderConstruction readINodeUnderConstruction( - DataInputStream in) throws IOException { + DataInputStream in, FSNamesystem fsNamesys, int imgVersion) + throws IOException { byte[] name = readBytes(in); + long inodeId = LayoutVersion.supports(Feature.ADD_INODE_ID, imgVersion) ? 
in + .readLong() : fsNamesys.allocateNewInodeId(); short blockReplication = in.readShort(); long modificationTime = in.readLong(); long preferredBlockSize = in.readLong(); @@ -107,8 +112,7 @@ public class FSImageSerialization { int numLocs = in.readInt(); assert numLocs == 0 : "Unexpected block locations"; - //TODO: get inodeId from fsimage after inodeId is persisted - return new INodeFileUnderConstruction(INodeId.GRANDFATHER_INODE_ID, + return new INodeFileUnderConstruction(inodeId, name, blockReplication, modificationTime, @@ -128,6 +132,7 @@ public class FSImageSerialization { String path) throws IOException { writeString(path, out); + out.writeLong(cons.getId()); out.writeShort(cons.getBlockReplication()); out.writeLong(cons.getModificationTime()); out.writeLong(cons.getPreferredBlockSize()); @@ -151,6 +156,7 @@ public class FSImageSerialization { byte[] name = node.getLocalNameBytes(); out.writeShort(name.length); out.write(name); + out.writeLong(node.getId()); FsPermission filePerm = TL_DATA.get().FILE_PERM; if (node.isDirectory()) { out.writeShort(0); // replication diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index aa077fe5a29..07b87c7074c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1210,7 +1210,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot set permission for " + src, safeMode); } @@ -1248,7 +1247,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot set owner for " + src, safeMode); } @@ -1302,9 +1300,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlocks getBlockLocations(String src, long offset, long length, boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode) throws FileNotFoundException, UnresolvedLinkException, IOException { - FSPermissionChecker pc = getPermissionChecker(); try { - return getBlockLocationsInt(pc, src, offset, length, doAccessTime, + return getBlockLocationsInt(src, offset, length, doAccessTime, needBlockToken, checkSafeMode); } catch (AccessControlException e) { logAuditEvent(false, "open", src); @@ -1312,14 +1309,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - private LocatedBlocks getBlockLocationsInt(FSPermissionChecker pc, - String src, long offset, long length, boolean doAccessTime, - boolean needBlockToken, boolean checkSafeMode) + private LocatedBlocks getBlockLocationsInt(String src, long offset, + long length, boolean doAccessTime, boolean needBlockToken, + boolean checkSafeMode) throws FileNotFoundException, UnresolvedLinkException, IOException { - if (isPermissionEnabled) { - checkPathAccess(pc, src, FsAction.READ); - } - if (offset < 0) { throw new HadoopIllegalArgumentException( "Negative offset is not supported. File: " + src); @@ -1347,13 +1340,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Get block locations within the specified range, updating the * access times if necessary. 
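The method whose javadoc closes above is reworked in the hunk that follows so that the access time is rewritten only when it has aged past the configured precision, and the write lock is taken only on that path; plain reads stay on the read lock. A minimal sketch of the corrected check, with hypothetical names (the one-hour figure mirrors the usual dfs.namenode.accesstime.precision default):

/**
 * Sketch (hypothetical class) of the access-time policy applied in the
 * getBlockLocations hunk below: a read is served under the read lock, and the
 * write lock is taken only when the stored access time is older than one
 * precision interval.
 */
class AccessTimeSketch {
  static boolean needsAtimeUpdate(long now, long atime, long precisionMs) {
    // Matches the corrected condition: update only when the stored access
    // time has aged beyond the precision window.
    return now > atime + precisionMs;
  }

  public static void main(String[] args) {
    long precision = 3600000L;                  // 1 hour
    long atime = System.currentTimeMillis();
    // Immediately after access, no update (and no write lock) is needed.
    System.out.println(needsAtimeUpdate(System.currentTimeMillis(), atime, precision)); // false
    // An access time older than the precision window triggers an update.
    System.out.println(needsAtimeUpdate(atime + precision + 1, atime, precision));      // true
  }
}

The new TestSetTimes#testGetBlockLocationsOnlyUsesReadLock later in this patch asserts exactly this behavior: reading a freshly created file must not touch the FSNamesystem write lock.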
*/ - private LocatedBlocks getBlockLocationsUpdateTimes(String src, - long offset, - long length, - boolean doAccessTime, - boolean needBlockToken) - throws FileNotFoundException, UnresolvedLinkException, IOException { - + private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset, + long length, boolean doAccessTime, boolean needBlockToken) + throws FileNotFoundException, + UnresolvedLinkException, IOException { + FSPermissionChecker pc = getPermissionChecker(); for (int attempt = 0; attempt < 2; attempt++) { boolean isReadOp = (attempt == 0); if (isReadOp) { // first attempt is with readlock @@ -1369,6 +1360,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } else { checkOperation(OperationCategory.WRITE); } + if (isPermissionEnabled) { + checkPathAccess(pc, src, FsAction.READ); + } // if the namenode is in safemode, then do not update access time if (isInSafeMode()) { @@ -1378,14 +1372,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long now = now(); final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src); if (doAccessTime && isAccessTimeSupported()) { - if (now <= inode.getAccessTime() + getAccessTimePrecision()) { + if (now > inode.getAccessTime() + getAccessTimePrecision()) { // if we have to set access time but we only have the readlock, then // restart this entire operation with the writeLock. if (isReadOp) { continue; } + dir.setTimes(src, inode, -1, now, false); } - dir.setTimes(src, inode, -1, now, false); } return blockManager.createLocatedBlocks(inode.getBlocks(), inode.computeFileSize(false), inode.isUnderConstruction(), @@ -1411,6 +1405,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void concat(String target, String [] srcs) throws IOException, UnresolvedLinkException { + if(FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) + + " to " + target); + } try { concatInt(target, srcs); } catch (AccessControlException e) { @@ -1421,11 +1419,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void concatInt(String target, String [] srcs) throws IOException, UnresolvedLinkException { - if(FSNamesystem.LOG.isDebugEnabled()) { - FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) + - " to " + target); - } - // verify args if(target.isEmpty()) { throw new IllegalArgumentException("Target file name is empty"); @@ -1574,6 +1567,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void setTimes(String src, long mtime, long atime) throws IOException, UnresolvedLinkException { + if (!isAccessTimeSupported() && atime != -1) { + throw new IOException("Access time for hdfs is not configured. " + + " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); + } try { setTimesInt(src, mtime, atime); } catch (AccessControlException e) { @@ -1584,16 +1581,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void setTimesInt(String src, long mtime, long atime) throws IOException, UnresolvedLinkException { - if (!isAccessTimeSupported() && atime != -1) { - throw new IOException("Access time for hdfs is not configured. 
" + - " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); - } HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { + throw new SafeModeException("Cannot set times " + src, safeMode); + } // Write access is required to set access and modification times if (isPermissionEnabled) { @@ -1618,6 +1614,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void createSymlink(String target, String link, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { + if (!DFSUtil.isValidName(link)) { + throw new InvalidPathException("Invalid file name: " + link); + } try { createSymlinkInt(target, link, dirPerms, createParent); } catch (AccessControlException e) { @@ -1629,17 +1628,34 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void createSymlinkInt(String target, String link, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" + + target + " link=" + link); + } HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - + if (isInSafeMode()) { + throw new SafeModeException("Cannot create symlink " + link, safeMode); + } if (!createParent) { verifyParentDir(link); } - createSymlinkInternal(pc, target, link, dirPerms, createParent); + if (!dir.isValidToCreate(link)) { + throw new IOException("failed to create link " + link + +" either because the filename is invalid or the file exists"); + } + if (isPermissionEnabled) { + checkAncestorAccess(pc, link, FsAction.WRITE); + } + // validate that we have enough inodes. + checkFsObjectLimit(); + + // add symbolic link to namespace + dir.addSymlink(link, target, dirPerms, createParent); resultingStat = getAuditFileInfo(link, false); } finally { writeUnlock(); @@ -1648,37 +1664,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, logAuditEvent(true, "createSymlink", link, target, resultingStat); } - /** - * Create a symbolic link. - */ - private void createSymlinkInternal(FSPermissionChecker pc, String target, - String link, PermissionStatus dirPerms, boolean createParent) - throws IOException, UnresolvedLinkException { - assert hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" + - target + " link=" + link); - } - if (isInSafeMode()) { - throw new SafeModeException("Cannot create symlink " + link, safeMode); - } - if (!DFSUtil.isValidName(link)) { - throw new InvalidPathException("Invalid file name: " + link); - } - if (!dir.isValidToCreate(link)) { - throw new IOException("failed to create link " + link - +" either because the filename is invalid or the file exists"); - } - if (isPermissionEnabled) { - checkAncestorAccess(pc, link, FsAction.WRITE); - } - // validate that we have enough inodes. - checkFsObjectLimit(); - - // add symbolic link to namespace - dir.addSymlink(link, target, dirPerms, createParent); - } - /** * Set replication for an existing file. 
* @@ -1798,13 +1783,24 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, SafeModeException, FileAlreadyExistsException, UnresolvedLinkException, FileNotFoundException, ParentNotDirectoryException, IOException { + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src + + ", holder=" + holder + + ", clientMachine=" + clientMachine + + ", createParent=" + createParent + + ", replication=" + replication + + ", createFlag=" + flag.toString()); + } + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException(src); + } + boolean skipSync = false; final HdfsFileStatus stat; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { - checkOperation(OperationCategory.WRITE); startFileInternal(pc, src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); stat = dir.getFileInfo(src, false); @@ -1847,21 +1843,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, AccessControlException, UnresolvedLinkException, FileNotFoundException, ParentNotDirectoryException, IOException { assert hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src - + ", holder=" + holder - + ", clientMachine=" + clientMachine - + ", createParent=" + createParent - + ", replication=" + replication - + ", createFlag=" + flag.toString()); - } + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot create file" + src, safeMode); } - if (!DFSUtil.isValidName(src)) { - throw new InvalidPathException(src); - } - // Verify that the destination does not exist as a directory already. boolean pathExists = dir.exists(src); if (pathExists && dir.isDir(src)) { @@ -1997,21 +1982,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ boolean recoverLease(String src, String holder, String clientMachine) throws IOException { + if (!DFSUtil.isValidName(src)) { + throw new IOException("Invalid file name: " + src); + } + boolean skipSync = false; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException( "Cannot recover the lease of " + src, safeMode); } - if (!DFSUtil.isValidName(src)) { - throw new IOException("Invalid file name: " + src); - } - final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src); if (!inode.isUnderConstruction()) { return true; @@ -2135,13 +2119,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, "Append is not enabled on this NameNode. 
Use the " + DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); } + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src + + ", holder=" + holder + + ", clientMachine=" + clientMachine); + } + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException(src); + } + LocatedBlock lb = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { - checkOperation(OperationCategory.WRITE); - lb = startFileInternal(pc, src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), false, blockManager.maxReplication, 0); @@ -2434,21 +2425,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean abandonBlock(ExtendedBlock b, String src, String holder) throws LeaseExpiredException, FileNotFoundException, UnresolvedLinkException, IOException { + if(NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b + + "of file " + src); + } checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - // - // Remove the block from the pending creates list - // - if(NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " - +b+"of file "+src); - } if (isInSafeMode()) { throw new SafeModeException("Cannot abandon block " + b + " for fle" + src, safeMode); } + // + // Remove the block from the pending creates list + // INodeFileUnderConstruction file = checkLease(src, holder); dir.removeBlock(src, file, ExtendedBlock.getLocalBlock(b)); if(NameNode.stateChangeLog.isDebugEnabled()) { @@ -2510,19 +2501,23 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ boolean completeFile(String src, String holder, ExtendedBlock last) throws SafeModeException, UnresolvedLinkException, IOException { + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + + src + " for " + holder); + } checkBlock(last); boolean success = false; checkOperation(OperationCategory.WRITE); writeLock(); try { - checkOperation(OperationCategory.WRITE); - - success = completeFileInternal(src, holder, - ExtendedBlock.getLocalBlock(last)); + success = completeFileInternal(src, holder, + ExtendedBlock.getLocalBlock(last)); } finally { writeUnlock(); } getEditLog().logSync(); + NameNode.stateChangeLog.info("DIR* completeFile: " + src + " is closed by " + + holder); return success; } @@ -2530,10 +2525,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, String holder, Block last) throws SafeModeException, UnresolvedLinkException, IOException { assert hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + - src + " for " + holder); - } + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot complete file " + src, safeMode); } @@ -2569,9 +2561,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } finalizeINodeFileUnderConstruction(src, pendingFile); - - NameNode.stateChangeLog.info("DIR* completeFile: " + src + " is closed by " - + holder); return true; } @@ -2672,18 +2661,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean renameToInt(String src, String dst) throws IOException, UnresolvedLinkException { - boolean status = false; - HdfsFileStatus resultingStat = null; if 
(NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src + " to " + dst); } + if (!DFSUtil.isValidName(dst)) { + throw new IOException("Invalid name: " + dst); + } FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); + boolean status = false; + HdfsFileStatus resultingStat = null; writeLock(); try { - checkOperation(OperationCategory.WRITE); - status = renameToInternal(pc, src, dst); if (status) { resultingStat = getAuditFileInfo(dst, false); @@ -2703,12 +2693,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean renameToInternal(FSPermissionChecker pc, String src, String dst) throws IOException, UnresolvedLinkException { assert hasWriteLock(); + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot rename " + src, safeMode); } - if (!DFSUtil.isValidName(dst)) { - throw new IOException("Invalid name: " + dst); - } if (isPermissionEnabled) { //We should not be doing this. This is move() not renameTo(). //but for now, @@ -2730,16 +2718,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** Rename src to dst */ void renameTo(String src, String dst, Options.Rename... options) throws IOException, UnresolvedLinkException { - HdfsFileStatus resultingStat = null; if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - " + src + " to " + dst); } + if (!DFSUtil.isValidName(dst)) { + throw new InvalidPathException("Invalid name: " + dst); + } FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); + HdfsFileStatus resultingStat = null; writeLock(); try { - checkOperation(OperationCategory.WRITE); renameToInternal(pc, src, dst, options); resultingStat = getAuditFileInfo(dst, false); } finally { @@ -2758,12 +2748,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void renameToInternal(FSPermissionChecker pc, String src, String dst, Options.Rename... 
options) throws IOException { assert hasWriteLock(); + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot rename " + src, safeMode); } - if (!DFSUtil.isValidName(dst)) { - throw new InvalidPathException("Invalid name: " + dst); - } if (isPermissionEnabled) { checkParentAccess(pc, src, FsAction.WRITE); checkAncestorAccess(pc, dst, FsAction.WRITE); @@ -2950,16 +2938,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus getFileInfo(String src, boolean resolveLink) throws AccessControlException, UnresolvedLinkException, StandbyException, IOException { + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException("Invalid file name: " + src); + } HdfsFileStatus stat = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); - - if (!DFSUtil.isValidName(src)) { - throw new InvalidPathException("Invalid file name: " + src); - } if (isPermissionEnabled) { checkTraverse(pc, src); } @@ -2973,6 +2960,33 @@ public class FSNamesystem implements Namesystem, FSClusterStats, logAuditEvent(true, "getfileinfo", src); return stat; } + + /** + * Returns true if the file is closed + */ + boolean isFileClosed(String src) + throws AccessControlException, UnresolvedLinkException, + StandbyException, IOException { + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); + readLock(); + try { + checkOperation(OperationCategory.READ); + if (isPermissionEnabled) { + checkTraverse(pc, src); + } + return !INodeFile.valueOf(dir.getINode(src), src).isUnderConstruction(); + } catch (AccessControlException e) { + if (isAuditEnabled() && isExternalInvocation()) { + logAuditEvent(false, UserGroupInformation.getCurrentUser(), + getRemoteIp(), + "isFileClosed", src, null, null); + } + throw e; + } finally { + readUnlock(); + } + } /** * Create all the necessary directories @@ -2989,16 +3003,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean mkdirsInt(String src, PermissionStatus permissions, boolean createParent) throws IOException, UnresolvedLinkException { - HdfsFileStatus resultingStat = null; - boolean status = false; if(NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src); } + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException(src); + } FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); + HdfsFileStatus resultingStat = null; + boolean status = false; writeLock(); try { - checkOperation(OperationCategory.WRITE); status = mkdirsInternal(pc, src, permissions, createParent); if (status) { resultingStat = dir.getFileInfo(src, false); @@ -3020,6 +3036,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, PermissionStatus permissions, boolean createParent) throws IOException, UnresolvedLinkException { assert hasWriteLock(); + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot create directory " + src, safeMode); } @@ -3031,9 +3048,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // a new directory is not created. 
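The isFileClosed() primitive introduced above reports whether a file is still under construction; its RPC plumbing (NameNodeRpcServer, ClientNamenodeProtocol.proto) and a DistributedFileSystem-level test appear later in this patch. A hedged example of how a client might use it, assuming only the DistributedFileSystem#isFileClosed(Path) call that the test exercises; the helper class and its polling interval are illustrative:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

/**
 * Hypothetical helper: poll the NameNode until a file's lease has been
 * released (the file is closed) or a deadline passes.
 */
class WaitForClose {
  static boolean waitUntilClosed(DistributedFileSystem fs, Path p, long timeoutMs)
      throws Exception {
    long deadline = System.currentTimeMillis() + timeoutMs;
    while (System.currentTimeMillis() < deadline) {
      if (fs.isFileClosed(p)) {   // true once the file is no longer under construction
        return true;
      }
      Thread.sleep(500);          // arbitrary polling interval for the sketch
    }
    return false;
  }
}

One use for polling like this is waiting out lease recovery instead of blindly retrying open or append failures.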
return true; } - if (!DFSUtil.isValidName(src)) { - throw new InvalidPathException(src); - } if (isPermissionEnabled) { checkAncestorAccess(pc, src, FsAction.WRITE); } @@ -3304,8 +3318,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean closeFile, boolean deleteblock, DatanodeID[] newtargets, String[] newtargetstorages) throws IOException, UnresolvedLinkException { - String src = ""; + LOG.info("commitBlockSynchronization(lastblock=" + lastblock + + ", newgenerationstamp=" + newgenerationstamp + + ", newlength=" + newlength + + ", newtargets=" + Arrays.asList(newtargets) + + ", closeFile=" + closeFile + + ", deleteBlock=" + deleteblock + + ")"); checkOperation(OperationCategory.WRITE); + String src = ""; writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3317,13 +3338,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, "Cannot commitBlockSynchronization while in safe mode", safeMode); } - LOG.info("commitBlockSynchronization(lastblock=" + lastblock - + ", newgenerationstamp=" + newgenerationstamp - + ", newlength=" + newlength - + ", newtargets=" + Arrays.asList(newtargets) - + ", closeFile=" + closeFile - + ", deleteBlock=" + deleteblock - + ")"); final BlockInfo storedBlock = blockManager.getStoredBlock(ExtendedBlock .getLocalBlock(lastblock)); if (storedBlock == null) { @@ -3413,7 +3427,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot renew lease for " + holder, safeMode); } @@ -4793,8 +4806,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * shutdown FSNamesystem */ void shutdown() { - if (mbeanName != null) + if (mbeanName != null) { MBeans.unregister(mbeanName); + } + if (dir != null) { + dir.shutdown(); + } + if (blockManager != null) { + blockManager.shutdown(); + } } @@ -4882,11 +4902,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void reportBadBlocks(LocatedBlock[] blocks) throws IOException { checkOperation(OperationCategory.WRITE); + NameNode.stateChangeLog.info("*DIR* reportBadBlocks"); writeLock(); try { checkOperation(OperationCategory.WRITE); - - NameNode.stateChangeLog.info("*DIR* reportBadBlocks"); for (int i = 0; i < blocks.length; i++) { ExtendedBlock blk = blocks[i].getBlock(); DatanodeInfo[] nodes = blocks[i].getLocations(); @@ -4949,6 +4968,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { checkOperation(OperationCategory.WRITE); + LOG.info("updatePipeline(block=" + oldBlock + + ", newGenerationStamp=" + newBlock.getGenerationStamp() + + ", newLength=" + newBlock.getNumBytes() + + ", newNodes=" + Arrays.asList(newNodes) + + ", clientName=" + clientName + + ")"); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4958,12 +4983,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " + oldBlock + " has different block identifier"; - LOG.info("updatePipeline(block=" + oldBlock - + ", newGenerationStamp=" + newBlock.getGenerationStamp() - + ", newLength=" + newBlock.getNumBytes() - + ", newNodes=" + Arrays.asList(newNodes) - + ", clientName=" + clientName - + ")"); updatePipelineInternal(clientName, oldBlock, newBlock, newNodes); } finally { writeUnlock(); @@ -5215,7 +5234,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, 
writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot issue delegation token", safeMode); } @@ -5340,6 +5358,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats, getEditLog().logSync(); } + /** + * Log the cancellation of expired tokens to edit logs + * + * @param id token identifier to cancel + */ + public void logExpireDelegationToken(DelegationTokenIdentifier id) { + assert !isInSafeMode() : + "this should never be called while in safemode, since we stop " + + "the DT manager before entering safemode!"; + // No need to hold FSN lock since we don't access any internal + // structures, and this is stopped before the FSN shuts itself + // down, etc. + getEditLog().logCancelDelegationToken(id); + } + private void logReassignLease(String leaseHolder, String src, String newHolder) { assert hasWriteLock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 275b9bdbe61..a2e346e055b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -690,7 +690,12 @@ class NameNodeRpcServer implements NamenodeProtocols { metrics.incrFileInfoOps(); return namesystem.getFileInfo(src, true); } - + + @Override // ClientProtocol + public boolean isFileClosed(String src) throws IOException{ + return namesystem.isFileClosed(src); + } + @Override // ClientProtocol public HdfsFileStatus getFileLinkInfo(String src) throws IOException { metrics.incrFileInfoOps(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index f88f085b3ed..288455bd2b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -718,9 +718,15 @@ public class NamenodeWebHdfsMethods { private static StreamingOutput getListingStream(final NamenodeProtocols np, final String p) throws IOException { - final DirectoryListing first = getDirectoryListing(np, p, + // allows exceptions like FNF or ACE to prevent http response of 200 for + // a failure since we can't (currently) return error responses in the + // middle of a streaming operation + final DirectoryListing firstDirList = getDirectoryListing(np, p, HdfsFileStatus.EMPTY_NAME); + // must save ugi because the streaming object will be executed outside + // the remote user's ugi + final UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); return new StreamingOutput() { @Override public void write(final OutputStream outstream) throws IOException { @@ -729,21 +735,32 @@ public class NamenodeWebHdfsMethods { out.println("{\"" + FileStatus.class.getSimpleName() + "es\":{\"" + FileStatus.class.getSimpleName() + "\":["); - final HdfsFileStatus[] partial = first.getPartialListing(); - if (partial.length > 0) { - out.print(JsonUtil.toJsonString(partial[0], false)); - } - for(int i = 1; i < partial.length; i++) { - 
out.println(','); - out.print(JsonUtil.toJsonString(partial[i], false)); - } - - for(DirectoryListing curr = first; curr.hasMore(); ) { - curr = getDirectoryListing(np, p, curr.getLastName()); - for(HdfsFileStatus s : curr.getPartialListing()) { - out.println(','); - out.print(JsonUtil.toJsonString(s, false)); - } + try { + // restore remote user's ugi + ugi.doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws IOException { + long n = 0; + for (DirectoryListing dirList = firstDirList; ; + dirList = getDirectoryListing(np, p, dirList.getLastName()) + ) { + // send each segment of the directory listing + for (HdfsFileStatus s : dirList.getPartialListing()) { + if (n++ > 0) { + out.println(','); + } + out.print(JsonUtil.toJsonString(s, false)); + } + // stop if last segment + if (!dirList.hasMore()) { + break; + } + } + return null; + } + }); + } catch (InterruptedException e) { + throw new IOException(e); } out.println(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java index b4ee16b5088..0d9ff4fd872 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java @@ -123,7 +123,7 @@ class ImageLoaderCurrent implements ImageLoader { new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, - -40}; + -40, -41, -42}; private int imageVersion = 0; /* (non-Javadoc) @@ -163,6 +163,10 @@ class ImageLoaderCurrent implements ImageLoader { v.visit(ImageElement.TRANSACTION_ID, in.readLong()); } + if (LayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) { + v.visit(ImageElement.LAST_INODE_ID, in.readLong()); + } + if (LayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) { boolean isCompressed = in.readBoolean(); v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed)); @@ -440,6 +444,9 @@ class ImageLoaderCurrent implements ImageLoader { } v.visit(ImageElement.INODE_PATH, pathName); + if (LayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) { + v.visit(ImageElement.INODE_ID, in.readLong()); + } v.visit(ImageElement.REPLICATION, in.readShort()); v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong())); if(LayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion)) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java index e1b2fda5575..d15db389df5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageVisitor.java @@ -80,7 +80,9 @@ abstract class ImageVisitor { DELEGATION_TOKEN_IDENTIFIER_MAX_DATE, DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME, DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID, - TRANSACTION_ID + TRANSACTION_ID, + LAST_INODE_ID, + INODE_ID } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSet.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSet.java index b3de3aac223..f409f5e50f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSet.java @@ -81,4 +81,6 @@ public interface GSet extends Iterable { * @throws NullPointerException if key == null. */ E remove(K key); + + void clear(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSetByHashMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSetByHashMap.java index 7d2c61a7f12..92e49cfc1d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSetByHashMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/GSetByHashMap.java @@ -65,4 +65,9 @@ public class GSetByHashMap implements GSet { public Iterator iterator() { return m.values().iterator(); } + + @Override + public void clear() { + m.clear(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java index 5ab9a8cb355..9919175c72a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java @@ -85,7 +85,6 @@ public class LightWeightGSet implements GSet { if (LOG.isDebugEnabled()) { LOG.debug("recommended=" + recommended_length + ", actual=" + actual); } - entries = new LinkedElement[actual]; hash_mask = entries.length - 1; } @@ -329,13 +328,18 @@ public class LightWeightGSet implements GSet { final int exponent = e2 < 0? 0: e2 > 30? 
30: e2; final int c = 1 << exponent; - if (LightWeightGSet.LOG.isDebugEnabled()) { - LOG.debug("Computing capacity for map " + mapName); - LOG.debug("VM type = " + vmBit + "-bit"); - LOG.debug(percentage + "% max memory = " - + StringUtils.TraditionalBinaryPrefix.long2String(maxMemory, "B", 1)); - LOG.debug("capacity = 2^" + exponent + " = " + c + " entries"); - } + LOG.info("Computing capacity for map " + mapName); + LOG.info("VM type = " + vmBit + "-bit"); + LOG.info(percentage + "% max memory = " + + StringUtils.TraditionalBinaryPrefix.long2String(maxMemory, "B", 1)); + LOG.info("capacity = 2^" + exponent + " = " + c + " entries"); return c; } + + public void clear() { + for (int i = 0; i < entries.length; i++) { + entries[i] = null; + } + size = 0; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 6f33827fa74..75b405d0514 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -156,7 +156,6 @@ public class WebHdfsFileSystem extends FileSystem private URI uri; private boolean hasInitedToken; private Token delegationToken; - private final AuthenticatedURL.Token authToken = new AuthenticatedURL.Token(); private RetryPolicy retryPolicy = null; private Path workingDir; @@ -481,6 +480,8 @@ public class WebHdfsFileSystem extends FileSystem try { if (op.getRequireAuth()) { LOG.debug("open AuthenticatedURL connection"); + UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab(); + final AuthenticatedURL.Token authToken = new AuthenticatedURL.Token(); conn = new AuthenticatedURL(AUTH).openConnection(url, authToken); } else { LOG.debug("open URL connection"); @@ -1006,20 +1007,12 @@ public class WebHdfsFileSystem extends FileSystem @Override public long renew(final Token token, final Configuration conf ) throws IOException, InterruptedException { - final UserGroupInformation ugi = UserGroupInformation.getLoginUser(); - // update the kerberos credentials, if they are coming from a keytab - ugi.reloginFromKeytab(); - return getWebHdfs(token, conf).renewDelegationToken(token); } @Override public void cancel(final Token token, final Configuration conf ) throws IOException, InterruptedException { - final UserGroupInformation ugi = UserGroupInformation.getLoginUser(); - // update the kerberos credentials, if they are coming from a keytab - ugi.checkTGTAndReloginFromKeytab(); - getWebHdfs(token, conf).cancelDelegationToken(token); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto index 419ae45b738..d99e4e7209b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto @@ -332,6 +332,14 @@ message GetFileInfoResponseProto { optional HdfsFileStatusProto fs = 1; } +message IsFileClosedRequestProto { + required string src = 1; +} + +message IsFileClosedResponseProto { + required bool result = 1; +} + message GetFileLinkInfoRequestProto { required string src = 1; } @@ -498,4 +506,6 @@ service ClientNamenodeProtocol { returns(SetBalancerBandwidthResponseProto); rpc getDataEncryptionKey(GetDataEncryptionKeyRequestProto) 
returns(GetDataEncryptionKeyResponseProto); + rpc isFileClosed(IsFileClosedRequestProto) + returns(IsFileClosedResponseProto); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 79e8476ac91..30f2bc9dd0d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1242,4 +1242,32 @@ + + dfs.datanode.fsdataset.volume.choosing.balanced-space-threshold + 10737418240 + + Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to + org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy. + This setting controls how much DN volumes are allowed to differ in terms of + bytes of free disk space before they are considered imbalanced. If the free + space of all the volumes are within this range of each other, the volumes + will be considered balanced and block assignments will be done on a pure + round robin basis. + + + + + dfs.datanode.fsdataset.volume.choosing.balanced-space-preference-percent + 0.75f + + Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to + org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy. + This setting controls what percentage of new block allocations will be sent + to volumes with more available disk space than others. This setting should + be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should + be no reason to prefer that volumes with less available disk space receive + more block allocations. + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index ad262c48c86..2353c9a3f3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -762,4 +762,27 @@ public class TestDistributedFileSystem { } } } + + @Test(timeout=60000) + public void testFileCloseStatus() throws IOException { + Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); + DistributedFileSystem fs = cluster.getFileSystem(); + try { + // create a new file. 
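The two hdfs-default.xml properties documented above only take effect once the volume-choosing policy itself is selected. A hedged snippet showing the three settings together; the key names and defaults are quoted from that hunk, and the fully qualified policy class name is taken from its description, so treat it as an assumption:

import org.apache.hadoop.conf.Configuration;

/** Illustrative wiring of the available-space volume-choosing policy. */
class VolumeChoosingConfExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("dfs.datanode.fsdataset.volume.choosing.policy",
        "org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy");
    // Volumes whose free space differs by less than 10 GB are treated as balanced.
    conf.setLong(
        "dfs.datanode.fsdataset.volume.choosing.balanced-space-threshold",
        10L * 1024 * 1024 * 1024);
    // 75% of new block allocations go to the volumes with more free space.
    conf.setFloat(
        "dfs.datanode.fsdataset.volume.choosing.balanced-space-preference-percent",
        0.75f);
    System.out.println(conf.get("dfs.datanode.fsdataset.volume.choosing.policy"));
  }
}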
+ Path file = new Path("/simpleFlush.dat"); + FSDataOutputStream output = fs.create(file); + // write to file + output.writeBytes("Some test data"); + output.flush(); + assertFalse("File status should be open", fs.isFileClosed(file)); + output.close(); + assertTrue("File status should be closed", fs.isFileClosed(file)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSFileSystemContract.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSFileSystemContract.java index b4345f0367b..f19386b4edd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSFileSystemContract.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSFileSystemContract.java @@ -46,6 +46,7 @@ public class TestHDFSFileSystemContract extends FileSystemContractBaseTest { protected void tearDown() throws Exception { super.tearDown(); cluster.shutdown(); + cluster = null; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index 6eab01090c7..794b44d438a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -99,7 +99,7 @@ public class TestSafeMode { */ @Test public void testManualSafeMode() throws IOException { - fs = (DistributedFileSystem)cluster.getFileSystem(); + fs = cluster.getFileSystem(); Path file1 = new Path("/tmp/testManualSafeMode/file1"); Path file2 = new Path("/tmp/testManualSafeMode/file2"); @@ -112,7 +112,7 @@ public class TestSafeMode { // now bring up just the NameNode. cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(false).build(); cluster.waitActive(); - dfs = (DistributedFileSystem)cluster.getFileSystem(); + dfs = cluster.getFileSystem(); assertTrue("No datanode is started. Should be in SafeMode", dfs.setSafeMode(SafeModeAction.SAFEMODE_GET)); @@ -322,11 +322,11 @@ public class TestSafeMode { fs.rename(file1, new Path("file2")); }}); - try { - fs.setTimes(file1, 0, 0); - } catch (IOException ioe) { - fail("Set times failed while in SM"); - } + runFsFun("Set time while in SM", new FSRun() { + @Override + public void run(FileSystem fs) throws IOException { + fs.setTimes(file1, 0, 0); + }}); try { DFSTestUtil.readFile(fs, file1); @@ -350,7 +350,7 @@ public class TestSafeMode { conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1); cluster.restartNameNode(); - fs = (DistributedFileSystem)cluster.getFileSystem(); + fs = cluster.getFileSystem(); String tipMsg = cluster.getNamesystem().getSafemode(); assertTrue("Safemode tip message looks right: " + tipMsg, @@ -375,7 +375,7 @@ public class TestSafeMode { * @throws IOException when there's an issue connecting to the test DFS. */ public void testSafeModeUtils() throws IOException { - dfs = (DistributedFileSystem)cluster.getFileSystem(); + dfs = cluster.getFileSystem(); // Enter safemode. 
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetTimes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetTimes.java index 18341ca9558..4e6091b8122 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetTimes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSetTimes.java @@ -27,6 +27,7 @@ import java.net.InetSocketAddress; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Random; +import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -36,8 +37,11 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.MockitoUtil; import org.apache.hadoop.util.Time; import org.junit.Test; +import org.mockito.Mockito; /** * This class tests the access time on files. @@ -273,6 +277,37 @@ public class TestSetTimes { cluster.shutdown(); } } + + /** + * Test that when access time updates are not needed, the FSNamesystem + * write lock is not taken by getBlockLocations. + * Regression test for HDFS-3981. + */ + @Test(timeout=60000) + public void testGetBlockLocationsOnlyUsesReadLock() throws IOException { + Configuration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 100*1000); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0) + .build(); + ReentrantReadWriteLock spyLock = NameNodeAdapter.spyOnFsLock(cluster.getNamesystem()); + try { + // Create empty file in the FSN. + Path p = new Path("/empty-file"); + DFSTestUtil.createFile(cluster.getFileSystem(), p, 0, (short)1, 0L); + + // getBlockLocations() should not need the write lock, since we just created + // the file (and thus its access time is already within the 100-second + // accesstime precision configured above). + MockitoUtil.doThrowWhenCallStackMatches( + new AssertionError("Should not need write lock"), + ".*getBlockLocations.*") + .when(spyLock).writeLock(); + cluster.getFileSystem().getFileBlockLocations(p, 0, 100); + } finally { + cluster.shutdown(); + } + } public static void main(String[] args) throws Exception { new TestSetTimes().testTimes(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java index 9ce2ae3c7ce..cfe463c4356 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java @@ -98,9 +98,6 @@ public class TestNNWithQJM { @Test (timeout = 30000) public void testNewNamenodeTakesOverWriter() throws Exception { - // Skip the test on Windows. See HDFS-4584. 
- assumeTrue(!Path.WINDOWS); - File nn1Dir = new File( MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image-nn1"); File nn2Dir = new File( @@ -110,23 +107,37 @@ public class TestNNWithQJM { nn1Dir.getAbsolutePath()); conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, mjc.getQuorumJournalURI("myjournal").toString()); - + + // Start the cluster once to generate the dfs dirs MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(0) .manageNameDfsDirs(false) .checkExitOnShutdown(false) .build(); + // Shutdown the cluster before making a copy of the namenode dir + // to release all file locks, otherwise, the copy will fail on + // some platforms. + cluster.shutdown(); + try { - cluster.getFileSystem().mkdirs(TEST_PATH); - // Start a second NN pointed to the same quorum. // We need to copy the image dir from the first NN -- or else // the new NN will just be rejected because of Namespace mismatch. FileUtil.fullyDelete(nn2Dir); FileUtil.copy(nn1Dir, FileSystem.getLocal(conf).getRaw(), new Path(nn2Dir.getAbsolutePath()), false, conf); - + + // Start the cluster again + cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0) + .format(false) + .manageNameDfsDirs(false) + .checkExitOnShutdown(false) + .build(); + + cluster.getFileSystem().mkdirs(TEST_PATH); + Configuration conf2 = new Configuration(); conf2.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nn2Dir.getAbsolutePath()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java index 7c2bb29d409..e62e26b0971 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java @@ -74,6 +74,8 @@ public class TestQuorumJournalManager { private Configuration conf; private QuorumJournalManager qjm; private List spies; + + private List toClose = Lists.newLinkedList(); static { ((Log4JLogger)ProtobufRpcEngine.LOG).getLogger().setLevel(Level.ALL); @@ -98,11 +100,26 @@ public class TestQuorumJournalManager { @After public void shutdown() throws IOException { + IOUtils.cleanup(LOG, toClose.toArray(new Closeable[0])); + + // Should not leak clients between tests -- this can cause flaky tests. + // (See HDFS-4643) + GenericTestUtils.assertNoThreadsMatching(".*IPC Client.*"); + if (cluster != null) { cluster.shutdown(); } } + /** + * Enqueue a QJM for closing during shutdown. This makes the code a little + * easier to follow, with fewer try..finally clauses necessary. 
+ */ + private QuorumJournalManager closeLater(QuorumJournalManager qjm) { + toClose.add(qjm); + return qjm; + } + @Test public void testSingleWriter() throws Exception { writeSegment(cluster, qjm, 1, 3, true); @@ -119,8 +136,8 @@ public class TestQuorumJournalManager { @Test public void testFormat() throws Exception { - QuorumJournalManager qjm = new QuorumJournalManager( - conf, cluster.getQuorumJournalURI("testFormat-jid"), FAKE_NSINFO); + QuorumJournalManager qjm = closeLater(new QuorumJournalManager( + conf, cluster.getQuorumJournalURI("testFormat-jid"), FAKE_NSINFO)); assertFalse(qjm.hasSomeData()); qjm.format(FAKE_NSINFO); assertTrue(qjm.hasSomeData()); @@ -128,8 +145,7 @@ public class TestQuorumJournalManager { @Test public void testReaderWhileAnotherWrites() throws Exception { - - QuorumJournalManager readerQjm = createSpyingQJM(); + QuorumJournalManager readerQjm = closeLater(createSpyingQJM()); List streams = Lists.newArrayList(); readerQjm.selectInputStreams(streams, 0, false); assertEquals(0, streams.size()); @@ -251,8 +267,8 @@ public class TestQuorumJournalManager { // Make a new QJM - qjm = new QuorumJournalManager( - conf, cluster.getQuorumJournalURI(JID), FAKE_NSINFO); + qjm = closeLater(new QuorumJournalManager( + conf, cluster.getQuorumJournalURI(JID), FAKE_NSINFO)); qjm.recoverUnfinalizedSegments(); checkRecovery(cluster, 1, 3); @@ -364,8 +380,8 @@ public class TestQuorumJournalManager { NNStorage.getInProgressEditsFileName(1)); // Make a new QJM - qjm = new QuorumJournalManager( - conf, cluster.getQuorumJournalURI(JID), FAKE_NSINFO); + qjm = closeLater(new QuorumJournalManager( + conf, cluster.getQuorumJournalURI(JID), FAKE_NSINFO)); qjm.recoverUnfinalizedSegments(); checkRecovery(cluster, 1, 3); } @@ -902,8 +918,8 @@ public class TestQuorumJournalManager { return Mockito.spy(logger); } }; - return new QuorumJournalManager( - conf, cluster.getQuorumJournalURI(JID), FAKE_NSINFO, spyFactory); + return closeLater(new QuorumJournalManager( + conf, cluster.getQuorumJournalURI(JID), FAKE_NSINFO, spyFactory)); } private static void waitForAllPendingCalls(AsyncLoggerSet als) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 504e1ca6854..42ea48230ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -17,10 +17,12 @@ */ package org.apache.hadoop.hdfs.server.datanode; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; +import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; import java.util.Map; @@ -68,6 +70,8 @@ public class TestBPOfferService { TestBPOfferService.class); private static final ExtendedBlock FAKE_BLOCK = new ExtendedBlock(FAKE_BPID, 12345L); + private static final String TEST_BUILD_DATA = System.getProperty( + "test.build.data", "build/test/data"); static { ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); @@ -90,6 +94,8 @@ public class TestBPOfferService { mockDn = Mockito.mock(DataNode.class); Mockito.doReturn(true).when(mockDn).shouldRun(); Configuration conf = new Configuration(); + File 
dnDataDir = new File(new File(TEST_BUILD_DATA, "dfs"), "data"); + conf.set(DFS_DATANODE_DATA_DIR_KEY, dnDataDir.toURI().toString()); Mockito.doReturn(conf).when(mockDn).getConf(); Mockito.doReturn(new DNConf(conf)).when(mockDn).getDnConf(); Mockito.doReturn(DataNodeMetrics.create(conf, "fake dn")) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java index 3b07fe7978a..e1bbd9a4d26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java @@ -113,7 +113,7 @@ public class TestBlockPoolManager { // Remove the first NS conf.set(DFSConfigKeys.DFS_NAMESERVICES, - "ns1"); + "ns2"); bpm.refreshNamenodes(conf); assertEquals( "stop #1\n" + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestAvailableSpaceVolumeChoosingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestAvailableSpaceVolumeChoosingPolicy.java new file mode 100644 index 00000000000..16cce72c5bf --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestAvailableSpaceVolumeChoosingPolicy.java @@ -0,0 +1,303 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.datanode.fsdataset; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY; + +import java.util.ArrayList; +import java.util.List; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.ReflectionUtils; +import org.junit.Test; +import org.mockito.Mockito; + +public class TestAvailableSpaceVolumeChoosingPolicy { + + private static final int RANDOMIZED_ITERATIONS = 10000; + private static final float RANDOMIZED_ERROR_PERCENT = 0.05f; + private static final long RANDOMIZED_ALLOWED_ERROR = (long) (RANDOMIZED_ERROR_PERCENT * RANDOMIZED_ITERATIONS); + + private static void initPolicy(VolumeChoosingPolicy policy, + float preferencePercent) { + Configuration conf = new Configuration(); + // Set the threshold to consider volumes imbalanced to 1MB + conf.setLong( + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_THRESHOLD_KEY, + 1024 * 1024); // 1MB + conf.setFloat( + DFS_DATANODE_FSDATASET_VOLUME_CHOOSING_BALANCED_SPACE_PREFERENCE_PERCENT_KEY, + preferencePercent); + ((Configurable) policy).setConf(conf); + } + + // Test the Round-Robin block-volume fallback path when all volumes are within + // the threshold. + @Test(timeout=60000) + public void testRR() throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + initPolicy(policy, 1.0f); + TestRoundRobinVolumeChoosingPolicy.testRR(policy); + } + + // ChooseVolume should throw DiskOutOfSpaceException + // with volume and block sizes in exception message. + @Test(timeout=60000) + public void testRRPolicyExceptionMessage() throws Exception { + final AvailableSpaceVolumeChoosingPolicy policy + = new AvailableSpaceVolumeChoosingPolicy(); + initPolicy(policy, 1.0f); + TestRoundRobinVolumeChoosingPolicy.testRRPolicyExceptionMessage(policy); + } + + @Test(timeout=60000) + public void testTwoUnbalancedVolumes() throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + initPolicy(policy, 1.0f); + + List volumes = new ArrayList(); + + // First volume with 1MB free space + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(0).getAvailable()).thenReturn(1024L * 1024L); + + // Second volume with 3MB free space, which is a difference of 2MB, more + // than the threshold of 1MB. 
+ volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(1).getAvailable()).thenReturn(1024L * 1024L * 3); + + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + } + + @Test(timeout=60000) + public void testThreeUnbalancedVolumes() throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + + List volumes = new ArrayList(); + + // First volume with 1MB free space + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(0).getAvailable()).thenReturn(1024L * 1024L); + + // Second volume with 3MB free space, which is a difference of 2MB, more + // than the threshold of 1MB. + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(1).getAvailable()).thenReturn(1024L * 1024L * 3); + + // Third volume, again with 3MB free space. + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(2).getAvailable()).thenReturn(1024L * 1024L * 3); + + // We should alternate assigning between the two volumes with a lot of free + // space. + initPolicy(policy, 1.0f); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(2), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(2), policy.chooseVolume(volumes, 100)); + + // All writes should be assigned to the volume with the least free space. + initPolicy(policy, 0.0f); + Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 100)); + } + + @Test(timeout=60000) + public void testFourUnbalancedVolumes() throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + + List volumes = new ArrayList(); + + // First volume with 1MB free space + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(0).getAvailable()).thenReturn(1024L * 1024L); + + // Second volume with 1MB + 1 byte free space + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(1).getAvailable()).thenReturn(1024L * 1024L + 1); + + // Third volume with 3MB free space, which is a difference of 2MB, more + // than the threshold of 1MB. + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(2).getAvailable()).thenReturn(1024L * 1024L * 3); + + // Fourth volume, again with 3MB free space. + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(3).getAvailable()).thenReturn(1024L * 1024L * 3); + + // We should alternate assigning between the two volumes with a lot of free + // space. 
+ initPolicy(policy, 1.0f); + Assert.assertEquals(volumes.get(2), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(3), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(2), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(3), policy.chooseVolume(volumes, 100)); + + // We should alternate assigning between the two volumes with less free + // space. + initPolicy(policy, 0.0f); + Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 100)); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + } + + @Test(timeout=60000) + public void testNotEnoughSpaceOnSelectedVolume() throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + + List volumes = new ArrayList(); + + // First volume with 1MB free space + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(0).getAvailable()).thenReturn(1024L * 1024L); + + // Second volume with 3MB free space, which is a difference of 2MB, more + // than the threshold of 1MB. + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(1).getAvailable()).thenReturn(1024L * 1024L * 3); + + // All writes should be assigned to the volume with the least free space. + // However, if the volume with the least free space doesn't have enough + // space to accept the replica size, and another volume does have enough + // free space, that should be chosen instead. + initPolicy(policy, 0.0f); + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 1024L * 1024L * 2)); + } + + @Test(timeout=60000) + public void testAvailableSpaceChanges() throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + initPolicy(policy, 1.0f); + + List volumes = new ArrayList(); + + // First volume with 1MB free space + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(0).getAvailable()).thenReturn(1024L * 1024L); + + // Second volume with 3MB free space, which is a difference of 2MB, more + // than the threshold of 1MB. + volumes.add(Mockito.mock(FsVolumeSpi.class)); + Mockito.when(volumes.get(1).getAvailable()) + .thenReturn(1024L * 1024L * 3) + .thenReturn(1024L * 1024L * 3) + .thenReturn(1024L * 1024L * 3) + .thenReturn(1024L * 1024L * 1); // After the third check, return 1MB. + + // Should still be able to get a volume for the replica even though the + // available space on the second volume changed. + Assert.assertEquals(volumes.get(1), policy.chooseVolume(volumes, 100)); + } + + @Test(timeout=60000) + public void randomizedTest1() throws Exception { + doRandomizedTest(0.75f, 1, 1); + } + + @Test(timeout=60000) + public void randomizedTest2() throws Exception { + doRandomizedTest(0.75f, 5, 1); + } + + @Test(timeout=60000) + public void randomizedTest3() throws Exception { + doRandomizedTest(0.75f, 1, 5); + } + + @Test(timeout=60000) + public void randomizedTest4() throws Exception { + doRandomizedTest(0.90f, 5, 1); + } + + /* + * Ensure that we randomly select the lesser-used volumes with appropriate + * frequency. 
+ */ + public void doRandomizedTest(float preferencePercent, int lowSpaceVolumes, + int highSpaceVolumes) throws Exception { + @SuppressWarnings("unchecked") + final AvailableSpaceVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(AvailableSpaceVolumeChoosingPolicy.class, null); + + List volumes = new ArrayList(); + + // Volumes with 1MB free space + for (int i = 0; i < lowSpaceVolumes; i++) { + FsVolumeSpi volume = Mockito.mock(FsVolumeSpi.class); + Mockito.when(volume.getAvailable()).thenReturn(1024L * 1024L); + volumes.add(volume); + } + + // Volumes with 3MB free space + for (int i = 0; i < highSpaceVolumes; i++) { + FsVolumeSpi volume = Mockito.mock(FsVolumeSpi.class); + Mockito.when(volume.getAvailable()).thenReturn(1024L * 1024L * 3); + volumes.add(volume); + } + + initPolicy(policy, preferencePercent); + long lowAvailableSpaceVolumeSelected = 0; + long highAvailableSpaceVolumeSelected = 0; + for (int i = 0; i < RANDOMIZED_ITERATIONS; i++) { + FsVolumeSpi volume = policy.chooseVolume(volumes, 100); + for (int j = 0; j < volumes.size(); j++) { + // Note how many times the first low available volume was selected + if (volume == volumes.get(j) && j == 0) { + lowAvailableSpaceVolumeSelected++; + } + // Note how many times the first high available volume was selected + if (volume == volumes.get(j) && j == lowSpaceVolumes) { + highAvailableSpaceVolumeSelected++; + break; + } + } + } + + // Calculate the expected ratio of how often low available space volumes + // were selected vs. high available space volumes. + float expectedSelectionRatio = preferencePercent / (1 - preferencePercent); + + GenericTestUtils.assertValueNear( + (long)(lowAvailableSpaceVolumeSelected * expectedSelectionRatio), + highAvailableSpaceVolumeSelected, + RANDOMIZED_ALLOWED_ERROR); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestRoundRobinVolumeChoosingPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestRoundRobinVolumeChoosingPolicy.java index f8f3cd85c77..9818a01b694 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestRoundRobinVolumeChoosingPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/TestRoundRobinVolumeChoosingPolicy.java @@ -32,6 +32,14 @@ public class TestRoundRobinVolumeChoosingPolicy { // Test the Round-Robin block-volume choosing algorithm. @Test public void testRR() throws Exception { + @SuppressWarnings("unchecked") + final RoundRobinVolumeChoosingPolicy policy = + ReflectionUtils.newInstance(RoundRobinVolumeChoosingPolicy.class, null); + testRR(policy); + } + + public static void testRR(VolumeChoosingPolicy policy) + throws Exception { final List volumes = new ArrayList(); // First volume, with 100 bytes of space. @@ -41,10 +49,6 @@ public class TestRoundRobinVolumeChoosingPolicy { // Second volume, with 200 bytes of space. volumes.add(Mockito.mock(FsVolumeSpi.class)); Mockito.when(volumes.get(1).getAvailable()).thenReturn(200L); - - @SuppressWarnings("unchecked") - final RoundRobinVolumeChoosingPolicy policy = - ReflectionUtils.newInstance(RoundRobinVolumeChoosingPolicy.class, null); // Test two rounds of round-robin choosing Assert.assertEquals(volumes.get(0), policy.chooseVolume(volumes, 0)); @@ -69,6 +73,13 @@ public class TestRoundRobinVolumeChoosingPolicy { // with volume and block sizes in exception message. 
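[Illustrative aside, not part of this patch.] Worked example of the ratio check in doRandomizedTest() above: with preferencePercent = 0.75 the expected selection ratio is 0.75 / (1 - 0.75) = 3, so over RANDOMIZED_ITERATIONS = 10000 picks with one low-space and one high-space volume, the high-space volume should be chosen roughly 7500 times and the low-space volume roughly 2500 times, and assertValueNear tolerates a drift of RANDOMIZED_ALLOWED_ERROR = 500 (5% of the iterations). A minimal standalone sketch of the same check, with hypothetical counts standing in for the tallies gathered in the loop above:

    // Sketch only -- mirrors the assertion in doRandomizedTest() for
    // preferencePercent = 0.75f; the counts below are hypothetical.
    float preferencePercent = 0.75f;
    long lowSelected = 2480L;   // hypothetical picks of the 1MB volume
    long highSelected = 7520L;  // hypothetical picks of the 3MB volume
    float expectedSelectionRatio = preferencePercent / (1 - preferencePercent); // = 3.0
    GenericTestUtils.assertValueNear(
        (long) (lowSelected * expectedSelectionRatio), // expected ~7440
        highSelected,                                  // observed
        500L);                                         // RANDOMIZED_ALLOWED_ERROR
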
@Test public void testRRPolicyExceptionMessage() throws Exception { + final RoundRobinVolumeChoosingPolicy policy + = new RoundRobinVolumeChoosingPolicy(); + testRRPolicyExceptionMessage(policy); + } + + public static void testRRPolicyExceptionMessage( + VolumeChoosingPolicy policy) throws Exception { final List volumes = new ArrayList(); // First volume, with 500 bytes of space. @@ -79,8 +90,6 @@ public class TestRoundRobinVolumeChoosingPolicy { volumes.add(Mockito.mock(FsVolumeSpi.class)); Mockito.when(volumes.get(1).getAvailable()).thenReturn(600L); - final RoundRobinVolumeChoosingPolicy policy - = new RoundRobinVolumeChoosingPolicy(); int blockSize = 700; try { policy.chooseVolume(volumes, blockSize); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 2ffaaafca3a..0dc0a7325f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -217,7 +217,8 @@ public abstract class FSImageTestUtil { FsPermission.createImmutable((short)0755)); for (int i = 1; i <= numDirs; i++) { String dirName = "dir" + i; - INodeDirectory dir = new INodeDirectory(newInodeId + i -1, dirName, perms); + INodeDirectory dir = new INodeDirectory(newInodeId + i - 1, dirName, + perms); editLog.logMkDir("/" + dirName, dir); } editLog.logSync(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index de0cf8a5ef5..db0e9b8e058 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -30,8 +30,6 @@ import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; import java.lang.management.ManagementFactory; -import java.lang.management.ThreadInfo; -import java.lang.management.ThreadMXBean; import java.net.InetSocketAddress; import java.net.URI; import java.util.ArrayList; @@ -122,18 +120,7 @@ public class TestCheckpoint { @After public void checkForSNNThreads() { - ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); - - ThreadInfo[] infos = threadBean.getThreadInfo(threadBean.getAllThreadIds(), 20); - for (ThreadInfo info : infos) { - if (info == null) continue; - LOG.info("Check thread: " + info.getThreadName()); - if (info.getThreadName().contains("SecondaryNameNode")) { - fail("Leaked thread: " + info + "\n" + - Joiner.on("\n").join(info.getStackTrace())); - } - } - LOG.info("--------"); + GenericTestUtils.assertNoThreadsMatching(".*SecondaryNameNode.*"); } static void checkFile(FileSystem fileSys, Path name, int repl) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index 37b22714c20..86cb992ae17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ 
-24,10 +24,8 @@ import static org.junit.Assert.fail; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.EnumSet; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Options; @@ -39,10 +37,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; -import org.apache.hadoop.io.EnumSetWritable; import org.junit.Test; public class TestINodeFile { @@ -398,9 +393,7 @@ public class TestINodeFile { cluster.waitActive(); FSNamesystem fsn = cluster.getNamesystem(); - long lastId = fsn.getLastInodeId(); - - assertTrue(lastId == 1001); + assertTrue(fsn.getLastInodeId() == 1001); // Create one directory and the last inode id should increase to 1002 FileSystem fs = cluster.getFileSystem(); @@ -408,14 +401,10 @@ public class TestINodeFile { assertTrue(fs.mkdirs(path)); assertTrue(fsn.getLastInodeId() == 1002); - // Use namenode rpc to create a file - NamenodeProtocols nnrpc = cluster.getNameNodeRpc(); - HdfsFileStatus fileStatus = nnrpc.create("/test1/file", new FsPermission( - (short) 0755), "client", - new EnumSetWritable(EnumSet.of(CreateFlag.CREATE)), true, - (short) 1, 128 * 1024 * 1024L); + int fileLen = 1024; + Path filePath = new Path("/test1/file"); + DFSTestUtil.createFile(fs, filePath, fileLen, (short) 1, 0); assertTrue(fsn.getLastInodeId() == 1003); - assertTrue(fileStatus.getFileId() == 1003); // Rename doesn't increase inode id Path renamedPath = new Path("/test2"); @@ -427,7 +416,32 @@ public class TestINodeFile { // Make sure empty editlog can be handled cluster.restartNameNode(); cluster.waitActive(); + fsn = cluster.getNamesystem(); assertTrue(fsn.getLastInodeId() == 1003); + + DFSTestUtil.createFile(fs, new Path("/test2/file2"), fileLen, (short) 1, + 0); + long id = fsn.getLastInodeId(); + assertTrue(id == 1004); + fs.delete(new Path("/test2"), true); + // create a file under construction + FSDataOutputStream outStream = fs.create(new Path("/test3/file")); + assertTrue(outStream != null); + assertTrue(fsn.getLastInodeId() == 1006); + + // Apply editlogs to fsimage, test fsimage with inodeUnderConstruction can + // be handled + fsn.enterSafeMode(false); + fsn.saveNamespace(); + fsn.leaveSafeMode(); + + outStream.close(); + + // The lastInodeId in fsimage should remain 1006 after reboot + cluster.restartNameNode(); + cluster.waitActive(); + fsn = cluster.getNamesystem(); + assertTrue(fsn.getLastInodeId() == 1006); } finally { if (cluster != null) { cluster.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java index dd679d1a9af..2df1459d4d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static 
org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.assertEquals; import java.io.File; @@ -30,12 +31,14 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.junit.Test; +import static org.mockito.Mockito.*; /** * This class tests the creation and validation of a checkpoint. @@ -163,4 +166,70 @@ public class TestSecurityTokenEditLog { if(cluster != null) cluster.shutdown(); } } + + @Test(timeout=10000) + public void testEditsForCancelOnTokenExpire() throws IOException, + InterruptedException { + long renewInterval = 2000; + Configuration conf = new Configuration(); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + conf.setLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, renewInterval); + conf.setLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, renewInterval*2); + + Text renewer = new Text(UserGroupInformation.getCurrentUser().getUserName()); + FSImage fsImage = mock(FSImage.class); + FSEditLog log = mock(FSEditLog.class); + doReturn(log).when(fsImage).getEditLog(); + FSNamesystem fsn = new FSNamesystem(conf, fsImage); + + DelegationTokenSecretManager dtsm = fsn.getDelegationTokenSecretManager(); + try { + dtsm.startThreads(); + + // get two tokens + Token token1 = fsn.getDelegationToken(renewer); + Token token2 = fsn.getDelegationToken(renewer); + DelegationTokenIdentifier ident1 = + (DelegationTokenIdentifier)token1.decodeIdentifier(); + DelegationTokenIdentifier ident2 = + (DelegationTokenIdentifier)token2.decodeIdentifier(); + + // verify we got the tokens + verify(log, times(1)).logGetDelegationToken(eq(ident1), anyLong()); + verify(log, times(1)).logGetDelegationToken(eq(ident2), anyLong()); + + // this is a little tricky because DTSM doesn't let us set scan interval + // so need to periodically sleep, then stop/start threads to force scan + + // renew first token 1/2 to expire + Thread.sleep(renewInterval/2); + fsn.renewDelegationToken(token2); + verify(log, times(1)).logRenewDelegationToken(eq(ident2), anyLong()); + // force scan and give it a little time to complete + dtsm.stopThreads(); dtsm.startThreads(); + Thread.sleep(250); + // no token has expired yet + verify(log, times(0)).logCancelDelegationToken(eq(ident1)); + verify(log, times(0)).logCancelDelegationToken(eq(ident2)); + + // sleep past expiration of 1st non-renewed token + Thread.sleep(renewInterval/2); + dtsm.stopThreads(); dtsm.startThreads(); + Thread.sleep(250); + // non-renewed token should have implicitly been cancelled + verify(log, times(1)).logCancelDelegationToken(eq(ident1)); + verify(log, times(0)).logCancelDelegationToken(eq(ident2)); + + // sleep past expiration of 2nd renewed token + Thread.sleep(renewInterval/2); + dtsm.stopThreads(); dtsm.startThreads(); + Thread.sleep(250); + // both tokens should have been implicitly cancelled by now + verify(log, times(1)).logCancelDelegationToken(eq(ident1)); + verify(log, times(1)).logCancelDelegationToken(eq(ident2)); + } finally { + dtsm.stopThreads(); + } 
+ } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java index 971d538b272..5f43cb53f93 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java @@ -388,6 +388,11 @@ public class TestGSet { return String.format(" iterate=%5d, contain=%5d, time elapsed=%5d.%03ds", iterate_count, contain_count, t/1000, t%1000); } + + @Override + public void clear() { + gset.clear(); + } } /** Test data set */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index 969ad5c1fb9..3998fdc3c93 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.web; import java.io.IOException; +import java.net.URISyntaxException; +import java.security.PrivilegedExceptionAction; import java.util.Random; import org.apache.commons.logging.Log; @@ -29,9 +31,13 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.TestDFSClientRetries; import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -208,4 +214,48 @@ public class TestWebHDFS { final Configuration conf = WebHdfsTestUtil.createConf(); TestDFSClientRetries.namenodeRestartTest(conf, true); } + + @Test(timeout=300000) + public void testLargeDirectory() throws Exception { + final Configuration conf = WebHdfsTestUtil.createConf(); + final int listLimit = 2; + // force small chunking of directory listing + conf.setInt(DFSConfigKeys.DFS_LIST_LIMIT, listLimit); + // force paths to be only owner-accessible to ensure ugi isn't changing + // during listStatus + FsPermission.setUMask(conf, new FsPermission((short)0077)); + + final MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + try { + cluster.waitActive(); + WebHdfsTestUtil.getWebHdfsFileSystem(conf).setPermission( + new Path("/"), + new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)); + + // trick the NN into not believing it's not the superuser so we can + // tell if the correct user is used by listStatus + UserGroupInformation.setLoginUser( + UserGroupInformation.createUserForTesting( + "not-superuser", new String[]{"not-supergroup"})); + + UserGroupInformation.createUserForTesting("me", new String[]{"my-group"}) + .doAs(new PrivilegedExceptionAction() { + @Override + public Void run() throws IOException, URISyntaxException { + FileSystem fs = WebHdfsTestUtil.getWebHdfsFileSystem(conf); + Path d = new Path("/my-dir"); + Assert.assertTrue(fs.mkdirs(d)); + for (int i=0; i < listLimit*3; i++) { + Path p = new Path(d, "file-"+i); + Assert.assertTrue(fs.createNewFile(p)); + } + 
Assert.assertEquals(listLimit*3, fs.listStatus(d).length); + return null; + } + }); + } finally { + cluster.shutdown(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index 75fb82ff751..dd3a2ca7427 100644 Binary files a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored and b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored differ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index dded62ef49a..ce6dbe4dc64 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1,6 +1,6 @@ - -40 + -42 OP_START_LOG_SEGMENT @@ -41,6 +41,7 @@ 5 0 + 1002 /file_create 1 1330405685834 @@ -60,6 +61,7 @@ 6 0 + 0 /file_create 1 1330405685848 @@ -98,6 +100,7 @@ 9 0 + 1003 /directory_mkdir 1330405685861 @@ -119,6 +122,7 @@ 11 0 + 1004 /file_create 1 1330405685866 @@ -138,6 +142,7 @@ 12 0 + 0 /file_create 1 1330405685868 @@ -218,6 +223,7 @@ 20 0 + 1005 /file_concat_target 1 1330405685889 @@ -309,6 +315,7 @@ 27 0 + 0 /file_concat_target 1 1330405685978 @@ -350,6 +357,7 @@ 29 0 + 1006 /file_concat_0 1 1330405685983 @@ -441,6 +449,7 @@ 36 0 + 0 /file_concat_0 1 1330405686013 @@ -482,6 +491,7 @@ 38 0 + 1007 /file_concat_1 1 1330405686017 @@ -573,6 +583,7 @@ 45 0 + 0 /file_concat_1 1 1330405686042 @@ -620,6 +631,7 @@ 47 0 + 1008 /file_symlink /file_concat_target 1330405686051 @@ -693,6 +705,7 @@ 52 0 + 1009 /hard-lease-recovery-test 1 1330405686084 @@ -759,6 +772,7 @@ 58 0 + 0 /hard-lease-recovery-test 1 1330405688726 diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 4c10ed40d8a..93b5d63e767 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -14,10 +14,11 @@ Trunk (Unreleased) MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions with poor implementations of Object#hashCode(). (Radim Kolar via cutting) - IMPROVEMENTS + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) - MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for - faster job submission. (amarrk) + IMPROVEMENTS MAPREDUCE-3481. [Gridmix] Improve Gridmix STRESS mode. (amarrk) @@ -30,9 +31,6 @@ Trunk (Unreleased) MAPREDUCE-2733. [Gridmix] Gridmix3 cpu emulation system tests. (Vinay Thota via amarrk) - MAPREDUCE-3008. Improvements to cumulative CPU emulation for short running - tasks in Gridmix. (amarrk) - MAPREDUCE-2836. Provide option to fail jobs when submitted to non-existent fair scheduler pools. (Ahmed Radwan via todd) @@ -71,39 +69,14 @@ Trunk (Unreleased) MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive. (Brandon Li via suresh) + MAPREDUCE-5014. Extend Distcp to accept a custom CopyListing. + (Srikanth Sundarrajan via amareshwari) + BUG FIXES MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. (Yu Gao via llu) - MAPREDUCE-4356. [Rumen] Provide access to the method - ParsedTask.obtainTaskAttempts(). (ravigummadi) - - MAPREDUCE-4100. [Gridmix] Bug fixed in compression emulation feature for - map only jobs. (amarrk) - - MAPREDUCE-4149. [Rumen] Rumen fails to parse certain counter - strings. (ravigummadi) - - MAPREDUCE-4083. [Gridmix] NPE in cpu emulation. (amarrk) - - MAPREDUCE-4087. 
[Gridmix] GenerateDistCacheData job of Gridmix can - become slow in some cases (ravigummadi). - - MAPREDUCE-3953. [Gridmix] Gridmix throws NPE and does not simulate a - job if the trace contains null taskStatus for a task. - (ravigummadi) - - MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when - input data directory already exists and -generate opton is - given.(ravigummadi) - - MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead - counter is wrong when compressed input is used.(ravigummadi) - - MAPREDUCE-3757. [Rumen] Fixed Rumen Folder to adjust shuffleFinished and - sortFinished times when needed.(ravigummadi) - MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2 (Jason Lowe via bobby) @@ -155,10 +128,34 @@ Trunk (Unreleased) MAPREDUCE-5012. Typo in javadoc for IdentityMapper class. (Adam Monsen via suresh) - MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. - (Sandy Ryza via tomwhite) + MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path + separators. (Chris Nauroth via sseth) -Release 2.0.4-beta - UNRELEASED + MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris + Nauroth via bikas) + + BREAKDOWN OF HADOOP-8562 SUBTASKS + + MAPREDUCE-4739. Some MapReduce tests fail to find winutils. + (Chris Nauroth via suresh) + + MAPREDUCE-4780. MapReduce distribution build fails on Windows. + (Chris Nauroth via suresh) + + MAPREDUCE-4790. MapReduce build script would be more readable using abspath. + (Chris Nauroth via suresh) + + MAPREDUCE-4869. Fix TestMapReduceChildJVM. (Chris Nauroth via acmurthy) + + MAPREDUCE-4870. Fix TestMRJobsWithHistoryService. (Chris Nauroth via acmurthy) + + MAPREDUCE-4983. Fixed various platform specific assumptions in various tests, + so that they can pass on Windows too. (Chris Nauroth via vinodkv) + + HADOOP-9372. Fix bad timeout annotations on tests. + (Arpit Agarwal via suresh) + +Release 2.0.5-beta - UNRELEASED INCOMPATIBLE CHANGES @@ -166,9 +163,41 @@ Release 2.0.4-beta - UNRELEASED IMPROVEMENTS + MAPREDUCE-3008. Improvements to cumulative CPU emulation for short running + tasks in Gridmix. (amarrk via tgraves) + MAPREDUCE-5033. mapred shell script should respect usage flags (--help -help -h). (Andrew Wang via atm) + MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input slits' + allocation on small clusters. (Bikas Saha via vinodkv) + + MAPREDUCE-4990. Construct debug strings conditionally in + ShuffleHandler.Shuffle#sendMapOutput(). (kkambatl via tucu) + + MAPREDUCE-4875. coverage fixing for org.apache.hadoop.mapred + (Aleksey Gorshkov via bobby) + + MAPREDUCE-5129. Allow tags to JobHistory for deeper analytics. (billie via + acmurthy) + + MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for + faster job submission. (amarrk via tgraves) + + MAPREDUCE-5079. Changes job recovery to restore state directly from job + history, instaed of simulating state machine events. + (Jason Lowe and Robert Parker via sseth) + + MAPREDUCE-4981. Add WordMean, WordMedian, WordStandardDeviation + to ExamplesDriver. (Plamen Jeliazkov via shv) + + MAPREUDUCE-5059. Change average merge time on Job overview page to be the + time delta between the end of the shuffle and the start of the reduce. + (Omkar Vinit Joshi via vinodkv) + + MAPREDUCE-4985. Add compression option to TestDFSIO usage. + (Plamen Jeliazkov via shv) + OPTIMIZATIONS BUG FIXES @@ -195,6 +224,109 @@ Release 2.0.4-beta - UNRELEASED MAPREDUCE-5008. 
Merger progress miscounts with respect to EOF_MARKER. (Sandy Ryza via tomwhite) + MAPREDUCE-4693. History server should include counters for failed tasks. + (Xuan Gong via sseth) + + MAPREDUCE-4896. mapred queue -info spits out ugly exception when queue does + not exist. (sandyr via tucu) + + MAPREDUCE-3685. Fix bugs in MergeManager to ensure compression codec is + appropriately used and that on-disk segments are correctly sorted on + file-size. (Anty Rao and Ravi Prakash via acmurthy) + + MAPREDUCE-4571. TestHsWebServicesJobs fails on jdk7. (tgraves via tucu) + + MAPREDUCE-4716. TestHsWebServicesJobsQuery.testJobsQueryStateInvalid + fails with jdk7. (tgraves via tucu) + + MAPREDUCE-5075. DistCp leaks input file handles since ThrottledInputStream + does not close the wrapped InputStream. (Chris Nauroth via szetszwo) + + MAPREDUCE-3872. Fix an event handling races in ContainerLauncherImpl. + (Robert Kanter via sseth) + + MAPREDUCE-5062. Fix MR AM to read max-retries from the RM. (Zhijie Shen via + vinodkv) + + MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when + input data directory already exists and -generate opton is + given.(ravigummadi via tgraves) + + MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead + counter is wrong when compressed input is used.(ravigummadi via tgraves) + + MAPREDUCE-3953. [Gridmix] Gridmix throws NPE and does not simulate a + job if the trace contains null taskStatus for a task. (ravigummadi via + tgraves) + + MAPREDUCE-4087. [Gridmix] GenerateDistCacheData job of Gridmix can + become slow in some cases (ravigummadi via tgraves). + + MAPREDUCE-5077. Remove mapreduce.util.ResourceCalculatorPlugin and related + code. (Karthik Kambatla via sseth) + + MAPREDUCE-4083. [Gridmix] NPE in cpu emulation. (amarrk via tgraves) + + MAPREDUCE-4100. [Gridmix] Bug fixed in compression emulation feature for + map only jobs. (amarrk via tgraves) + + MAPREDUCE-4356. [Rumen] Provide access to the method + ParsedTask.obtainTaskAttempts(). (ravigummadi via tgraves) + + MAPREDUCE-4149. [Rumen] Rumen fails to parse certain counter + strings. (ravigummadi via tgraves) + + MAPREDUCE-3757. [Rumen] Fixed Rumen Folder to adjust shuffleFinished and + sortFinished times when needed. (Ravi Gummadi via tgraves) + + MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. (Omkar Vinit + Joshi via vinodkv) + + MAPREDUCE-5113. Streaming input/output types are ignored with java + mapper/reducer. (sandyr via tucu) + + MAPREDUCE-5098. Fix findbugs warnings in gridmix. (kkambatl via tucu) + + MAPREDUCE-5086. MR app master deletes staging dir when sent a reboot + command from the RM. (Jian He via jlowe) + + MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error + (Thomas Graves via jlowe) + + MAPREDUCE-5136. TestJobImpl->testJobNoTasks fails with IBM JAVA (Amir + Sanjar via jlowe) + + MAPREDUCE-5139. Update MR AM to use the modified startContainer API after + YARN-486. (Xuan Gong via vinodkv) + +Release 2.0.4-alpha - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. + (Sandy Ryza via tomwhite) + + MAPREDUCE-5088. MR Client gets an renewer token exception while Oozie is + submitting a job (Daryn Sharp via cos) + + MAPREDUCE-5117. Changed MRClientProtocolPBClientImpl to be closeable and thus + fix failures in renewal of HistoryServer's delegations tokens. (Siddharth + Seth via vinodkv) + + MAPREDUCE-5083. 
MiniMRCluster should use a random component when creating an + actual cluster (Siddharth Seth via hitesh) + + MAPREDUCE-5094. Disabled memory monitoring by default in MiniMRYarnCluster + to avoid some downstream tests failing. (Siddharth Seth via vinodkv) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES @@ -711,6 +843,18 @@ Release 2.0.0-alpha - 05-23-2012 MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is bad (Jason Lowe via bobby) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES @@ -725,6 +869,12 @@ Release 0.23.7 - UNRELEASED MAPREDUCE-4989. JSONify DataTables input data for Attempts page (Ravi Prakash via jlowe) + MAPREDUCE-5027. Shuffle does not limit number of outstanding connections + (Robert Parker via jeagles) + + MAPREDUCE-4972. Coverage fixing for org.apache.hadoop.mapreduce.jobhistory + (Aleksey Gorshkov via bobby) + OPTIMIZATIONS MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the @@ -744,6 +894,35 @@ Release 0.23.7 - UNRELEASED MAPREDUCE-5009. Killing the Task Attempt slated for commit does not clear the value from the Task commitAttempt member (Robert Parker via jeagles) + MAPREDUCE-4871. AM uses mapreduce.jobtracker.split.metainfo.maxsize but + mapred-default has mapreduce.job.split.metainfo.maxsize (Jason Lowe via + jeagles) + + MAPREDUCE-4794. DefaultSpeculator generates error messages on normal + shutdown (Jason Lowe via jeagles) + + MAPREDUCE-5043. Fetch failure processing can cause AM event queue to + backup and eventually OOM (Jason Lowe via bobby) + + MAPREDUCE-5023. History Server Web Services missing Job Counters (Ravi + Prakash via tgraves) + + MAPREDUCE-5060. Fetch failures that time out only count against the first + map task (Robert Joseph Evans via jlowe) + + MAPREDUCE-5042. Reducer unable to fetch for a map task that was recovered + (Jason Lowe via bobby) + + MAPREDUCE-5053. java.lang.InternalError from decompression codec cause + reducer to fail (Robert Parker via jeagles) + + MAPREDUCE-4991. coverage for gridmix (Aleksey Gorshkov via tgraves) + + MAPREDUCE-5007. fix coverage org.apache.hadoop.mapreduce.v2.hs (Aleksey + Gorshkov via tgraves) + + MAPREDUCE-5137. 
AM web UI: clicking on Map Task results in 500 error + (Thomas Graves via jlowe) Release 0.23.6 - UNRELEASED diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index fe7aa4cedbc..45d6e9e84a9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -24,9 +24,12 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.io.IOUtils; @@ -46,6 +49,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent; import org.apache.hadoop.mapreduce.jobhistory.EventReader; @@ -54,6 +58,9 @@ import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryCopyService; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; @@ -61,6 +68,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.client.MRClientService; @@ -74,6 +82,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; @@ -84,8 +93,6 @@ import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl; import 
org.apache.hadoop.mapreduce.v2.app.local.LocalContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; -import org.apache.hadoop.mapreduce.v2.app.recover.Recovery; -import org.apache.hadoop.mapreduce.v2.app.recover.RecoveryService; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; @@ -94,6 +101,7 @@ import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator; import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -167,7 +175,6 @@ public class MRAppMaster extends CompositeService { private AppContext context; private Dispatcher dispatcher; private ClientService clientService; - private Recovery recoveryServ; private ContainerAllocator containerAllocator; private ContainerLauncher containerLauncher; private EventHandler committerEventHandler; @@ -180,7 +187,6 @@ public class MRAppMaster extends CompositeService { private OutputCommitter committer; private JobEventDispatcher jobEventDispatcher; private JobHistoryEventHandler jobHistoryEventHandler; - private boolean inRecovery = false; private SpeculatorEventDispatcher speculatorEventDispatcher; private Job job; @@ -193,6 +199,8 @@ public class MRAppMaster extends CompositeService { private String shutDownMessage = null; JobStateInternal forcedState = null; + private long recoveredJobStartTime = 0; + public MRAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort, long appSubmitTime, int maxAppAttempts) { @@ -340,34 +348,9 @@ public class MRAppMaster extends CompositeService { } } else { committer = createOutputCommitter(conf); - boolean recoveryEnabled = conf.getBoolean( - MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true); - boolean recoverySupportedByCommitter = committer.isRecoverySupported(); - // If a shuffle secret was not provided by the job client then this app - // attempt will generate one. However that disables recovery if there - // are reducers as the shuffle secret would be app attempt specific. - boolean shuffleKeyValidForRecovery = (numReduceTasks > 0 && - TokenCache.getShuffleSecretKey(fsTokens) != null); - - if (recoveryEnabled && recoverySupportedByCommitter - && shuffleKeyValidForRecovery && appAttemptID.getAttemptId() > 1) { - LOG.info("Recovery is enabled. 
" - + "Will try to recover from previous life on best effort basis."); - recoveryServ = createRecoveryService(context); - addIfService(recoveryServ); - dispatcher = recoveryServ.getDispatcher(); - clock = recoveryServ.getClock(); - inRecovery = true; - } else { - LOG.info("Not starting RecoveryService: recoveryEnabled: " - + recoveryEnabled + " recoverySupportedByCommitter: " - + recoverySupportedByCommitter + " shuffleKeyValidForRecovery: " - + shuffleKeyValidForRecovery + " ApplicationAttemptID: " - + appAttemptID.getAttemptId()); - dispatcher = createDispatcher(); - addIfService(dispatcher); - } + dispatcher = createDispatcher(); + addIfService(dispatcher); //service to handle requests from JobClient clientService = createClientService(context); @@ -549,8 +532,14 @@ public class MRAppMaster extends CompositeService { } try { - //We are finishing cleanly so this is the last retry - isLastAMRetry = true; + //if isLastAMRetry comes as true, should never set it to false + if ( !isLastAMRetry){ + if (((JobImpl)job).getInternalState() != JobStateInternal.REBOOT) { + LOG.info("We are finishing cleanly so this is the last retry"); + isLastAMRetry = true; + } + } + notifyIsLastAMRetry(isLastAMRetry); // Stop all services // This will also send the final report to the ResourceManager LOG.info("Calling stop for all the services"); @@ -589,15 +578,6 @@ public class MRAppMaster extends CompositeService { return new JobFinishEventHandler(); } - /** - * Create the recovery service. - * @return an instance of the recovery service. - */ - protected Recovery createRecoveryService(AppContext appContext) { - return new RecoveryService(appContext.getApplicationAttemptId(), - appContext.getClock(), getCommitter(), isNewApiCommitter()); - } - /** Create and initialize (but don't start) a single job. * @param forcedState a state to force the job into or null for normal operation. * @param diagnostic a diagnostic message to include with the job. @@ -609,7 +589,8 @@ public class MRAppMaster extends CompositeService { Job newJob = new JobImpl(jobId, appAttemptID, conf, dispatcher.getEventHandler(), taskAttemptListener, jobTokenSecretManager, fsTokens, clock, - completedTasksFromPreviousRun, metrics, newApiCommitter, + completedTasksFromPreviousRun, metrics, + committer, newApiCommitter, currentUser.getUserName(), appSubmitTime, amInfos, context, forcedState, diagnostic); ((RunningAppContext) context).jobs.put(newJob.getID(), newJob); @@ -972,18 +953,8 @@ public class MRAppMaster extends CompositeService { public void start() { amInfos = new LinkedList(); - - // Pull completedTasks etc from recovery - if (inRecovery) { - completedTasksFromPreviousRun = recoveryServ.getCompletedTasks(); - amInfos = recoveryServ.getAMInfos(); - } else { - // Get the amInfos anyways irrespective of whether recovery is enabled or - // not IF this is not the first AM generation - if (appAttemptID.getAttemptId() != 1) { - amInfos.addAll(readJustAMInfos()); - } - } + completedTasksFromPreviousRun = new HashMap(); + processRecovery(); // Current an AMInfo for the current AM generation. 
AMInfo amInfo = @@ -1045,13 +1016,105 @@ public class MRAppMaster extends CompositeService { startJobs(); } + private void processRecovery() { + if (appAttemptID.getAttemptId() == 1) { + return; // no need to recover on the first attempt + } + + boolean recoveryEnabled = getConfig().getBoolean( + MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, + MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE_DEFAULT); + boolean recoverySupportedByCommitter = + committer != null && committer.isRecoverySupported(); + + // If a shuffle secret was not provided by the job client then this app + // attempt will generate one. However that disables recovery if there + // are reducers as the shuffle secret would be app attempt specific. + int numReduceTasks = getConfig().getInt(MRJobConfig.NUM_REDUCES, 0); + boolean shuffleKeyValidForRecovery = (numReduceTasks > 0 && + TokenCache.getShuffleSecretKey(fsTokens) != null); + + if (recoveryEnabled && recoverySupportedByCommitter + && shuffleKeyValidForRecovery) { + LOG.info("Recovery is enabled. " + + "Will try to recover from previous life on best effort basis."); + try { + parsePreviousJobHistory(); + } catch (IOException e) { + LOG.warn("Unable to parse prior job history, aborting recovery", e); + // try to get just the AMInfos + amInfos.addAll(readJustAMInfos()); + } + } else { + LOG.info("Will not try to recover. recoveryEnabled: " + + recoveryEnabled + " recoverySupportedByCommitter: " + + recoverySupportedByCommitter + " shuffleKeyValidForRecovery: " + + shuffleKeyValidForRecovery + " ApplicationAttemptID: " + + appAttemptID.getAttemptId()); + // Get the amInfos anyways whether recovery is enabled or not + amInfos.addAll(readJustAMInfos()); + } + } + + private static FSDataInputStream getPreviousJobHistoryStream( + Configuration conf, ApplicationAttemptId appAttemptId) + throws IOException { + Path historyFile = JobHistoryUtils.getPreviousJobHistoryPath(conf, + appAttemptId); + LOG.info("Previous history file is at " + historyFile); + return historyFile.getFileSystem(conf).open(historyFile); + } + + private void parsePreviousJobHistory() throws IOException { + FSDataInputStream in = getPreviousJobHistoryStream(getConfig(), + appAttemptID); + JobHistoryParser parser = new JobHistoryParser(in); + JobInfo jobInfo = parser.parse(); + Exception parseException = parser.getParseException(); + if (parseException != null) { + LOG.info("Got an error parsing job-history file" + + ", ignoring incomplete events.", parseException); + } + Map taskInfos = jobInfo + .getAllTasks(); + for (TaskInfo taskInfo : taskInfos.values()) { + if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) { + Iterator> taskAttemptIterator = + taskInfo.getAllTaskAttempts().entrySet().iterator(); + while (taskAttemptIterator.hasNext()) { + Map.Entry currentEntry = taskAttemptIterator.next(); + if (!jobInfo.getAllCompletedTaskAttempts().containsKey(currentEntry.getKey())) { + taskAttemptIterator.remove(); + } + } + completedTasksFromPreviousRun + .put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo); + LOG.info("Read from history task " + + TypeConverter.toYarn(taskInfo.getTaskId())); + } + } + LOG.info("Read completed tasks from history " + + completedTasksFromPreviousRun.size()); + recoveredJobStartTime = jobInfo.getLaunchTime(); + + // recover AMInfos + List jhAmInfoList = jobInfo.getAMInfos(); + if (jhAmInfoList != null) { + for (JobHistoryParser.AMInfo jhAmInfo : jhAmInfoList) { + AMInfo amInfo = MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(), + jhAmInfo.getStartTime(), 
jhAmInfo.getContainerId(), + jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(), + jhAmInfo.getNodeManagerHttpPort()); + amInfos.add(amInfo); + } + } + } + private List readJustAMInfos() { List amInfos = new ArrayList(); FSDataInputStream inputStream = null; try { - inputStream = - RecoveryService.getPreviousJobHistoryFileStream(getConfig(), - appAttemptID); + inputStream = getPreviousJobHistoryStream(getConfig(), appAttemptID); EventReader jobHistoryEventReader = new EventReader(inputStream); // All AMInfos are contiguous. Track when the first AMStartedEvent @@ -1102,7 +1165,8 @@ public class MRAppMaster extends CompositeService { @SuppressWarnings("unchecked") protected void startJobs() { /** create a job-start event to get this ball rolling */ - JobEvent startJobEvent = new JobEvent(job.getID(), JobEventType.JOB_START); + JobEvent startJobEvent = new JobStartEvent(job.getID(), + recoveredJobStartTime); /** send the job-start event. this triggers the job execution. */ dispatcher.getEventHandler().handle(startJobEvent); } @@ -1272,19 +1336,25 @@ public class MRAppMaster extends CompositeService { // that they don't take too long in shutting down if(appMaster.containerAllocator instanceof ContainerAllocatorRouter) { ((ContainerAllocatorRouter) appMaster.containerAllocator) - .setSignalled(true); - ((ContainerAllocatorRouter) appMaster.containerAllocator) - .setShouldUnregister(appMaster.isLastAMRetry); - } - - if(appMaster.jobHistoryEventHandler != null) { - appMaster.jobHistoryEventHandler - .setForcejobCompletion(appMaster.isLastAMRetry); + .setSignalled(true); } + appMaster.notifyIsLastAMRetry(appMaster.isLastAMRetry); appMaster.stop(); } } + public void notifyIsLastAMRetry(boolean isLastAMRetry){ + if(containerAllocator instanceof ContainerAllocatorRouter) { + LOG.info("Notify RMCommunicator isAMLastRetry: " + isLastAMRetry); + ((ContainerAllocatorRouter) containerAllocator) + .setShouldUnregister(isLastAMRetry); + } + if(jobHistoryEventHandler != null) { + LOG.info("Notify JHEH isAMLastRetry: " + isLastAMRetry); + jobHistoryEventHandler.setForcejobCompletion(isLastAMRetry); + } + } + protected static void initAndStartAppMaster(final MRAppMaster appMaster, final YarnConfiguration conf, String jobUserName) throws IOException, InterruptedException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/JobStateInternal.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/JobStateInternal.java index 7517bc8c73c..bdb627b2354 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/JobStateInternal.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/JobStateInternal.java @@ -30,5 +30,6 @@ public enum JobStateInternal { KILL_WAIT, KILL_ABORT, KILLED, - ERROR + ERROR, + REBOOT } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobEventType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobEventType.java index c1608259e48..f6c38d30bc3 100644 --- 
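The recovery path added above no longer starts a RecoveryService; it opens the previous attempt's job history file and replays it with JobHistoryParser. Below is a compact, standalone sketch of that read path using the same parser calls as parsePreviousJobHistory; the history-file Path is taken as a parameter here (the patch derives it with JobHistoryUtils.getPreviousJobHistoryPath), and the "SUCCEEDED" literal stands in for TaskState.SUCCEEDED.toString().

// Standalone sketch of the history-based recovery read path (mirrors the
// getPreviousJobHistoryStream/parsePreviousJobHistory calls added above).
// The caller supplies the prior attempt's history file; only tasks recorded
// as SUCCEEDED are kept, as in MRAppMaster#parsePreviousJobHistory.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;

public class PreviousHistorySketch {

  public static Map<TaskID, TaskInfo> readSucceededTasks(
      Configuration conf, Path historyFile) throws IOException {
    FSDataInputStream in = historyFile.getFileSystem(conf).open(historyFile);
    try {
      JobHistoryParser parser = new JobHistoryParser(in);
      JobInfo jobInfo = parser.parse();
      if (parser.getParseException() != null) {
        // A crashed AM can leave a truncated file; partial data is still used.
        System.err.println("Ignoring incomplete history events: "
            + parser.getParseException());
      }
      Map<TaskID, TaskInfo> succeeded = new HashMap<TaskID, TaskInfo>();
      for (Map.Entry<TaskID, TaskInfo> e : jobInfo.getAllTasks().entrySet()) {
        if ("SUCCEEDED".equals(e.getValue().getTaskStatus())) {
          succeeded.put(e.getKey(), e.getValue());
        }
      }
      return succeeded;
    } finally {
      in.close();
    }
  }
}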
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobEventType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobEventType.java @@ -54,6 +54,6 @@ public enum JobEventType { JOB_TASK_ATTEMPT_FETCH_FAILURE, //Producer:RMContainerAllocator - JOB_UPDATED_NODES - + JOB_UPDATED_NODES, + JOB_AM_REBOOT } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/package-info.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobStartEvent.java similarity index 61% rename from hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/package-info.java rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobStartEvent.java index 400fdfaea63..39051da000f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/package-info.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobStartEvent.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,6 +15,25 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -@InterfaceAudience.Private -package org.apache.hadoop.mapreduce.v2.app.recover; -import org.apache.hadoop.classification.InterfaceAudience; + +package org.apache.hadoop.mapreduce.v2.app.job.event; + +import org.apache.hadoop.mapreduce.v2.api.records.JobId; + +public class JobStartEvent extends JobEvent { + + long recoveredJobStartTime; + + public JobStartEvent(JobId jobID) { + this(jobID, 0); + } + + public JobStartEvent(JobId jobID, long recoveredJobStartTime) { + super(jobID, JobEventType.JOB_START); + this.recoveredJobStartTime = recoveredJobStartTime; + } + + public long getRecoveredJobStartTime() { + return recoveredJobStartTime; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java index a6c684015ed..a43263264e9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java @@ -26,6 +26,7 @@ public enum TaskAttemptEventType { //Producer:Task TA_SCHEDULE, TA_RESCHEDULE, + TA_RECOVER, //Producer:Client, Task TA_KILL, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java new file mode 100644 index 00000000000..19fe752fb12 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.app.job.event; + +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; + +public class TaskAttemptRecoverEvent extends TaskAttemptEvent { + + private TaskAttemptInfo taInfo; + private OutputCommitter committer; + private boolean recoverAttemptOutput; + + public TaskAttemptRecoverEvent(TaskAttemptId id, TaskAttemptInfo taInfo, + OutputCommitter committer, boolean recoverOutput) { + super(id, TaskAttemptEventType.TA_RECOVER); + this.taInfo = taInfo; + this.committer = committer; + this.recoverAttemptOutput = recoverOutput; + } + + public TaskAttemptInfo getTaskAttemptInfo() { + return taInfo; + } + + public OutputCommitter getCommitter() { + return committer; + } + + public boolean getRecoverOutput() { + return recoverAttemptOutput; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java index d385e2fc682..8ce9c9f27c5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java @@ -28,6 +28,7 @@ public enum TaskEventType { //Producer:Job T_SCHEDULE, + T_RECOVER, //Producer:Speculator T_ADD_SPEC_ATTEMPT, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java new file mode 100644 index 00000000000..b5ead5ecb4f --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.app.job.event; + +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; + +public class TaskRecoverEvent extends TaskEvent { + + private TaskInfo taskInfo; + private OutputCommitter committer; + private boolean recoverTaskOutput; + + public TaskRecoverEvent(TaskId taskID, TaskInfo taskInfo, + OutputCommitter committer, boolean recoverTaskOutput) { + super(taskID, TaskEventType.T_RECOVER); + this.taskInfo = taskInfo; + this.committer = committer; + this.recoverTaskOutput = recoverTaskOutput; + } + + public TaskInfo getTaskInfo() { + return taskInfo; + } + + public OutputCommitter getOutputCommitter() { + return committer; + } + + public boolean getRecoverTaskOutput() { + return recoverTaskOutput; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index 4b080b4ab74..367b0280845 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -49,6 +49,7 @@ import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobFinishedEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; @@ -92,6 +93,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobSetupFailedEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptFetchFailureEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; @@ -101,6 +103,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; @@ -159,6 +162,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, private final Lock writeLock; private final JobId jobId; private final String jobName; + private final OutputCommitter committer; private final boolean newApiCommitter; private final org.apache.hadoop.mapreduce.JobID oldJobId; private final TaskAttemptListener taskAttemptListener; @@ -215,6 +219,8 @@ public class JobImpl implements 
org.apache.hadoop.mapreduce.v2.app.job.Job, DIAGNOSTIC_UPDATE_TRANSITION = new DiagnosticsUpdateTransition(); private static final InternalErrorTransition INTERNAL_ERROR_TRANSITION = new InternalErrorTransition(); + private static final InternalRebootTransition + INTERNAL_REBOOT_TRANSITION = new InternalRebootTransition(); private static final TaskAttemptCompletedEventTransition TASK_ATTEMPT_COMPLETED_EVENT_TRANSITION = new TaskAttemptCompletedEventTransition(); @@ -246,6 +252,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, .addTransition(JobStateInternal.NEW, JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR, INTERNAL_ERROR_TRANSITION) + .addTransition(JobStateInternal.NEW, JobStateInternal.REBOOT, + JobEventType.JOB_AM_REBOOT, + INTERNAL_REBOOT_TRANSITION) // Ignore-able events .addTransition(JobStateInternal.NEW, JobStateInternal.NEW, JobEventType.JOB_UPDATED_NODES) @@ -265,6 +274,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, .addTransition(JobStateInternal.INITED, JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR, INTERNAL_ERROR_TRANSITION) + .addTransition(JobStateInternal.INITED, JobStateInternal.REBOOT, + JobEventType.JOB_AM_REBOOT, + INTERNAL_REBOOT_TRANSITION) // Ignore-able events .addTransition(JobStateInternal.INITED, JobStateInternal.INITED, JobEventType.JOB_UPDATED_NODES) @@ -287,6 +299,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, .addTransition(JobStateInternal.SETUP, JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR, INTERNAL_ERROR_TRANSITION) + .addTransition(JobStateInternal.SETUP, JobStateInternal.REBOOT, + JobEventType.JOB_AM_REBOOT, + INTERNAL_REBOOT_TRANSITION) // Ignore-able events .addTransition(JobStateInternal.SETUP, JobStateInternal.SETUP, JobEventType.JOB_UPDATED_NODES) @@ -327,6 +342,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobStateInternal.RUNNING, JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR, INTERNAL_ERROR_TRANSITION) + .addTransition(JobStateInternal.RUNNING, JobStateInternal.REBOOT, + JobEventType.JOB_AM_REBOOT, + INTERNAL_REBOOT_TRANSITION) // Transitions from KILL_WAIT state. 
.addTransition @@ -352,7 +370,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, EnumSet.of(JobEventType.JOB_KILL, JobEventType.JOB_UPDATED_NODES, JobEventType.JOB_MAP_TASK_RESCHEDULED, - JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE)) + JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE, + JobEventType.JOB_AM_REBOOT)) // Transitions from COMMITTING state .addTransition(JobStateInternal.COMMITTING, @@ -377,7 +396,10 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, .addTransition(JobStateInternal.COMMITTING, JobStateInternal.ERROR, JobEventType.INTERNAL_ERROR, INTERNAL_ERROR_TRANSITION) - // Ignore-able events + .addTransition(JobStateInternal.COMMITTING, JobStateInternal.REBOOT, + JobEventType.JOB_AM_REBOOT, + INTERNAL_REBOOT_TRANSITION) + // Ignore-able events .addTransition(JobStateInternal.COMMITTING, JobStateInternal.COMMITTING, EnumSet.of(JobEventType.JOB_UPDATED_NODES, @@ -397,7 +419,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, .addTransition(JobStateInternal.SUCCEEDED, JobStateInternal.SUCCEEDED, EnumSet.of(JobEventType.JOB_KILL, JobEventType.JOB_UPDATED_NODES, - JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE)) + JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE, + JobEventType.JOB_AM_REBOOT)) // Transitions from FAIL_ABORT state .addTransition(JobStateInternal.FAIL_ABORT, @@ -425,7 +448,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobEventType.JOB_MAP_TASK_RESCHEDULED, JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE, JobEventType.JOB_COMMIT_COMPLETED, - JobEventType.JOB_COMMIT_FAILED)) + JobEventType.JOB_COMMIT_FAILED, + JobEventType.JOB_AM_REBOOT)) // Transitions from KILL_ABORT state .addTransition(JobStateInternal.KILL_ABORT, @@ -452,7 +476,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobEventType.JOB_SETUP_COMPLETED, JobEventType.JOB_SETUP_FAILED, JobEventType.JOB_COMMIT_COMPLETED, - JobEventType.JOB_COMMIT_FAILED)) + JobEventType.JOB_COMMIT_FAILED, + JobEventType.JOB_AM_REBOOT)) // Transitions from FAILED state .addTransition(JobStateInternal.FAILED, JobStateInternal.FAILED, @@ -476,7 +501,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobEventType.JOB_SETUP_FAILED, JobEventType.JOB_COMMIT_COMPLETED, JobEventType.JOB_COMMIT_FAILED, - JobEventType.JOB_ABORT_COMPLETED)) + JobEventType.JOB_ABORT_COMPLETED, + JobEventType.JOB_AM_REBOOT)) // Transitions from KILLED state .addTransition(JobStateInternal.KILLED, JobStateInternal.KILLED, @@ -498,7 +524,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobEventType.JOB_SETUP_FAILED, JobEventType.JOB_COMMIT_COMPLETED, JobEventType.JOB_COMMIT_FAILED, - JobEventType.JOB_ABORT_COMPLETED)) + JobEventType.JOB_ABORT_COMPLETED, + JobEventType.JOB_AM_REBOOT)) // No transitions from INTERNAL_ERROR state. Ignore all. .addTransition( @@ -517,9 +544,33 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobEventType.JOB_COMMIT_COMPLETED, JobEventType.JOB_COMMIT_FAILED, JobEventType.JOB_ABORT_COMPLETED, - JobEventType.INTERNAL_ERROR)) + JobEventType.INTERNAL_ERROR, + JobEventType.JOB_AM_REBOOT)) .addTransition(JobStateInternal.ERROR, JobStateInternal.ERROR, JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION) + + // No transitions from AM_REBOOT state. Ignore all. 
+ .addTransition( + JobStateInternal.REBOOT, + JobStateInternal.REBOOT, + EnumSet.of(JobEventType.JOB_INIT, + JobEventType.JOB_KILL, + JobEventType.JOB_TASK_COMPLETED, + JobEventType.JOB_TASK_ATTEMPT_COMPLETED, + JobEventType.JOB_MAP_TASK_RESCHEDULED, + JobEventType.JOB_DIAGNOSTIC_UPDATE, + JobEventType.JOB_UPDATED_NODES, + JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE, + JobEventType.JOB_SETUP_COMPLETED, + JobEventType.JOB_SETUP_FAILED, + JobEventType.JOB_COMMIT_COMPLETED, + JobEventType.JOB_COMMIT_FAILED, + JobEventType.JOB_ABORT_COMPLETED, + JobEventType.INTERNAL_ERROR, + JobEventType.JOB_AM_REBOOT)) + .addTransition(JobStateInternal.REBOOT, JobStateInternal.REBOOT, + JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION) + // create the topology tables .installTopology(); @@ -555,7 +606,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobTokenSecretManager jobTokenSecretManager, Credentials fsTokenCredentials, Clock clock, Map completedTasksFromPreviousRun, MRAppMetrics metrics, - boolean newApiCommitter, String userName, + OutputCommitter committer, boolean newApiCommitter, String userName, long appSubmitTime, List amInfos, AppContext appContext, JobStateInternal forcedState, String forcedDiagnostic) { this.applicationAttemptId = applicationAttemptId; @@ -571,6 +622,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, this.queueName = conf.get(MRJobConfig.QUEUE_NAME, "default"); this.appSubmitTime = appSubmitTime; this.oldJobId = TypeConverter.fromYarn(jobId); + this.committer = committer; this.newApiCommitter = newApiCommitter; this.taskAttemptListener = taskAttemptListener; @@ -841,10 +893,16 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, } } - protected void scheduleTasks(Set taskIDs) { + protected void scheduleTasks(Set taskIDs, + boolean recoverTaskOutput) { for (TaskId taskID : taskIDs) { - eventHandler.handle(new TaskEvent(taskID, - TaskEventType.T_SCHEDULE)); + TaskInfo taskInfo = completedTasksFromPreviousRun.remove(taskID); + if (taskInfo != null) { + eventHandler.handle(new TaskRecoverEvent(taskID, taskInfo, + committer, recoverTaskOutput)); + } else { + eventHandler.handle(new TaskEvent(taskID, TaskEventType.T_SCHEDULE)); + } } } @@ -904,6 +962,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, return JobState.RUNNING; case FAIL_ABORT: return JobState.FAILED; + case REBOOT: + return JobState.ERROR; default: return JobState.valueOf(smState.name()); } @@ -972,6 +1032,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, case KILLED: metrics.killedJob(this); break; + case REBOOT: case ERROR: case FAILED: metrics.failedJob(this); @@ -1255,7 +1316,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf.get(MRJobConfig.WORKFLOW_ID, ""), job.conf.get(MRJobConfig.WORKFLOW_NAME, ""), job.conf.get(MRJobConfig.WORKFLOW_NODE_NAME, ""), - getWorkflowAdjacencies(job.conf)); + getWorkflowAdjacencies(job.conf), + job.conf.get(MRJobConfig.WORKFLOW_TAGS, "")); job.eventHandler.handle(new JobHistoryEvent(job.jobId, jse)); //TODO JH Verify jobACLs, UserName via UGI? 
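scheduleTasks above is the fork in the road for recovery: a task with a recovered TaskInfo gets a TaskRecoverEvent carrying the committer and a recover-output flag (the patch passes numReduceTasks == 0 for map tasks and true for reduces), everything else gets a plain T_SCHEDULE. A dependency-free sketch of that dispatch follows; ToyTaskInfo and ToyTask are placeholders for the real MR types, and the recovered payload is consumed by TaskImpl#recover later in this patch.

// Simplified model of the scheduleTasks() recovery dispatch above.
// ToyTaskInfo / ToyTask are placeholders, not the real MR classes.
import java.util.LinkedHashMap;
import java.util.Map;

public class RecoveryDispatchSketch {

  static class ToyTaskInfo {
    final String status;
    ToyTaskInfo(String status) { this.status = status; }
  }

  interface ToyTask {
    void recover(ToyTaskInfo info, boolean recoverOutput); // T_RECOVER path
    void schedule();                                       // T_SCHEDULE path
  }

  static void scheduleTasks(Map<String, ToyTask> tasks,
      Map<String, ToyTaskInfo> completedFromPreviousRun,
      boolean recoverOutput) {
    for (Map.Entry<String, ToyTask> e : tasks.entrySet()) {
      // remove() so a task is only ever recovered once
      ToyTaskInfo info = completedFromPreviousRun.remove(e.getKey());
      if (info != null) {
        e.getValue().recover(info, recoverOutput);
      } else {
        e.getValue().schedule();
      }
    }
  }

  public static void main(String[] args) {
    Map<String, ToyTaskInfo> recovered = new LinkedHashMap<String, ToyTaskInfo>();
    recovered.put("map_000000", new ToyTaskInfo("SUCCEEDED"));

    Map<String, ToyTask> maps = new LinkedHashMap<String, ToyTask>();
    for (final String id : new String[] { "map_000000", "map_000001" }) {
      maps.put(id, new ToyTask() {
        public void recover(ToyTaskInfo info, boolean recoverOutput) {
          System.out.println(id + ": recover, output=" + recoverOutput);
        }
        public void schedule() {
          System.out.println(id + ": schedule");
        }
      });
    }

    int numReduceTasks = 0;
    // Map output is recovered directly only for map-only jobs, as above.
    scheduleTasks(maps, recovered, numReduceTasks == 0);
  }
}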
@@ -1370,7 +1432,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, splits[i], job.taskAttemptListener, job.jobToken, job.fsTokens, - job.clock, job.completedTasksFromPreviousRun, + job.clock, job.applicationAttemptId.getAttemptId(), job.metrics, job.appContext); job.addTask(task); @@ -1388,7 +1450,6 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, job.numMapTasks, job.taskAttemptListener, job.jobToken, job.fsTokens, job.clock, - job.completedTasksFromPreviousRun, job.applicationAttemptId.getAttemptId(), job.metrics, job.appContext); job.addTask(task); @@ -1424,8 +1485,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, @Override public void transition(JobImpl job, JobEvent event) { job.setupProgress = 1.0f; - job.scheduleTasks(job.mapTasks); // schedule (i.e., start) the maps - job.scheduleTasks(job.reduceTasks); + job.scheduleTasks(job.mapTasks, job.numReduceTasks == 0); + job.scheduleTasks(job.reduceTasks, true); // If we have no tasks, just transition to job completed if (job.numReduceTasks == 0 && job.numMapTasks == 0) { @@ -1456,7 +1517,12 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, */ @Override public void transition(JobImpl job, JobEvent event) { - job.startTime = job.clock.getTime(); + JobStartEvent jse = (JobStartEvent) event; + if (jse.getRecoveredJobStartTime() != 0) { + job.startTime = jse.getRecoveredJobStartTime(); + } else { + job.startTime = job.clock.getTime(); + } JobInitedEvent jie = new JobInitedEvent(job.oldJobId, job.startTime, @@ -1898,8 +1964,17 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, } } - private static class InternalErrorTransition implements + private static class InternalTerminationTransition implements SingleArcTransition { + JobStateInternal terminationState = null; + String jobHistoryString = null; + public InternalTerminationTransition(JobStateInternal stateInternal, + String jobHistoryString) { + this.terminationState = stateInternal; + //mostly a hack for jbhistoryserver + this.jobHistoryString = jobHistoryString; + } + @Override public void transition(JobImpl job, JobEvent event) { //TODO Is this JH event required. 
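The StartTransition change above treats a recovered start time of 0 as "no prior attempt": JobStartEvent defaults to 0, and only a non-zero value overrides the AM's clock. A minimal sketch of that convention:

// Sketch of the recovered-start-time convention used by JobStartEvent and
// the StartTransition above: 0 means "not recovered", so the clock wins.
public class StartTimeSketch {

  static long selectStartTime(long recoveredJobStartTime, long clockNow) {
    return (recoveredJobStartTime != 0) ? recoveredJobStartTime : clockNow;
  }

  public static void main(String[] args) {
    System.out.println(selectStartTime(0L, 2000L));    // fresh job -> 2000
    System.out.println(selectStartTime(1500L, 2000L)); // recovered -> 1500
  }
}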
@@ -1907,9 +1982,21 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobUnsuccessfulCompletionEvent failedEvent = new JobUnsuccessfulCompletionEvent(job.oldJobId, job.finishTime, 0, 0, - JobStateInternal.ERROR.toString()); + jobHistoryString); job.eventHandler.handle(new JobHistoryEvent(job.jobId, failedEvent)); - job.finished(JobStateInternal.ERROR); + job.finished(terminationState); + } + } + + private static class InternalErrorTransition extends InternalTerminationTransition { + public InternalErrorTransition(){ + super(JobStateInternal.ERROR, JobStateInternal.ERROR.toString()); + } + } + + private static class InternalRebootTransition extends InternalTerminationTransition { + public InternalRebootTransition(){ + super(JobStateInternal.REBOOT, JobStateInternal.ERROR.toString()); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java index bec20aa0c81..c625f739c63 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java @@ -18,17 +18,13 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; -import java.util.Map; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapTaskAttemptImpl; import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener; @@ -49,11 +45,10 @@ public class MapTaskImpl extends TaskImpl { TaskAttemptListener taskAttemptListener, Token jobToken, Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, - MRAppMetrics metrics, AppContext appContext) { + int appAttemptId, MRAppMetrics metrics, AppContext appContext) { super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, metrics, appContext); + appAttemptId, metrics, appContext); this.taskSplitMetaInfo = taskSplitMetaInfo; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java index a860ad70242..0f4ea9a73bd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java +++ 
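InternalErrorTransition and the new InternalRebootTransition above are thin subclasses of a shared InternalTerminationTransition parameterized by the internal target state and by the state string written to job history, so a reboot is recorded internally as REBOOT but reported to the history server as ERROR. A toy restatement of the pattern, with ToyState and ToyJob standing in for JobStateInternal and JobImpl:

// Toy version of the parameterized termination-transition pattern above.
public class TerminationTransitionSketch {

  enum ToyState { ERROR, REBOOT }

  static class ToyJob {
    void recordUnsuccessfulCompletion(String historyState) {
      System.out.println("history: job ended as " + historyState);
    }
    void finished(ToyState state) {
      System.out.println("internal state: " + state);
    }
  }

  static class TerminationTransition {
    private final ToyState terminationState;
    private final String jobHistoryString;

    TerminationTransition(ToyState terminationState, String jobHistoryString) {
      this.terminationState = terminationState;
      this.jobHistoryString = jobHistoryString;
    }

    void transition(ToyJob job) {
      job.recordUnsuccessfulCompletion(jobHistoryString);
      job.finished(terminationState);
    }
  }

  public static void main(String[] args) {
    // Error: history and internal state agree.
    new TerminationTransition(ToyState.ERROR, "ERROR").transition(new ToyJob());
    // Reboot: internally REBOOT, but reported to history as ERROR,
    // matching InternalRebootTransition above.
    new TerminationTransition(ToyState.REBOOT, "ERROR").transition(new ToyJob());
  }
}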
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java @@ -18,16 +18,12 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; -import java.util.Map; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.ReduceTaskAttemptImpl; import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener; @@ -47,11 +43,10 @@ public class ReduceTaskImpl extends TaskImpl { int numMapTasks, TaskAttemptListener taskAttemptListener, Token jobToken, Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, - MRAppMetrics metrics, AppContext appContext) { + int appAttemptId, MRAppMetrics metrics, AppContext appContext) { super(jobId, TaskType.REDUCE, partition, eventHandler, jobFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, metrics, appContext); + appAttemptId, metrics, appContext); this.numMapTasks = numMapTasks; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index dae9be0d322..ac3af4f0128 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -56,10 +56,12 @@ import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.JobCounter; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskCounter; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinishedEvent; import org.apache.hadoop.mapreduce.jobhistory.ReduceAttemptFinishedEvent; import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStartedEvent; @@ -89,6 +91,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdate import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus; import 
org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; @@ -111,6 +114,7 @@ import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerToken; @@ -204,6 +208,11 @@ public abstract class TaskAttemptImpl implements TaskAttemptEventType.TA_KILL, new KilledTransition()) .addTransition(TaskAttemptStateInternal.NEW, TaskAttemptStateInternal.FAILED, TaskAttemptEventType.TA_FAILMSG, new FailedTransition()) + .addTransition(TaskAttemptStateInternal.NEW, + EnumSet.of(TaskAttemptStateInternal.FAILED, + TaskAttemptStateInternal.KILLED, + TaskAttemptStateInternal.SUCCEEDED), + TaskAttemptEventType.TA_RECOVER, new RecoverTransition()) .addTransition(TaskAttemptStateInternal.NEW, TaskAttemptStateInternal.NEW, TaskAttemptEventType.TA_DIAGNOSTICS_UPDATE, @@ -759,8 +768,8 @@ public abstract class TaskAttemptImpl implements // The null fields are per-container and will be constructed for each // container separately. ContainerLaunchContext container = BuilderUtils - .newContainerLaunchContext(null, conf - .get(MRJobConfig.USER_NAME), null, localResources, + .newContainerLaunchContext(conf + .get(MRJobConfig.USER_NAME), localResources, environment, null, serviceData, taskCredentialsBuffer, applicationACLs); @@ -769,10 +778,9 @@ public abstract class TaskAttemptImpl implements static ContainerLaunchContext createContainerLaunchContext( Map applicationACLs, - ContainerId containerID, Configuration conf, - Token jobToken, Task remoteTask, + Configuration conf, Token jobToken, Task remoteTask, final org.apache.hadoop.mapred.JobID oldJobId, - Resource assignedCapability, WrappedJvmID jvmID, + WrappedJvmID jvmID, TaskAttemptListener taskAttemptListener, Credentials credentials) { @@ -805,7 +813,7 @@ public abstract class TaskAttemptImpl implements // Construct the actual Container ContainerLaunchContext container = BuilderUtils.newContainerLaunchContext( - containerID, commonContainerSpec.getUser(), assignedCapability, + commonContainerSpec.getUser(), commonContainerSpec.getLocalResources(), myEnv, commands, myServiceData, commonContainerSpec.getContainerTokens().duplicate(), applicationACLs); @@ -1082,6 +1090,102 @@ public abstract class TaskAttemptImpl implements this.avataar = avataar; } + @SuppressWarnings("unchecked") + public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, + OutputCommitter committer, boolean recoverOutput) { + containerID = taInfo.getContainerId(); + containerNodeId = ConverterUtils.toNodeId(taInfo.getHostname() + ":" + + taInfo.getPort()); + containerMgrAddress = StringInterner.weakIntern( + containerNodeId.toString()); + nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + + taInfo.getHttpPort()); + computeRackAndLocality(); + launchTime = taInfo.getStartTime(); + finishTime = (taInfo.getFinishTime() != -1) ? 
+ taInfo.getFinishTime() : clock.getTime(); + shufflePort = taInfo.getShufflePort(); + trackerName = taInfo.getHostname(); + httpPort = taInfo.getHttpPort(); + sendLaunchedEvents(); + + reportedStatus.id = attemptId; + reportedStatus.progress = 1.0f; + reportedStatus.counters = taInfo.getCounters(); + reportedStatus.stateString = taInfo.getState(); + reportedStatus.phase = Phase.CLEANUP; + reportedStatus.mapFinishTime = taInfo.getMapFinishTime(); + reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime(); + reportedStatus.sortFinishTime = taInfo.getSortFinishTime(); + addDiagnosticInfo(taInfo.getError()); + + boolean needToClean = false; + String recoveredState = taInfo.getTaskStatus(); + if (recoverOutput + && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) { + TaskAttemptContext tac = new TaskAttemptContextImpl(conf, + TypeConverter.fromYarn(attemptId)); + try { + committer.recoverTask(tac); + LOG.info("Recovered output from task attempt " + attemptId); + } catch (Exception e) { + LOG.error("Unable to recover task attempt " + attemptId, e); + LOG.info("Task attempt " + attemptId + " will be recovered as KILLED"); + recoveredState = TaskAttemptState.KILLED.toString(); + needToClean = true; + } + } + + TaskAttemptStateInternal attemptState; + if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) { + attemptState = TaskAttemptStateInternal.SUCCEEDED; + reportedStatus.taskState = TaskAttemptState.SUCCEEDED; + eventHandler.handle(createJobCounterUpdateEventTASucceeded(this)); + logAttemptFinishedEvent(attemptState); + } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) { + attemptState = TaskAttemptStateInternal.FAILED; + reportedStatus.taskState = TaskAttemptState.FAILED; + eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false)); + TaskAttemptUnsuccessfulCompletionEvent tauce = + createTaskAttemptUnsuccessfulCompletionEvent(this, + TaskAttemptStateInternal.FAILED); + eventHandler.handle( + new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce)); + } else { + if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) { + if (String.valueOf(recoveredState).isEmpty()) { + LOG.info("TaskAttempt" + attemptId + + " had not completed, recovering as KILLED"); + } else { + LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + + recoveredState + ", recovering as KILLED"); + } + addDiagnosticInfo("Killed during application recovery"); + needToClean = true; + } + attemptState = TaskAttemptStateInternal.KILLED; + reportedStatus.taskState = TaskAttemptState.KILLED; + eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false)); + TaskAttemptUnsuccessfulCompletionEvent tauce = + createTaskAttemptUnsuccessfulCompletionEvent(this, + TaskAttemptStateInternal.KILLED); + eventHandler.handle( + new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce)); + } + + if (needToClean) { + TaskAttemptContext tac = new TaskAttemptContextImpl(conf, + TypeConverter.fromYarn(attemptId)); + try { + committer.abortTask(tac); + } catch (Exception e) { + LOG.warn("Task cleanup failed for attempt " + attemptId, e); + } + } + + return attemptState; + } + private static TaskAttemptState getExternalState( TaskAttemptStateInternal smState) { switch (smState) { @@ -1122,6 +1226,24 @@ public abstract class TaskAttemptImpl implements } } + private void computeRackAndLocality() { + nodeRackName = RackResolver.resolve( + containerNodeId.getHost()).getNetworkLocation(); + + locality = Locality.OFF_SWITCH; + if (dataLocalHosts.size() > 
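TaskAttemptImpl#recover above reduces to a status mapping: a SUCCEEDED attempt whose output cannot be recovered is demoted to KILLED and its output aborted, FAILED stays FAILED, and anything else (including an attempt that never completed) is recovered as KILLED. A compact sketch of that decision, with ToyCommitter standing in for the relevant OutputCommitter calls:

// Decision logic distilled from TaskAttemptImpl#recover above.
public class AttemptRecoverySketch {

  enum ToyAttemptState { SUCCEEDED, FAILED, KILLED }

  interface ToyCommitter {
    void recoverTask() throws Exception;
    void abortTask();
  }

  static ToyAttemptState recover(String recoveredStatus,
      boolean recoverOutput, ToyCommitter committer) {
    boolean needToClean = false;
    String status = recoveredStatus;

    if (recoverOutput && "SUCCEEDED".equals(status)) {
      try {
        committer.recoverTask();
      } catch (Exception e) {
        // Output could not be recovered: demote to KILLED and clean up.
        status = "KILLED";
        needToClean = true;
      }
    }

    ToyAttemptState state;
    if ("SUCCEEDED".equals(status)) {
      state = ToyAttemptState.SUCCEEDED;
    } else if ("FAILED".equals(status)) {
      state = ToyAttemptState.FAILED;
    } else {
      // Unknown or incomplete attempts are recovered as KILLED.
      if (!"KILLED".equals(status)) {
        needToClean = true;
      }
      state = ToyAttemptState.KILLED;
    }

    if (needToClean) {
      committer.abortTask();
    }
    return state;
  }

  public static void main(String[] args) {
    ToyCommitter failing = new ToyCommitter() {
      public void recoverTask() throws Exception { throw new Exception("no output"); }
      public void abortTask() { System.out.println("abortTask()"); }
    };
    System.out.println(recover("SUCCEEDED", true, failing)); // KILLED (after abort)
    System.out.println(recover("FAILED", true, failing));    // FAILED
  }
}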
0) { + String cHost = resolveHost(containerNodeId.getHost()); + if (dataLocalHosts.contains(cHost)) { + locality = Locality.NODE_LOCAL; + } + } + if (locality == Locality.OFF_SWITCH) { + if (dataLocalRacks.contains(nodeRackName)) { + locality = Locality.RACK_LOCAL; + } + } + } + private static long computeSlotMillis(TaskAttemptImpl taskAttempt) { TaskType taskType = taskAttempt.getID().getTaskId().getTaskType(); int slotMemoryReq = @@ -1141,6 +1263,18 @@ public abstract class TaskAttemptImpl implements return slotMillisIncrement; } + private static JobCounterUpdateEvent createJobCounterUpdateEventTASucceeded( + TaskAttemptImpl taskAttempt) { + long slotMillis = computeSlotMillis(taskAttempt); + TaskId taskId = taskAttempt.attemptId.getTaskId(); + JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId()); + jce.addCounterUpdate( + taskId.getTaskType() == TaskType.MAP ? + JobCounter.SLOTS_MILLIS_MAPS : JobCounter.SLOTS_MILLIS_REDUCES, + slotMillis); + return jce; + } + private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed( TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) { TaskType taskType = taskAttempt.getID().getTaskId().getTaskType(); @@ -1210,6 +1344,26 @@ public abstract class TaskAttemptImpl implements return tauce; } + @SuppressWarnings("unchecked") + private void sendLaunchedEvents() { + JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptId.getTaskId() + .getJobId()); + jce.addCounterUpdate(attemptId.getTaskId().getTaskType() == TaskType.MAP ? + JobCounter.TOTAL_LAUNCHED_MAPS : JobCounter.TOTAL_LAUNCHED_REDUCES, 1); + eventHandler.handle(jce); + + LOG.info("TaskAttempt: [" + attemptId + + "] using containerId: [" + containerID + " on NM: [" + + containerMgrAddress + "]"); + TaskAttemptStartedEvent tase = + new TaskAttemptStartedEvent(TypeConverter.fromYarn(attemptId), + TypeConverter.fromYarn(attemptId.getTaskId().getTaskType()), + launchTime, trackerName, httpPort, shufflePort, containerID, + locality.toString(), avataar.toString()); + eventHandler.handle( + new JobHistoryEvent(attemptId.getTaskId().getJobId(), tase)); + } + private WrappedProgressSplitsBlock getProgressSplitBlock() { readLock.lock(); try { @@ -1342,8 +1496,6 @@ public abstract class TaskAttemptImpl implements taskAttempt.containerNodeId.toString()); taskAttempt.nodeHttpAddress = StringInterner.weakIntern( cEvent.getContainer().getNodeHttpAddress()); - taskAttempt.nodeRackName = RackResolver.resolve( - taskAttempt.containerNodeId.getHost()).getNetworkLocation(); taskAttempt.containerToken = cEvent.getContainer().getContainerToken(); taskAttempt.assignedCapability = cEvent.getContainer().getResource(); // this is a _real_ Task (classic Hadoop mapred flavor): @@ -1354,32 +1506,18 @@ public abstract class TaskAttemptImpl implements taskAttempt.taskAttemptListener.registerPendingTask( taskAttempt.remoteTask, taskAttempt.jvmID); - taskAttempt.locality = Locality.OFF_SWITCH; - if (taskAttempt.dataLocalHosts.size() > 0) { - String cHost = taskAttempt.resolveHost( - taskAttempt.containerNodeId.getHost()); - if (taskAttempt.dataLocalHosts.contains(cHost)) { - taskAttempt.locality = Locality.NODE_LOCAL; - } - } - if (taskAttempt.locality == Locality.OFF_SWITCH) { - if (taskAttempt.dataLocalRacks.contains(taskAttempt.nodeRackName)) { - taskAttempt.locality = Locality.RACK_LOCAL; - } - } + taskAttempt.computeRackAndLocality(); //launch the container //create the container object to be launched for a given Task attempt ContainerLaunchContext launchContext = 
createContainerLaunchContext( - cEvent.getApplicationACLs(), taskAttempt.containerID, - taskAttempt.conf, taskAttempt.jobToken, taskAttempt.remoteTask, - taskAttempt.oldJobId, taskAttempt.assignedCapability, - taskAttempt.jvmID, taskAttempt.taskAttemptListener, - taskAttempt.credentials); + cEvent.getApplicationACLs(), taskAttempt.conf, taskAttempt.jobToken, + taskAttempt.remoteTask, taskAttempt.oldJobId, taskAttempt.jvmID, + taskAttempt.taskAttemptListener, taskAttempt.credentials); taskAttempt.eventHandler.handle(new ContainerRemoteLaunchEvent( taskAttempt.attemptId, taskAttempt.containerID, taskAttempt.containerMgrAddress, taskAttempt.containerToken, - launchContext, taskAttempt.remoteTask)); + launchContext, taskAttempt.assignedCapability, taskAttempt.remoteTask)); // send event to speculator that our container needs are satisfied taskAttempt.eventHandler.handle @@ -1471,27 +1609,7 @@ public abstract class TaskAttemptImpl implements // Costly? taskAttempt.trackerName = nodeHttpInetAddr.getHostName(); taskAttempt.httpPort = nodeHttpInetAddr.getPort(); - JobCounterUpdateEvent jce = - new JobCounterUpdateEvent(taskAttempt.attemptId.getTaskId() - .getJobId()); - jce.addCounterUpdate( - taskAttempt.attemptId.getTaskId().getTaskType() == TaskType.MAP ? - JobCounter.TOTAL_LAUNCHED_MAPS: JobCounter.TOTAL_LAUNCHED_REDUCES - , 1); - taskAttempt.eventHandler.handle(jce); - - LOG.info("TaskAttempt: [" + taskAttempt.attemptId - + "] using containerId: [" + taskAttempt.containerID + " on NM: [" - + taskAttempt.containerMgrAddress + "]"); - TaskAttemptStartedEvent tase = - new TaskAttemptStartedEvent(TypeConverter.fromYarn(taskAttempt.attemptId), - TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId().getTaskType()), - taskAttempt.launchTime, - nodeHttpInetAddr.getHostName(), nodeHttpInetAddr.getPort(), - taskAttempt.shufflePort, taskAttempt.containerID, - taskAttempt.locality.toString(), taskAttempt.avataar.toString()); - taskAttempt.eventHandler.handle - (new JobHistoryEvent(taskAttempt.attemptId.getTaskId().getJobId(), tase)); + taskAttempt.sendLaunchedEvents(); taskAttempt.eventHandler.handle (new SpeculatorEvent (taskAttempt.attemptId, true, taskAttempt.clock.getTime())); @@ -1540,14 +1658,8 @@ public abstract class TaskAttemptImpl implements TaskAttemptEvent event) { //set the finish time taskAttempt.setFinishTime(); - long slotMillis = computeSlotMillis(taskAttempt); - TaskId taskId = taskAttempt.attemptId.getTaskId(); - JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId()); - jce.addCounterUpdate( - taskId.getTaskType() == TaskType.MAP ? 
- JobCounter.SLOTS_MILLIS_MAPS : JobCounter.SLOTS_MILLIS_REDUCES, - slotMillis); - taskAttempt.eventHandler.handle(jce); + taskAttempt.eventHandler.handle( + createJobCounterUpdateEventTASucceeded(taskAttempt)); taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.SUCCEEDED); taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, @@ -1585,6 +1697,18 @@ public abstract class TaskAttemptImpl implements } } + private static class RecoverTransition implements + MultipleArcTransition { + + @Override + public TaskAttemptStateInternal transition(TaskAttemptImpl taskAttempt, + TaskAttemptEvent event) { + TaskAttemptRecoverEvent tare = (TaskAttemptRecoverEvent) event; + return taskAttempt.recover(tare.getTaskAttemptInfo(), + tare.getCommitter(), tare.getRecoverOutput()); + } + } + @SuppressWarnings({ "unchecked" }) private void logAttemptFinishedEvent(TaskAttemptStateInternal state) { //Log finished events only if an attempt started. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java index d01d9998aaf..6e4f1b27f62 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; @@ -37,7 +38,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.MRConfig; -import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; @@ -69,8 +70,10 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerFailedEvent; @@ -152,6 +155,12 @@ public abstract class TaskImpl implements Task, EventHandler { TaskEventType.T_SCHEDULE, new InitialScheduleTransition()) .addTransition(TaskStateInternal.NEW, TaskStateInternal.KILLED, TaskEventType.T_KILL, new KillNewTransition()) + .addTransition(TaskStateInternal.NEW, + EnumSet.of(TaskStateInternal.FAILED, + TaskStateInternal.KILLED, + TaskStateInternal.RUNNING, + TaskStateInternal.SUCCEEDED), + 
TaskEventType.T_RECOVER, new RecoverTransition()) // Transitions from SCHEDULED state //when the first attempt is launched, the task state is set to RUNNING @@ -250,20 +259,16 @@ public abstract class TaskImpl implements Task, EventHandler { // By default, the next TaskAttempt number is zero. Changes during recovery protected int nextAttemptNumber = 0; - private List taskAttemptsFromPreviousGeneration = - new ArrayList(); - private static final class RecoverdAttemptsComparator implements - Comparator { - @Override - public int compare(TaskAttemptInfo attempt1, TaskAttemptInfo attempt2) { - long diff = attempt1.getStartTime() - attempt2.getStartTime(); - return diff == 0 ? 0 : (diff < 0 ? -1 : 1); - } - } - - private static final RecoverdAttemptsComparator RECOVERED_ATTEMPTS_COMPARATOR = - new RecoverdAttemptsComparator(); + // For sorting task attempts by completion time + private static final Comparator TA_INFO_COMPARATOR = + new Comparator() { + @Override + public int compare(TaskAttemptInfo a, TaskAttemptInfo b) { + long diff = a.getFinishTime() - b.getFinishTime(); + return diff == 0 ? 0 : (diff < 0 ? -1 : 1); + } + }; @Override public TaskState getState() { @@ -280,8 +285,7 @@ public abstract class TaskImpl implements Task, EventHandler { TaskAttemptListener taskAttemptListener, Token jobToken, Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, - MRAppMetrics metrics, AppContext appContext) { + int appAttemptId, MRAppMetrics metrics, AppContext appContext) { this.conf = conf; this.clock = clock; this.jobFile = remoteJobConfFile; @@ -307,41 +311,15 @@ public abstract class TaskImpl implements Task, EventHandler { this.encryptedShuffle = conf.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT); - // See if this is from a previous generation. - if (completedTasksFromPreviousRun != null - && completedTasksFromPreviousRun.containsKey(taskId)) { - // This task has TaskAttempts from previous generation. We have to replay - // them. - LOG.info("Task is from previous run " + taskId); - TaskInfo taskInfo = completedTasksFromPreviousRun.get(taskId); - Map allAttempts = - taskInfo.getAllTaskAttempts(); - taskAttemptsFromPreviousGeneration = new ArrayList(); - taskAttemptsFromPreviousGeneration.addAll(allAttempts.values()); - Collections.sort(taskAttemptsFromPreviousGeneration, - RECOVERED_ATTEMPTS_COMPARATOR); - } - - if (taskAttemptsFromPreviousGeneration.isEmpty()) { - // All the previous attempts are exhausted, now start with a new - // generation. - - // All the new TaskAttemptIDs are generated based on MR - // ApplicationAttemptID so that attempts from previous lives don't - // over-step the current one. This assumes that a task won't have more - // than 1000 attempts in its single generation, which is very reasonable. - // Someone is nuts if he/she thinks he/she can live with 1000 TaskAttempts - // and requires serious medical attention. - nextAttemptNumber = (startCount - 1) * 1000; - } else { - // There are still some TaskAttempts from previous generation, use them - nextAttemptNumber = - taskAttemptsFromPreviousGeneration.remove(0).getAttemptId().getId(); - } - // This "this leak" is okay because the retained pointer is in an // instance variable. stateMachine = stateMachineFactory.make(this); + + // All the new TaskAttemptIDs are generated based on MR + // ApplicationAttemptID so that attempts from previous lives don't + // over-step the current one. 
This assumes that a task won't have more + // than 1000 attempts in its single generation, which is very reasonable. + nextAttemptNumber = (appAttemptId - 1) * 1000; } @Override @@ -600,14 +578,28 @@ public abstract class TaskImpl implements Task, EventHandler { // This is always called in the Write Lock private void addAndScheduleAttempt(Avataar avataar) { - TaskAttempt attempt = createAttempt(); - ((TaskAttemptImpl) attempt).setAvataar(avataar); + TaskAttempt attempt = addAttempt(avataar); + inProgressAttempts.add(attempt.getID()); + //schedule the nextAttemptNumber + if (failedAttempts.size() > 0) { + eventHandler.handle(new TaskAttemptEvent(attempt.getID(), + TaskAttemptEventType.TA_RESCHEDULE)); + } else { + eventHandler.handle(new TaskAttemptEvent(attempt.getID(), + TaskAttemptEventType.TA_SCHEDULE)); + } + } + + private TaskAttemptImpl addAttempt(Avataar avataar) { + TaskAttemptImpl attempt = createAttempt(); + attempt.setAvataar(avataar); if (LOG.isDebugEnabled()) { LOG.debug("Created attempt " + attempt.getID()); } switch (attempts.size()) { case 0: - attempts = Collections.singletonMap(attempt.getID(), attempt); + attempts = Collections.singletonMap(attempt.getID(), + (TaskAttempt) attempt); break; case 1: @@ -623,24 +615,8 @@ public abstract class TaskImpl implements Task, EventHandler { break; } - // Update nextATtemptNumber - if (taskAttemptsFromPreviousGeneration.isEmpty()) { - ++nextAttemptNumber; - } else { - // There are still some TaskAttempts from previous generation, use them - nextAttemptNumber = - taskAttemptsFromPreviousGeneration.remove(0).getAttemptId().getId(); - } - - inProgressAttempts.add(attempt.getID()); - //schedule the nextAttemptNumber - if (failedAttempts.size() > 0) { - eventHandler.handle(new TaskAttemptEvent(attempt.getID(), - TaskAttemptEventType.TA_RESCHEDULE)); - } else { - eventHandler.handle(new TaskAttemptEvent(attempt.getID(), - TaskAttemptEventType.TA_SCHEDULE)); - } + ++nextAttemptNumber; + return attempt; } @Override @@ -705,6 +681,16 @@ public abstract class TaskImpl implements Task, EventHandler { } } + private void sendTaskStartedEvent() { + TaskStartedEvent tse = new TaskStartedEvent( + TypeConverter.fromYarn(taskId), getLaunchTime(), + TypeConverter.fromYarn(taskId.getTaskType()), + getSplitsAsString()); + eventHandler + .handle(new JobHistoryEvent(taskId.getJobId(), tse)); + historyTaskStartGenerated = true; + } + private static TaskFinishedEvent createTaskFinishedEvent(TaskImpl task, TaskStateInternal taskState) { TaskFinishedEvent tfe = new TaskFinishedEvent(TypeConverter.fromYarn(task.taskId), @@ -740,6 +726,16 @@ public abstract class TaskImpl implements Task, EventHandler { task.successfulAttempt = null; } + private void sendTaskSucceededEvents() { + eventHandler.handle(new JobTaskEvent(taskId, TaskState.SUCCEEDED)); + LOG.info("Task succeeded with attempt " + successfulAttempt); + if (historyTaskStartGenerated) { + TaskFinishedEvent tfe = createTaskFinishedEvent(this, + TaskStateInternal.SUCCEEDED); + eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe)); + } + } + /** * @return a String representation of the splits. 
* @@ -751,6 +747,122 @@ public abstract class TaskImpl implements Task, EventHandler { return ""; } + /** + * Recover a completed task from a previous application attempt + * @param taskInfo recovered info about the task + * @param recoverTaskOutput whether to recover task outputs + * @return state of the task after recovery + */ + private TaskStateInternal recover(TaskInfo taskInfo, + OutputCommitter committer, boolean recoverTaskOutput) { + LOG.info("Recovering task " + taskId + + " from prior app attempt, status was " + taskInfo.getTaskStatus()); + + scheduledTime = taskInfo.getStartTime(); + sendTaskStartedEvent(); + Collection attemptInfos = + taskInfo.getAllTaskAttempts().values(); + + if (attemptInfos.size() > 0) { + metrics.launchedTask(this); + } + + // recover the attempts for this task in the order they finished + // so task attempt completion events are ordered properly + int savedNextAttemptNumber = nextAttemptNumber; + ArrayList taInfos = + new ArrayList(taskInfo.getAllTaskAttempts().values()); + Collections.sort(taInfos, TA_INFO_COMPARATOR); + for (TaskAttemptInfo taInfo : taInfos) { + nextAttemptNumber = taInfo.getAttemptId().getId(); + TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN); + // handle the recovery inline so attempts complete before task does + attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo, + committer, recoverTaskOutput)); + finishedAttempts.add(attempt.getID()); + TaskAttemptCompletionEventStatus taces = null; + TaskAttemptState attemptState = attempt.getState(); + switch (attemptState) { + case FAILED: + taces = TaskAttemptCompletionEventStatus.FAILED; + break; + case KILLED: + taces = TaskAttemptCompletionEventStatus.KILLED; + break; + case SUCCEEDED: + taces = TaskAttemptCompletionEventStatus.SUCCEEDED; + break; + default: + throw new IllegalStateException( + "Unexpected attempt state during recovery: " + attemptState); + } + if (attemptState == TaskAttemptState.FAILED) { + failedAttempts.add(attempt.getID()); + if (failedAttempts.size() >= maxAttempts) { + taces = TaskAttemptCompletionEventStatus.TIPFAILED; + } + } + + // don't clobber the successful attempt completion event + // TODO: this shouldn't be necessary after MAPREDUCE-4330 + if (successfulAttempt == null) { + handleTaskAttemptCompletion(attempt.getID(), taces); + if (attemptState == TaskAttemptState.SUCCEEDED) { + successfulAttempt = attempt.getID(); + } + } + } + nextAttemptNumber = savedNextAttemptNumber; + + TaskStateInternal taskState = TaskStateInternal.valueOf( + taskInfo.getTaskStatus()); + switch (taskState) { + case SUCCEEDED: + if (successfulAttempt != null) { + sendTaskSucceededEvents(); + } else { + LOG.info("Missing successful attempt for task " + taskId + + ", recovering as RUNNING"); + // there must have been a fetch failure and the retry wasn't complete + taskState = TaskStateInternal.RUNNING; + metrics.runningTask(this); + addAndScheduleAttempt(Avataar.VIRGIN); + } + break; + case FAILED: + case KILLED: + { + if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) { + metrics.endWaitingTask(this); + } + TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(), + taskInfo.getFinishTime(), taskInfo.getTaskType(), + taskInfo.getError(), taskInfo.getTaskStatus(), + taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters()); + eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe)); + eventHandler.handle( + new JobTaskEvent(taskId, getExternalState(taskState))); + break; + } + default: + throw new 
java.lang.AssertionError("Unexpected recovered task state: " + + taskState); + } + + return taskState; + } + + private static class RecoverTransition + implements MultipleArcTransition { + + @Override + public TaskStateInternal transition(TaskImpl task, TaskEvent event) { + TaskRecoverEvent tre = (TaskRecoverEvent) event; + return task.recover(tre.getTaskInfo(), tre.getOutputCommitter(), + tre.getRecoverTaskOutput()); + } + } + private static class InitialScheduleTransition implements SingleArcTransition { @@ -758,13 +870,7 @@ public abstract class TaskImpl implements Task, EventHandler { public void transition(TaskImpl task, TaskEvent event) { task.addAndScheduleAttempt(Avataar.VIRGIN); task.scheduledTime = task.clock.getTime(); - TaskStartedEvent tse = new TaskStartedEvent( - TypeConverter.fromYarn(task.taskId), task.getLaunchTime(), - TypeConverter.fromYarn(task.taskId.getTaskType()), - task.getSplitsAsString()); - task.eventHandler - .handle(new JobHistoryEvent(task.taskId.getJobId(), tse)); - task.historyTaskStartGenerated = true; + task.sendTaskStartedEvent(); } } @@ -818,16 +924,7 @@ public abstract class TaskImpl implements Task, EventHandler { task.finishedAttempts.add(taskAttemptId); task.inProgressAttempts.remove(taskAttemptId); task.successfulAttempt = taskAttemptId; - task.eventHandler.handle(new JobTaskEvent( - task.taskId, TaskState.SUCCEEDED)); - LOG.info("Task succeeded with attempt " + task.successfulAttempt); - // issue kill to all other attempts - if (task.historyTaskStartGenerated) { - TaskFinishedEvent tfe = createTaskFinishedEvent(task, - TaskStateInternal.SUCCEEDED); - task.eventHandler.handle(new JobHistoryEvent(task.taskId.getJobId(), - tfe)); - } + task.sendTaskSucceededEvents(); for (TaskAttempt attempt : task.attempts.values()) { if (attempt.getID() != task.successfulAttempt && // This is okay because it can only talk us out of sending a diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index 588d031bc23..86281f60b1b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -59,6 +59,7 @@ import org.apache.hadoop.yarn.api.records.ContainerToken; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.service.AbstractService; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ProtoUtils; import org.apache.hadoop.yarn.util.Records; @@ -150,10 +151,14 @@ public class ContainerLauncherImpl extends AbstractService implements ContainerLaunchContext containerLaunchContext = event.getContainer(); + org.apache.hadoop.yarn.api.records.Container container = + BuilderUtils.newContainer(containerID, null, null, + event.getResource(), null, containerToken); // Now launch the actual container StartContainerRequest startRequest = Records .newRecord(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(container); 
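A minimal illustrative sketch (not part of the patch) of how the launch request is now assembled; the helper name buildStartRequest is hypothetical, but every call mirrors the lines above:

    // Sketch only: combines the ContainerLaunchContext with a Container record
    // that carries just the resource and the container token, as done above.
    private static StartContainerRequest buildStartRequest(ContainerId containerID,
        ContainerToken containerToken, ContainerLaunchContext launchContext,
        Resource resource) {
      org.apache.hadoop.yarn.api.records.Container container =
          BuilderUtils.newContainer(containerID, null, null, resource, null,
              containerToken);
      StartContainerRequest startRequest =
          Records.newRecord(StartContainerRequest.class);
      startRequest.setContainerLaunchContext(launchContext);
      startRequest.setContainer(container);
      return startRequest;
    }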
StartContainerResponse response = proxy.startContainer(startRequest); ByteBuffer portInfo = response diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java index 0fac5335b0c..eb95f3bb314 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java @@ -23,26 +23,34 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerToken; +import org.apache.hadoop.yarn.api.records.Resource; public class ContainerRemoteLaunchEvent extends ContainerLauncherEvent { private final ContainerLaunchContext container; private final Task task; + private final Resource resource; public ContainerRemoteLaunchEvent(TaskAttemptId taskAttemptID, ContainerId containerID, String containerMgrAddress, ContainerToken containerToken, - ContainerLaunchContext containerLaunchContext, Task remoteTask) { + ContainerLaunchContext containerLaunchContext, Resource resource, + Task remoteTask) { super(taskAttemptID, containerID, containerMgrAddress, containerToken, ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH); this.container = containerLaunchContext; this.task = remoteTask; + this.resource = resource; } public ContainerLaunchContext getContainer() { return this.container; } + public Resource getResource() { + return this.resource; + } + public Task getRemoteTask() { return this.task; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java index abb2397e293..74ae16f0ff2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java @@ -123,7 +123,7 @@ public class LocalContainerAllocator extends RMCommunicator // This can happen if the RM has been restarted. If it is in that state, // this application must clean itself up. 
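// Context from elsewhere in this patch: RMContainerAllocator below gets the same
// INTERNAL_ERROR -> JOB_AM_REBOOT substitution, and
// TestStagingCleanup#testNoDeletionofStagingOnReboot (added below) verifies that
// a rebooting AM which still has retries left does not delete its staging
// directory, which appears to be what lets the next attempt recover the job.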
eventHandler.handle(new JobEvent(this.getJob().getID(), - JobEventType.INTERNAL_ERROR)); + JobEventType.JOB_AM_REBOOT)); throw new YarnException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationID()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java deleted file mode 100644 index c7134a46bd7..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java +++ /dev/null @@ -1,39 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.mapreduce.v2.app.recover; - -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; -import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; -import org.apache.hadoop.yarn.Clock; -import org.apache.hadoop.yarn.event.Dispatcher; - -public interface Recovery { - - Dispatcher getDispatcher(); - - Clock getClock(); - - Map getCompletedTasks(); - - List getAMInfos(); -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java deleted file mode 100644 index aca752721a7..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java +++ /dev/null @@ -1,480 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package org.apache.hadoop.mapreduce.v2.app.recover; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.TaskType; -import org.apache.hadoop.mapreduce.TypeConverter; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; -import org.apache.hadoop.mapreduce.v2.api.records.Phase; -import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskState; -import org.apache.hadoop.mapreduce.v2.app.ControlledClock; -import org.apache.hadoop.mapreduce.v2.app.commit.CommitterTaskAbortEvent; -import org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent; -import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; -import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; -import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; -import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; -import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; -import org.apache.hadoop.yarn.Clock; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.NodeId; -import 
org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.Event; -import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.service.CompositeService; -import org.apache.hadoop.yarn.service.Service; -import org.apache.hadoop.yarn.util.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; - -/* - * Recovers the completed tasks from the previous life of Application Master. - * The completed tasks are deciphered from the history file of the previous life. - * Recovery service intercepts and replay the events for completed tasks. - * While recovery is in progress, the scheduling of new tasks are delayed by - * buffering the task schedule events. - * The recovery service controls the clock while recovery is in progress. - */ - -//TODO: -//task cleanup for all non completed tasks -public class RecoveryService extends CompositeService implements Recovery { - - private static final Log LOG = LogFactory.getLog(RecoveryService.class); - - private final ApplicationAttemptId applicationAttemptId; - private final OutputCommitter committer; - private final boolean newApiCommitter; - private final Dispatcher dispatcher; - private final ControlledClock clock; - - private JobInfo jobInfo = null; - private final Map completedTasks = - new HashMap(); - - private final List pendingTaskScheduleEvents = - new ArrayList(); - - private volatile boolean recoveryMode = false; - - public RecoveryService(ApplicationAttemptId applicationAttemptId, - Clock clock, OutputCommitter committer, boolean newApiCommitter) { - super("RecoveringDispatcher"); - this.applicationAttemptId = applicationAttemptId; - this.committer = committer; - this.newApiCommitter = newApiCommitter; - this.dispatcher = createRecoveryDispatcher(); - this.clock = new ControlledClock(clock); - addService((Service) dispatcher); - } - - @Override - public void init(Configuration conf) { - super.init(conf); - // parse the history file - try { - parse(); - } catch (Exception e) { - LOG.warn(e); - LOG.warn("Could not parse the old history file. Aborting recovery. " - + "Starting afresh.", e); - } - if (completedTasks.size() > 0) { - recoveryMode = true; - LOG.info("SETTING THE RECOVERY MODE TO TRUE. 
NO OF COMPLETED TASKS " - + "TO RECOVER " + completedTasks.size()); - LOG.info("Job launch time " + jobInfo.getLaunchTime()); - clock.setTime(jobInfo.getLaunchTime()); - } - } - - @Override - public Dispatcher getDispatcher() { - return dispatcher; - } - - @Override - public Clock getClock() { - return clock; - } - - @Override - public Map getCompletedTasks() { - return completedTasks; - } - - @Override - public List getAMInfos() { - if (jobInfo == null || jobInfo.getAMInfos() == null) { - return new LinkedList(); - } - List amInfos = new LinkedList(); - for (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo jhAmInfo : jobInfo - .getAMInfos()) { - AMInfo amInfo = - MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(), - jhAmInfo.getStartTime(), jhAmInfo.getContainerId(), - jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(), - jhAmInfo.getNodeManagerHttpPort()); - - amInfos.add(amInfo); - } - return amInfos; - } - - private void parse() throws IOException { - FSDataInputStream in = - getPreviousJobHistoryFileStream(getConfig(), applicationAttemptId); - JobHistoryParser parser = new JobHistoryParser(in); - jobInfo = parser.parse(); - Exception parseException = parser.getParseException(); - if (parseException != null) { - LOG.info("Got an error parsing job-history file" + - ", ignoring incomplete events.", parseException); - } - Map taskInfos = jobInfo - .getAllTasks(); - for (TaskInfo taskInfo : taskInfos.values()) { - if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) { - Iterator> taskAttemptIterator = - taskInfo.getAllTaskAttempts().entrySet().iterator(); - while (taskAttemptIterator.hasNext()) { - Map.Entry currentEntry = taskAttemptIterator.next(); - if (!jobInfo.getAllCompletedTaskAttempts().containsKey(currentEntry.getKey())) { - taskAttemptIterator.remove(); - } - } - completedTasks - .put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo); - LOG.info("Read from history task " - + TypeConverter.toYarn(taskInfo.getTaskId())); - } - } - LOG.info("Read completed tasks from history " - + completedTasks.size()); - } - - public static FSDataInputStream getPreviousJobHistoryFileStream( - Configuration conf, ApplicationAttemptId applicationAttemptId) - throws IOException { - FSDataInputStream in = null; - Path historyFile = null; - String jobId = - TypeConverter.fromYarn(applicationAttemptId.getApplicationId()) - .toString(); - String jobhistoryDir = - JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId); - Path histDirPath = - FileContext.getFileContext(conf).makeQualified(new Path(jobhistoryDir)); - LOG.info("Trying file " + histDirPath.toString()); - FileContext fc = FileContext.getFileContext(histDirPath.toUri(), conf); - // read the previous history file - historyFile = - fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(histDirPath, - jobId, (applicationAttemptId.getAttemptId() - 1))); - LOG.info("History file is at " + historyFile); - in = fc.open(historyFile); - return in; - } - - protected Dispatcher createRecoveryDispatcher() { - return new RecoveryDispatcher(); - } - - @SuppressWarnings("rawtypes") - class RecoveryDispatcher extends AsyncDispatcher { - private final EventHandler actualHandler; - private final EventHandler handler; - - RecoveryDispatcher() { - super(); - actualHandler = super.getEventHandler(); - handler = new InterceptingEventHandler(actualHandler); - } - - @Override - @SuppressWarnings("unchecked") - public void dispatch(Event event) { - if (recoveryMode) { - if (event.getType() == 
TaskAttemptEventType.TA_CONTAINER_LAUNCHED) { - TaskAttemptInfo attInfo = getTaskAttemptInfo(((TaskAttemptEvent) event) - .getTaskAttemptID()); - LOG.info("Recovered Attempt start time " + attInfo.getStartTime()); - clock.setTime(attInfo.getStartTime()); - - } else if (event.getType() == TaskAttemptEventType.TA_DONE - || event.getType() == TaskAttemptEventType.TA_FAILMSG - || event.getType() == TaskAttemptEventType.TA_KILL) { - TaskAttemptInfo attInfo = getTaskAttemptInfo(((TaskAttemptEvent) event) - .getTaskAttemptID()); - LOG.info("Recovered Attempt finish time " + attInfo.getFinishTime()); - clock.setTime(attInfo.getFinishTime()); - } - - else if (event.getType() == TaskEventType.T_ATTEMPT_FAILED - || event.getType() == TaskEventType.T_ATTEMPT_KILLED - || event.getType() == TaskEventType.T_ATTEMPT_SUCCEEDED) { - TaskTAttemptEvent tEvent = (TaskTAttemptEvent) event; - LOG.info("Recovered Task attempt " + tEvent.getTaskAttemptID()); - TaskInfo taskInfo = completedTasks.get(tEvent.getTaskAttemptID() - .getTaskId()); - taskInfo.getAllTaskAttempts().remove( - TypeConverter.fromYarn(tEvent.getTaskAttemptID())); - // remove the task info from completed tasks if all attempts are - // recovered - if (taskInfo.getAllTaskAttempts().size() == 0) { - completedTasks.remove(tEvent.getTaskAttemptID().getTaskId()); - // checkForRecoveryComplete - LOG.info("CompletedTasks() " + completedTasks.size()); - if (completedTasks.size() == 0) { - recoveryMode = false; - clock.reset(); - LOG.info("Setting the recovery mode to false. " + - "Recovery is complete!"); - - // send all pending tasks schedule events - for (TaskEvent tEv : pendingTaskScheduleEvents) { - actualHandler.handle(tEv); - } - - } - } - } - } - realDispatch(event); - } - - public void realDispatch(Event event) { - super.dispatch(event); - } - - @Override - public EventHandler getEventHandler() { - return handler; - } - } - - private TaskAttemptInfo getTaskAttemptInfo(TaskAttemptId id) { - TaskInfo taskInfo = completedTasks.get(id.getTaskId()); - return taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn(id)); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - private class InterceptingEventHandler implements EventHandler { - EventHandler actualHandler; - - InterceptingEventHandler(EventHandler actualHandler) { - this.actualHandler = actualHandler; - } - - @Override - public void handle(Event event) { - if (!recoveryMode) { - // delegate to the dispatcher one - actualHandler.handle(event); - return; - } - - else if (event.getType() == TaskEventType.T_SCHEDULE) { - TaskEvent taskEvent = (TaskEvent) event; - // delay the scheduling of new tasks till previous ones are recovered - if (completedTasks.get(taskEvent.getTaskID()) == null) { - LOG.debug("Adding to pending task events " - + taskEvent.getTaskID()); - pendingTaskScheduleEvents.add(taskEvent); - return; - } - } - - else if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) { - TaskAttemptId aId = ((ContainerAllocatorEvent) event).getAttemptID(); - TaskAttemptInfo attInfo = getTaskAttemptInfo(aId); - LOG.debug("CONTAINER_REQ " + aId); - sendAssignedEvent(aId, attInfo); - return; - } - - else if (event.getType() == CommitterEventType.TASK_ABORT) { - TaskAttemptId aId = ((CommitterTaskAbortEvent) event).getAttemptID(); - LOG.debug("TASK_CLEAN"); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_CLEANUP_DONE)); - return; - } - - else if (event.getType() == ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH) { - TaskAttemptId aId = 
((ContainerRemoteLaunchEvent) event) - .getTaskAttemptID(); - TaskAttemptInfo attInfo = getTaskAttemptInfo(aId); - actualHandler.handle(new TaskAttemptContainerLaunchedEvent(aId, - attInfo.getShufflePort())); - // send the status update event - sendStatusUpdateEvent(aId, attInfo); - - TaskAttemptState state = TaskAttemptState.valueOf(attInfo.getTaskStatus()); - switch (state) { - case SUCCEEDED: - //recover the task output - - // check the committer type and construct corresponding context - TaskAttemptContext taskContext = null; - if(newApiCommitter) { - taskContext = new TaskAttemptContextImpl(getConfig(), - attInfo.getAttemptId()); - } else { - taskContext = new org.apache.hadoop.mapred.TaskAttemptContextImpl(new JobConf(getConfig()), - TypeConverter.fromYarn(aId)); - } - - try { - TaskType type = taskContext.getTaskAttemptID().getTaskID().getTaskType(); - int numReducers = taskContext.getConfiguration().getInt(MRJobConfig.NUM_REDUCES, 1); - if(type == TaskType.REDUCE || (type == TaskType.MAP && numReducers <= 0)) { - committer.recoverTask(taskContext); - LOG.info("Recovered output from task attempt " + attInfo.getAttemptId()); - } else { - LOG.info("Will not try to recover output for " - + taskContext.getTaskAttemptID()); - } - } catch (IOException e) { - LOG.error("Caught an exception while trying to recover task "+aId, e); - actualHandler.handle(new JobDiagnosticsUpdateEvent( - aId.getTaskId().getJobId(), "Error in recovering task output " + - e.getMessage())); - actualHandler.handle(new JobEvent(aId.getTaskId().getJobId(), - JobEventType.INTERNAL_ERROR)); - } - - // send the done event - LOG.info("Sending done event to recovered attempt " + aId); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_DONE)); - break; - case KILLED: - LOG.info("Sending kill event to recovered attempt " + aId); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_KILL)); - break; - default: - LOG.info("Sending fail event to recovered attempt " + aId); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_FAILMSG)); - break; - } - return; - } - - else if (event.getType() == - ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP) { - TaskAttemptId aId = ((ContainerLauncherEvent) event) - .getTaskAttemptID(); - actualHandler.handle( - new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_CONTAINER_CLEANED)); - return; - } - - // delegate to the actual handler - actualHandler.handle(event); - } - - private void sendStatusUpdateEvent(TaskAttemptId yarnAttemptID, - TaskAttemptInfo attemptInfo) { - LOG.info("Sending status update event to " + yarnAttemptID); - TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus(); - taskAttemptStatus.id = yarnAttemptID; - taskAttemptStatus.progress = 1.0f; - taskAttemptStatus.stateString = attemptInfo.getTaskStatus(); - // taskAttemptStatus.outputSize = attemptInfo.getOutputSize(); - taskAttemptStatus.phase = Phase.CLEANUP; - org.apache.hadoop.mapreduce.Counters cntrs = attemptInfo.getCounters(); - if (cntrs == null) { - taskAttemptStatus.counters = null; - } else { - taskAttemptStatus.counters = cntrs; - } - actualHandler.handle(new TaskAttemptStatusUpdateEvent( - taskAttemptStatus.id, taskAttemptStatus)); - } - - private void sendAssignedEvent(TaskAttemptId yarnAttemptID, - TaskAttemptInfo attemptInfo) { - LOG.info("Sending assigned event to " + yarnAttemptID); - ContainerId cId = attemptInfo.getContainerId(); - - NodeId nodeId = - ConverterUtils.toNodeId(attemptInfo.getHostname() + ":" - + 
attemptInfo.getPort()); - // Resource/Priority/ApplicationACLs are only needed while launching the - // container on an NM, these are already completed tasks, so setting them - // to null - Container container = BuilderUtils.newContainer(cId, nodeId, - attemptInfo.getTrackerName() + ":" + attemptInfo.getHttpPort(), - null, null, null); - actualHandler.handle(new TaskAttemptContainerAssignedEvent(yarnAttemptID, - container, null)); - } - } - -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index d29d11890cd..5c453ad83dc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -574,7 +574,7 @@ public class RMContainerAllocator extends RMContainerRequestor // This can happen if the RM has been restarted. If it is in that state, // this application must clean itself up. eventHandler.handle(new JobEvent(this.getJob().getID(), - JobEventType.INTERNAL_ERROR)); + JobEventType.JOB_AM_REBOOT)); throw new YarnException("Resource Manager doesn't recognize AttemptId: " + this.getContext().getApplicationID()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index 6ee62725e0a..430117c4e25 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -33,6 +33,9 @@ import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; import com.google.inject.Inject; @@ -53,7 +56,7 @@ public class TaskPage extends AppView { h2($(TITLE)); return; } - html. + TBODY> tbody = html. table("#attempts"). thead(). tr(). @@ -65,7 +68,8 @@ public class TaskPage extends AppView { th(".tsh", "Started"). th(".tsh", "Finished"). th(".tsh", "Elapsed"). - th(".note", "Note")._()._(); + th(".note", "Note")._()._(). + tbody(); // Write all the data into a JavaScript array of arrays for JQuery // DataTables to display StringBuilder attemptsTableData = new StringBuilder("[\n"); @@ -105,6 +109,9 @@ public class TaskPage extends AppView { attemptsTableData.append("]"); html.script().$type("text/javascript"). 
_("var attemptsTableData=" + attemptsTableData)._(); + + tbody._()._(); + } protected boolean isValidRequest() { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index 4a28ab00637..4ef4d8d9f4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -414,7 +414,8 @@ public class MRApp extends MRAppMaster { Job newJob = new TestJob(getJobId(), getAttemptID(), conf, getDispatcher().getEventHandler(), getTaskAttemptListener(), getContext().getClock(), - isNewApiCommitter(), currentUser.getUserName(), getContext(), + getCommitter(), isNewApiCommitter(), + currentUser.getUserName(), getContext(), forcedState, diagnostic); ((AppContext) getContext()).getAllJobs().put(newJob.getID(), newJob); @@ -648,12 +649,13 @@ public class MRApp extends MRAppMaster { public TestJob(JobId jobId, ApplicationAttemptId applicationAttemptId, Configuration conf, EventHandler eventHandler, TaskAttemptListener taskAttemptListener, Clock clock, - boolean newApiCommitter, String user, AppContext appContext, + OutputCommitter committer, boolean newApiCommitter, + String user, AppContext appContext, JobStateInternal forcedState, String diagnostic) { super(jobId, getApplicationAttemptId(applicationId, getStartCount()), conf, eventHandler, taskAttemptListener, new JobTokenSecretManager(), new Credentials(), clock, - getCompletedTaskFromPreviousRun(), metrics, + getCompletedTaskFromPreviousRun(), metrics, committer, newApiCommitter, user, System.currentTimeMillis(), getAllAMInfos(), appContext, forcedState, diagnostic); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java index a0a08c91bce..e89f0374bf1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java @@ -18,10 +18,21 @@ package org.apache.hadoop.mapreduce.v2.app; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Map; import junit.framework.Assert; @@ -31,36 +42,66 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.JobCounter; +import org.apache.hadoop.mapreduce.JobID; 
import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.jobhistory.Event; +import org.apache.hadoop.mapreduce.jobhistory.EventType; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; +import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent; +import org.apache.hadoop.mapreduce.v2.app.job.impl.MapTaskImpl; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; +import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.yarn.Clock; +import org.apache.hadoop.yarn.ClusterInfo; +import org.apache.hadoop.yarn.SystemClock; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; +import org.mockito.ArgumentCaptor; @SuppressWarnings({"unchecked", "rawtypes"}) public class TestRecovery { @@ -75,6 +116,7 @@ public class TestRecovery { private Text val1 = new Text("val1"); private Text val2 = new Text("val2"); + /** * AM with 2 maps and 1 reduce. 
For 1st map, one attempt fails, one attempt * completely disappears because of failed launch, one attempt gets killed and @@ -1011,6 +1053,423 @@ public class TestRecovery { app.verifyCompleted(); } + @Test + public void testRecoverySuccessAttempt() { + LOG.info("--- START: testRecoverySuccessAttempt ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.SUCCEEDED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo,mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.SUCCEEDED); + finalAttemptStates.put(taId2, TaskAttemptState.FAILED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.TASK_FINISHED); + recoveryChecker(recoverMapTask, TaskState.SUCCEEDED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 1L); + } + + @Test + public void testRecoveryAllFailAttempts() { + LOG.info("--- START: testRecoveryAllFailAttempts ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("FAILED"); + 
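// The task-level outcome of recovery is driven by this status string:
// TaskImpl.recover() (see above) maps it with
// TaskStateInternal.valueOf(taskInfo.getTaskStatus()), so returning "FAILED"
// here exercises the FAILED branch, while the per-attempt states come from the
// mocked TaskAttemptInfo.getTaskStatus() values set up in getMockTaskAttemptInfo.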
when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.FAILED); + finalAttemptStates.put(taId2, TaskAttemptState.FAILED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.TASK_FAILED); + recoveryChecker(recoverMapTask, TaskState.FAILED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 2L); + } + + @Test + public void testRecoveryTaskSuccessAllAttemptsFail() { + LOG.info("--- START: testRecoveryTaskSuccessAllAttemptsFail ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.FAILED); + finalAttemptStates.put(taId2, TaskAttemptState.FAILED); + // check for one new attempt launched since successful attempt not found + TaskAttemptID taId3 = new TaskAttemptID(taskID, 2000); + finalAttemptStates.put(taId3, TaskAttemptState.NEW); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + recoveryChecker(recoverMapTask, TaskState.RUNNING, finalAttemptStates, + arg, jobHistoryEvents, 2L, 2L); + } + + @Test + public void testRecoveryTaskSuccessAllAttemptsSucceed() { + LOG.info("--- START: testRecoveryTaskSuccessAllAttemptsFail ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler 
mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.SUCCEEDED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.SUCCEEDED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.SUCCEEDED); + finalAttemptStates.put(taId2, TaskAttemptState.SUCCEEDED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED); + jobHistoryEvents.add(EventType.TASK_FINISHED); + recoveryChecker(recoverMapTask, TaskState.SUCCEEDED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 0L); + } + + @Test + public void testRecoveryAllAttemptsKilled() { + LOG.info("--- START: testRecoveryAllAttemptsKilled ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.KILLED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.KILLED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("KILLED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map 
finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.KILLED); + finalAttemptStates.put(taId2, TaskAttemptState.KILLED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_KILLED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_KILLED); + jobHistoryEvents.add(EventType.TASK_FAILED); + recoveryChecker(recoverMapTask, TaskState.KILLED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 0L); + } + + private void recoveryChecker(MapTaskImpl checkTask, TaskState finalState, + Map finalAttemptStates, + ArgumentCaptor arg, List expectedJobHistoryEvents, + long expectedMapLaunches, long expectedFailedMaps) { + + assertEquals("Final State of Task", finalState, checkTask.getState()); + + Map recoveredAttempts = + checkTask.getAttempts(); + assertEquals("Expected Number of Task Attempts", + finalAttemptStates.size(), recoveredAttempts.size()); + for (TaskAttemptID taID : finalAttemptStates.keySet()) { + assertEquals("Expected Task Attempt State", + finalAttemptStates.get(taID), + recoveredAttempts.get(TypeConverter.toYarn(taID)).getState()); + } + + Iterator ie = arg.getAllValues().iterator(); + int eventNum = 0; + long totalLaunchedMaps = 0; + long totalFailedMaps = 0; + boolean jobTaskEventReceived = false; + + while (ie.hasNext()) { + Object current = ie.next(); + ++eventNum; + LOG.info(eventNum + " " + current.getClass().getName()); + if (current instanceof JobHistoryEvent) { + JobHistoryEvent jhe = (JobHistoryEvent) current; + LOG.info(expectedJobHistoryEvents.get(0).toString() + " " + + jhe.getHistoryEvent().getEventType().toString() + " " + + jhe.getJobID()); + assertEquals(expectedJobHistoryEvents.get(0), + jhe.getHistoryEvent().getEventType()); + expectedJobHistoryEvents.remove(0); + } else if (current instanceof JobCounterUpdateEvent) { + JobCounterUpdateEvent jcue = (JobCounterUpdateEvent) current; + + LOG.info("JobCounterUpdateEvent " + + jcue.getCounterUpdates().get(0).getCounterKey() + + " " + jcue.getCounterUpdates().get(0).getIncrementValue()); + if (jcue.getCounterUpdates().get(0).getCounterKey() == + JobCounter.NUM_FAILED_MAPS) { + totalFailedMaps += jcue.getCounterUpdates().get(0) + .getIncrementValue(); + } else if (jcue.getCounterUpdates().get(0).getCounterKey() == + JobCounter.TOTAL_LAUNCHED_MAPS) { + totalLaunchedMaps += jcue.getCounterUpdates().get(0) + .getIncrementValue(); + } + } else if (current instanceof JobTaskEvent) { + JobTaskEvent jte = (JobTaskEvent) current; + assertEquals(jte.getState(), finalState); + jobTaskEventReceived = true; + } + } + assertTrue(jobTaskEventReceived || (finalState == TaskState.RUNNING)); + assertEquals("Did not process all expected JobHistoryEvents", + 0, expectedJobHistoryEvents.size()); + assertEquals("Expected Map Launches", + expectedMapLaunches, totalLaunchedMaps); + assertEquals("Expected Failed Maps", + expectedFailedMaps, totalFailedMaps); + } + + private MapTaskImpl getMockMapTask(long clusterTimestamp, EventHandler eh) { + + ApplicationId appId = BuilderUtils.newApplicationId(clusterTimestamp, 1); + JobId jobId = MRBuilderUtils.newJobId(appId, 1); + + int partitions = 2; + + Path remoteJobConfFile = mock(Path.class); + JobConf conf = new JobConf(); + TaskAttemptListener taskAttemptListener = mock(TaskAttemptListener.class); + Token jobToken = + (Token) mock(Token.class); + Credentials credentials = null; + 
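// Note on the attempt numbering these tests rely on: with the change above,
// TaskImpl seeds nextAttemptNumber with (appAttemptId - 1) * 1000, and recover()
// temporarily substitutes each recovered attempt's original id before restoring
// that base. With the appAttemptId of 3 passed below, the base is
// (3 - 1) * 1000 = 2000, which is why testRecoveryTaskSuccessAllAttemptsFail
// expects the freshly scheduled attempt (taId3) to carry attempt id 2000.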
Clock clock = new SystemClock(); + int appAttemptId = 3; + MRAppMetrics metrics = mock(MRAppMetrics.class); + Resource minContainerRequirements = mock(Resource.class); + when(minContainerRequirements.getMemory()).thenReturn(1000); + + ClusterInfo clusterInfo = mock(ClusterInfo.class); + when(clusterInfo.getMinContainerCapability()).thenReturn( + minContainerRequirements); + AppContext appContext = mock(AppContext.class); + when(appContext.getClusterInfo()).thenReturn(clusterInfo); + + TaskSplitMetaInfo taskSplitMetaInfo = mock(TaskSplitMetaInfo.class); + MapTaskImpl mapTask = new MapTaskImpl(jobId, partitions, + eh, remoteJobConfFile, conf, + taskSplitMetaInfo, taskAttemptListener, jobToken, credentials, clock, + appAttemptId, metrics, appContext); + return mapTask; + } + + private TaskAttemptInfo getMockTaskAttemptInfo(TaskAttemptID tai, + TaskAttemptState tas) { + + ContainerId ci = mock(ContainerId.class); + Counters counters = mock(Counters.class); + TaskType tt = TaskType.MAP; + + long finishTime = System.currentTimeMillis(); + + TaskAttemptInfo mockTAinfo = mock(TaskAttemptInfo.class); + + when(mockTAinfo.getAttemptId()).thenReturn(tai); + when(mockTAinfo.getContainerId()).thenReturn(ci); + when(mockTAinfo.getCounters()).thenReturn(counters); + when(mockTAinfo.getError()).thenReturn(""); + when(mockTAinfo.getFinishTime()).thenReturn(finishTime); + when(mockTAinfo.getHostname()).thenReturn("localhost"); + when(mockTAinfo.getHttpPort()).thenReturn(23); + when(mockTAinfo.getMapFinishTime()).thenReturn(finishTime - 1000L); + when(mockTAinfo.getPort()).thenReturn(24); + when(mockTAinfo.getRackname()).thenReturn("defaultRack"); + when(mockTAinfo.getShuffleFinishTime()).thenReturn(finishTime - 2000L); + when(mockTAinfo.getShufflePort()).thenReturn(25); + when(mockTAinfo.getSortFinishTime()).thenReturn(finishTime - 3000L); + when(mockTAinfo.getStartTime()).thenReturn(finishTime -10000); + when(mockTAinfo.getState()).thenReturn("task in progress"); + when(mockTAinfo.getTaskStatus()).thenReturn(tas.toString()); + when(mockTAinfo.getTaskType()).thenReturn(tt); + when(mockTAinfo.getTrackerName()).thenReturn("TrackerName"); + return mockTAinfo; + } + private void writeBadOutput(TaskAttempt attempt, Configuration conf) throws Exception { TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, @@ -1145,5 +1604,16 @@ public class TestRecovery { public static void main(String[] arg) throws Exception { TestRecovery test = new TestRecovery(); test.testCrashed(); + test.testMultipleCrashes(); + test.testOutputRecovery(); + test.testOutputRecoveryMapsOnly(); + test.testRecoveryWithOldCommiter(); + test.testSpeculative(); + test.testRecoveryWithoutShuffleSecret(); + test.testRecoverySuccessAttempt(); + test.testRecoveryAllFailAttempts(); + test.testRecoveryTaskSuccessAllAttemptsFail(); + test.testRecoveryTaskSuccessAllAttemptsSucceed(); + test.testRecoveryAllAttemptsKilled(); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java index b278186766e..10b79aba73e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java @@ -33,7 +33,9 @@ import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; @@ -45,6 +47,7 @@ import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -86,9 +89,68 @@ import org.junit.Test; attemptId.setApplicationId(appId); JobId jobid = recordFactory.newRecordInstance(JobId.class); jobid.setAppId(appId); - MRAppMaster appMaster = new TestMRApp(attemptId); + ContainerAllocator mockAlloc = mock(ContainerAllocator.class); + MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc, + JobStateInternal.RUNNING, MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS); appMaster.init(conf); + appMaster.start(); appMaster.shutDownJob(); + //test whether notifyIsLastAMRetry called + Assert.assertEquals(true, ((TestMRApp)appMaster).getTestIsLastAMRetry()); + verify(fs).delete(stagingJobPath, true); + } + + @Test (timeout = 30000) + public void testNoDeletionofStagingOnReboot() throws IOException { + conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir); + fs = mock(FileSystem.class); + when(fs.delete(any(Path.class),anyBoolean())).thenReturn(true); + String user = UserGroupInformation.getCurrentUser().getShortUserName(); + Path stagingDir = MRApps.getStagingAreaDir(conf, user); + when(fs.exists(stagingDir)).thenReturn(true); + ApplicationAttemptId attemptId = recordFactory.newRecordInstance( + ApplicationAttemptId.class); + attemptId.setAttemptId(0); + ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); + appId.setClusterTimestamp(System.currentTimeMillis()); + appId.setId(0); + attemptId.setApplicationId(appId); + ContainerAllocator mockAlloc = mock(ContainerAllocator.class); + MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc, + JobStateInternal.REBOOT, 4); + appMaster.init(conf); + appMaster.start(); + //shutdown the job, not the lastRetry + appMaster.shutDownJob(); + //test whether notifyIsLastAMRetry called + Assert.assertEquals(false, ((TestMRApp)appMaster).getTestIsLastAMRetry()); + verify(fs, times(0)).delete(stagingJobPath, true); + } + + @Test (timeout = 30000) + public void testDeletionofStagingOnReboot() throws IOException { + conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, stagingJobDir); + fs = mock(FileSystem.class); + when(fs.delete(any(Path.class),anyBoolean())).thenReturn(true); + String user = UserGroupInformation.getCurrentUser().getShortUserName(); + Path stagingDir = MRApps.getStagingAreaDir(conf, user); + when(fs.exists(stagingDir)).thenReturn(true); + ApplicationAttemptId attemptId = recordFactory.newRecordInstance( + ApplicationAttemptId.class); + attemptId.setAttemptId(1); + ApplicationId 
appId = recordFactory.newRecordInstance(ApplicationId.class); + appId.setClusterTimestamp(System.currentTimeMillis()); + appId.setId(0); + attemptId.setApplicationId(appId); + ContainerAllocator mockAlloc = mock(ContainerAllocator.class); + MRAppMaster appMaster = new TestMRApp(attemptId, mockAlloc, + JobStateInternal.REBOOT, MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS); + appMaster.init(conf); + appMaster.start(); + //shutdown the job, is lastRetry + appMaster.shutDownJob(); + //test whether notifyIsLastAMRetry called + Assert.assertEquals(true, ((TestMRApp)appMaster).getTestIsLastAMRetry()); verify(fs).delete(stagingJobPath, true); } @@ -151,6 +213,8 @@ import org.junit.Test; private class TestMRApp extends MRAppMaster { ContainerAllocator allocator; + boolean testIsLastAMRetry = false; + JobStateInternal jobStateInternal; public TestMRApp(ApplicationAttemptId applicationAttemptId, ContainerAllocator allocator, int maxAppAttempts) { @@ -160,9 +224,11 @@ import org.junit.Test; this.allocator = allocator; } - public TestMRApp(ApplicationAttemptId applicationAttemptId) { - this(applicationAttemptId, null, - MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS); + public TestMRApp(ApplicationAttemptId applicationAttemptId, + ContainerAllocator allocator, JobStateInternal jobStateInternal, + int maxAppAttempts) { + this(applicationAttemptId, allocator, maxAppAttempts); + this.jobStateInternal = jobStateInternal; } @Override @@ -179,6 +245,31 @@ import org.junit.Test; return allocator; } + @Override + protected Job createJob(Configuration conf, JobStateInternal forcedState, + String diagnostic) { + JobImpl jobImpl = mock(JobImpl.class); + when(jobImpl.getInternalState()).thenReturn(this.jobStateInternal); + JobID jobID = JobID.forName("job_1234567890000_0001"); + JobId jobId = TypeConverter.toYarn(jobID); + when(jobImpl.getID()).thenReturn(jobId); + ((AppContext) getContext()) + .getAllJobs().put(jobImpl.getID(), jobImpl); + return jobImpl; + } + + @Override + public void start() { + super.start(); + DefaultMetricsSystem.shutdown(); + } + + @Override + public void notifyIsLastAMRetry(boolean isLastAMRetry){ + testIsLastAMRetry = isLastAMRetry; + super.notifyIsLastAMRetry(isLastAMRetry); + } + @Override public RMHeartbeatHandler getRMHeartbeatHandler() { return getStubbedHeartbeatHandler(getContext()); @@ -197,6 +288,9 @@ import org.junit.Test; protected void downloadTokensAndSetupUGI(Configuration conf) { } + public boolean getTestIsLastAMRetry(){ + return testIsLastAMRetry; + } } private final class MRAppTestCleanup extends MRApp { @@ -222,7 +316,8 @@ import org.junit.Test; Job newJob = new TestJob(getJobId(), getAttemptID(), conf, getDispatcher().getEventHandler(), getTaskAttemptListener(), getContext().getClock(), - isNewApiCommitter(), currentUser.getUserName(), getContext(), + getCommitter(), isNewApiCommitter(), + currentUser.getUserName(), getContext(), forcedState, diagnostic); ((AppContext) getContext()).getAllJobs().put(newJob.getID(), newJob); @@ -288,7 +383,7 @@ import org.junit.Test; }; } - @Test + @Test(timeout=20000) public void testStagingCleanupOrder() throws Exception { MRAppTestCleanup app = new MRAppTestCleanup(1, 1, true, this.getClass().getName(), true); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java index 
8cfbe03c09a..0b93e75546d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java @@ -25,6 +25,8 @@ import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; import java.util.EnumSet; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; @@ -35,6 +37,7 @@ import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.jobhistory.EventType; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.JobStatus.State; @@ -47,6 +50,7 @@ import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; @@ -57,6 +61,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl.InitTransition; @@ -69,7 +74,6 @@ import org.apache.hadoop.yarn.SystemClock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; @@ -114,6 +118,7 @@ public class TestJobImpl { conf.set(MRJobConfig.WORKFLOW_NODE_NAME, "testNodeName"); conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key1", "value1"); conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key2", "value2"); + conf.set(MRJobConfig.WORKFLOW_TAGS, "tag1,tag2"); AsyncDispatcher dispatcher = new AsyncDispatcher(); @@ -126,12 +131,13 @@ public class TestJobImpl { commitHandler.start(); JobSubmittedEventHandler jseHandler = new JobSubmittedEventHandler("testId", - "testName", "testNodeName", "\"key2\"=\"value2\" \"key1\"=\"value1\" "); + "testName", "testNodeName", "\"key2\"=\"value2\" \"key1\"=\"value1\" ", + "tag1,tag2"); dispatcher.register(EventType.class, jseHandler); JobImpl job = createStubbedJob(conf, dispatcher, 0); job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(job.getID(), JobEventType.JOB_START)); + 
job.handle(new JobStartEvent(job.getID())); assertJobState(job, JobStateInternal.SUCCEEDED); dispatcher.stop(); commitHandler.stop(); @@ -192,6 +198,68 @@ public class TestJobImpl { commitHandler.stop(); } + @Test(timeout=20000) + public void testRebootedDuringSetup() throws Exception{ + Configuration conf = new Configuration(); + conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); + AsyncDispatcher dispatcher = new AsyncDispatcher(); + dispatcher.init(conf); + dispatcher.start(); + OutputCommitter committer = new StubbedOutputCommitter() { + @Override + public synchronized void setupJob(JobContext jobContext) + throws IOException { + while(!Thread.interrupted()){ + try{ + wait(); + }catch (InterruptedException e) { + } + } + } + }; + CommitterEventHandler commitHandler = + createCommitterEventHandler(dispatcher, committer); + commitHandler.init(conf); + commitHandler.start(); + + JobImpl job = createStubbedJob(conf, dispatcher, 2); + JobId jobId = job.getID(); + job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); + assertJobState(job, JobStateInternal.INITED); + job.handle(new JobStartEvent(jobId)); + assertJobState(job, JobStateInternal.SETUP); + + job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); + assertJobState(job, JobStateInternal.REBOOT); + dispatcher.stop(); + commitHandler.stop(); + } + + @Test(timeout=20000) + public void testRebootedDuringCommit() throws Exception { + Configuration conf = new Configuration(); + conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir); + AsyncDispatcher dispatcher = new AsyncDispatcher(); + dispatcher.init(conf); + dispatcher.start(); + CyclicBarrier syncBarrier = new CyclicBarrier(2); + OutputCommitter committer = new WaitingOutputCommitter(syncBarrier, true); + CommitterEventHandler commitHandler = + createCommitterEventHandler(dispatcher, committer); + commitHandler.init(conf); + commitHandler.start(); + + JobImpl job = createRunningStubbedJob(conf, dispatcher, 2); + completeJobTasks(job); + assertJobState(job, JobStateInternal.COMMITTING); + + syncBarrier.await(); + job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); + assertJobState(job, JobStateInternal.REBOOT); + dispatcher.stop(); + commitHandler.stop(); + } + @Test(timeout=20000) public void testKilledDuringSetup() throws Exception { Configuration conf = new Configuration(); @@ -220,7 +288,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL)); @@ -287,7 +355,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAIL_ABORT); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); @@ -324,7 +392,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); @@ -364,7 +432,7 @@ public class TestJobImpl { // Verify access 
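// Editor's note (not part of the patch), a hedged reading of the JobImpl constructor changes in
// the hunks below: each new JobImpl(...) call gains one extra null argument immediately before the
// newApiCommitter boolean, and the StubbedJob super(...) call later in this file gains a null in the
// same position. Given that TestStagingCleanup now threads getCommitter() through to TestJob, that
// new slot most likely corresponds to an OutputCommitter parameter added to JobImpl by this change;
// the exact parameter and its position are inferred from the surrounding hunks, not from the JobImpl
// signature itself, so treat this as an assumption.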
JobImpl job1 = new JobImpl(jobId, null, conf1, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job1.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertFalse(job1.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -375,7 +443,7 @@ public class TestJobImpl { // Verify access JobImpl job2 = new JobImpl(jobId, null, conf2, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job2.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job2.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -386,7 +454,7 @@ public class TestJobImpl { // Verify access JobImpl job3 = new JobImpl(jobId, null, conf3, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job3.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job3.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -397,7 +465,7 @@ public class TestJobImpl { // Verify access JobImpl job4 = new JobImpl(jobId, null, conf4, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job4.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job4.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -408,7 +476,7 @@ public class TestJobImpl { // Verify access JobImpl job5 = new JobImpl(jobId, null, conf5, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job5.checkAccess(ugi1, null)); Assert.assertTrue(job5.checkAccess(ugi2, null)); } @@ -426,7 +494,7 @@ public class TestJobImpl { mock(EventHandler.class), null, mock(JobTokenSecretManager.class), null, new SystemClock(), null, - mrAppMetrics, true, null, 0, null, null, null, null); + mrAppMetrics, null, true, null, 0, null, null, null, null); job.handle(diagUpdateEvent); String diagnostics = job.getReport().getDiagnostics(); Assert.assertNotNull(diagnostics); @@ -437,7 +505,7 @@ public class TestJobImpl { mock(EventHandler.class), null, mock(JobTokenSecretManager.class), null, new SystemClock(), null, - mrAppMetrics, true, null, 0, null, null, null, null); + mrAppMetrics, null, true, null, 0, null, null, null, null); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); job.handle(diagUpdateEvent); diagnostics = job.getReport().getDiagnostics(); @@ -492,7 +560,7 @@ public class TestJobImpl { JobImpl job = new JobImpl(jobId, Records .newRecord(ApplicationAttemptId.class), conf, mock(EventHandler.class), null, new JobTokenSecretManager(), new Credentials(), null, null, - mrAppMetrics, true, null, 0, null, null, null, null); + mrAppMetrics, null, true, null, 0, null, null, null, null); InitTransition initTransition = getInitTransition(2); JobEvent mockJobEvent = mock(JobEvent.class); initTransition.transition(job, mockJobEvent); @@ -533,7 +601,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_COMPLETED)); @@ -597,7 +665,7 @@ public class TestJobImpl { StubbedJob job = createStubbedJob(conf, dispatcher, 
numSplits); job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(job.getID(), JobEventType.JOB_START)); + job.handle(new JobStartEvent(job.getID())); assertJobState(job, JobStateInternal.RUNNING); return job; } @@ -644,14 +712,18 @@ public class TestJobImpl { private String workflowAdjacencies; + private String workflowTags; + private Boolean assertBoolean; public JobSubmittedEventHandler(String workflowId, String workflowName, - String workflowNodeName, String workflowAdjacencies) { + String workflowNodeName, String workflowAdjacencies, + String workflowTags) { this.workflowId = workflowId; this.workflowName = workflowName; this.workflowNodeName = workflowNodeName; this.workflowAdjacencies = workflowAdjacencies; + this.workflowTags = workflowTags; assertBoolean = null; } @@ -673,7 +745,16 @@ public class TestJobImpl { setAssertValue(false); return; } - if (!workflowAdjacencies.equals(jsEvent.getWorkflowAdjacencies())) { + + String[] wrkflowAdj = workflowAdjacencies.split(" "); + String[] jswrkflowAdj = jsEvent.getWorkflowAdjacencies().split(" "); + Arrays.sort(wrkflowAdj); + Arrays.sort(jswrkflowAdj); + if (!Arrays.equals(wrkflowAdj, jswrkflowAdj)) { + setAssertValue(false); + return; + } + if (!workflowTags.equals(jsEvent.getWorkflowTags())) { setAssertValue(false); return; } @@ -713,9 +794,9 @@ public class TestJobImpl { boolean newApiCommitter, String user, int numSplits) { super(jobId, applicationAttemptId, conf, eventHandler, null, new JobTokenSecretManager(), new Credentials(), - new SystemClock(), null, MRAppMetrics.create(), - newApiCommitter, user, System.currentTimeMillis(), null, null, null, - null); + new SystemClock(), Collections. emptyMap(), + MRAppMetrics.create(), null, newApiCommitter, user, + System.currentTimeMillis(), null, null, null, null); initTransition = getInitTransition(numSplits); localFactory = stateMachineFactory.addTransition(JobStateInternal.NEW, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java index 87575d61f39..54be1d74f0c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java @@ -113,9 +113,9 @@ public class TestTaskAttemptContainerRequest { ContainerId containerId = BuilderUtils.newContainerId(1, 1, 1, 1); ContainerLaunchContext launchCtx = - TaskAttemptImpl.createContainerLaunchContext(acls, containerId, + TaskAttemptImpl.createContainerLaunchContext(acls, jobConf, jobToken, taImpl.createRemoteTask(), - TypeConverter.fromYarn(jobId), mock(Resource.class), + TypeConverter.fromYarn(jobId), mock(WrappedJvmID.class), taListener, credentials); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java index 
d3297b3fb6b..9fd0fb8b1ac 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java @@ -27,7 +27,6 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -38,7 +37,6 @@ import org.apache.hadoop.mapred.TaskUmbilicalProtocol; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.TaskCounter; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.v2.api.records.Avataar; @@ -80,7 +78,6 @@ public class TestTaskImpl { private Path remoteJobConfFile; private Credentials credentials; private Clock clock; - private Map completedTasksFromPreviousRun; private MRAppMetrics metrics; private TaskImpl mockTask; private ApplicationId appId; @@ -104,13 +101,12 @@ public class TestTaskImpl { EventHandler eventHandler, Path remoteJobConfFile, JobConf conf, TaskAttemptListener taskAttemptListener, Token jobToken, - Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, + Credentials credentials, Clock clock, int startCount, MRAppMetrics metrics, AppContext appContext, TaskType taskType) { super(jobId, taskType , partition, eventHandler, remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, metrics, appContext); + startCount, metrics, appContext); this.taskType = taskType; } @@ -247,8 +243,7 @@ public class TestTaskImpl { return new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, - metrics, appContext, taskType); + startCount, metrics, appContext, taskType); } @After @@ -652,9 +647,7 @@ public class TestTaskImpl { public void testFailedTransitions() { mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, - credentials, clock, - completedTasksFromPreviousRun, startCount, - metrics, appContext, TaskType.MAP) { + credentials, clock, startCount, metrics, appContext, TaskType.MAP) { @Override protected int getMaxAttempts() { return 1; @@ -721,9 +714,7 @@ public class TestTaskImpl { public void testCountersWithSpeculation() { mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, - credentials, clock, - completedTasksFromPreviousRun, startCount, - metrics, appContext, TaskType.MAP) { + credentials, clock, startCount, metrics, appContext, TaskType.MAP) { @Override protected int getMaxAttempts() { return 1; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java 
index e1bab017561..c5d0a885f4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java @@ -60,7 +60,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerToken; @@ -383,7 +382,6 @@ public class TestContainerLauncher { @Override public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext container = request.getContainerLaunchContext(); StartContainerResponse response = recordFactory .newRecordInstance(StartContainerResponse.class); status = recordFactory.newRecordInstance(ContainerStatus.class); @@ -395,7 +393,7 @@ public class TestContainerLauncher { throw new UndeclaredThrowableException(e); } status.setState(ContainerState.RUNNING); - status.setContainerId(container.getContainerId()); + status.setContainerId(request.getContainer().getId()); status.setExitStatus(0); return response; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java index 8ef7e68c6cc..fdf650d647b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java @@ -144,7 +144,10 @@ public class TestAMWebApp { @Test public void testTaskView() { AppContext appContext = new TestAppContext(); Map params = getTaskParams(appContext); - WebAppTests.testPage(TaskPage.class, AppContext.class, appContext, params); + App app = new App(appContext); + app.setJob(appContext.getAllJobs().values().iterator().next()); + app.setTask(app.getJob().getTasks().values().iterator().next()); + WebAppTests.testPage(TaskPage.class, App.class, app, params); } public static Map getJobParams(AppContext appContext) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java index 3368d5134e9..1055516b65e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java @@ -32,13 +32,13 @@ import java.util.LinkedHashMap; import java.util.List; import 
java.util.Map; import java.util.Map.Entry; -import java.util.Random; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -91,6 +91,9 @@ class LocalDistributedCacheManager { Map localResources = new LinkedHashMap(); MRApps.setupDistributedCache(conf, localResources); + // Generating unique numbers for FSDownload. + AtomicLong uniqueNumberGenerator = + new AtomicLong(System.currentTimeMillis()); // Find which resources are to be put on the local classpath Map classpaths = new HashMap(); @@ -128,8 +131,10 @@ class LocalDistributedCacheManager { Path destPath = localDirAllocator.getLocalPathForWrite(".", conf); Map> resourcesToPaths = Maps.newHashMap(); for (LocalResource resource : localResources.values()) { - Callable download = new FSDownload(localFSFileContext, ugi, conf, - destPath, resource, new Random()); + Callable download = + new FSDownload(localFSFileContext, ugi, conf, new Path(destPath, + Long.toString(uniqueNumberGenerator.incrementAndGet())), + resource); Future future = exec.submit(download); resourcesToPaths.put(resource, future); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/impl/pb/client/MRClientProtocolPBClientImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/impl/pb/client/MRClientProtocolPBClientImpl.java index ad2ce63144f..d8aa812f294 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/impl/pb/client/MRClientProtocolPBClientImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/impl/pb/client/MRClientProtocolPBClientImpl.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce.v2.api.impl.pb.client; +import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; @@ -101,7 +102,8 @@ import org.apache.hadoop.yarn.exceptions.impl.pb.YarnRemoteExceptionPBImpl; import com.google.protobuf.ServiceException; -public class MRClientProtocolPBClientImpl implements MRClientProtocol { +public class MRClientProtocolPBClientImpl implements MRClientProtocol, + Closeable { protected MRClientProtocolPB proxy; @@ -117,6 +119,13 @@ public class MRClientProtocolPBClientImpl implements MRClientProtocol { return RPC.getServerAddress(proxy); } + @Override + public void close() { + if (this.proxy != null) { + RPC.stopProxy(this.proxy); + } + } + @Override public GetJobReportResponse getJobReport(GetJobReportRequest request) throws YarnRemoteException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index a2ba7f79d73..66471d367cb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -47,6 +47,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import com.google.common.base.Joiner; @@ -525,4 +526,19 @@ public class JobHistoryUtils { sb.append(jobId.toString()); return sb.toString(); } + + public static Path getPreviousJobHistoryPath( + Configuration conf, ApplicationAttemptId applicationAttemptId) + throws IOException { + String jobId = + TypeConverter.fromYarn(applicationAttemptId.getApplicationId()) + .toString(); + String jobhistoryDir = + JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId); + Path histDirPath = FileContext.getFileContext(conf).makeQualified( + new Path(jobhistoryDir)); + FileContext fc = FileContext.getFileContext(histDirPath.toUri(), conf); + return fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile( + histDirPath,jobId, (applicationAttemptId.getAttemptId() - 1))); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr index dcb9ca40ac8..b78fc80584d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr @@ -95,7 +95,8 @@ {"name": "workflowId", "type": "string"}, {"name": "workflowName", "type": "string"}, {"name": "workflowNodeName", "type": "string"}, - {"name": "workflowAdjacencies", "type": "string"} + {"name": "workflowAdjacencies", "type": "string"}, + {"name": "workflowTags", "type": "string"} ] }, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java index 70bcbc56aa4..902907447be 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java @@ -138,15 +138,6 @@ import org.apache.hadoop.util.ToolRunner; public class JobClient extends CLI { public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL } private TaskStatusFilter taskOutputFilter = TaskStatusFilter.FAILED; - /* notes that get delegation token was called. Again this is hack for oozie - * to make sure we add history server delegation tokens to the credentials - * for the job. Since the api only allows one delegation token to be returned, - * we have to add this hack. 
- */ - private boolean getDelegationTokenCalled = false; - /* do we need a HS delegation token for this client */ - static final String HS_DELEGATION_TOKEN_REQUIRED - = "mapreduce.history.server.delegationtoken.required"; static{ ConfigUtil.loadResources(); @@ -569,10 +560,6 @@ public class JobClient extends CLI { try { conf.setBooleanIfUnset("mapred.mapper.new-api", false); conf.setBooleanIfUnset("mapred.reducer.new-api", false); - if (getDelegationTokenCalled) { - conf.setBoolean(HS_DELEGATION_TOKEN_REQUIRED, getDelegationTokenCalled); - getDelegationTokenCalled = false; - } Job job = clientUgi.doAs(new PrivilegedExceptionAction () { @Override public Job run() throws IOException, ClassNotFoundException, @@ -1173,7 +1160,6 @@ public class JobClient extends CLI { */ public Token getDelegationToken(final Text renewer) throws IOException, InterruptedException { - getDelegationTokenCalled = true; return clientUgi.doAs(new PrivilegedExceptionAction>() { public Token run() throws IOException, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java index 375391320be..bbac5fcab9d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java @@ -103,4 +103,15 @@ public interface MRConfig { "mapreduce.ifile.readahead.bytes"; public static final int DEFAULT_MAPRED_IFILE_READAHEAD_BYTES = - 4 * 1024 * 1024;} + 4 * 1024 * 1024; + + /** + * Whether users are explicitly trying to control resource monitoring + * configuration for the MiniMRCluster. Disabled by default. + */ + public static final String MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING + = "mapreduce.minicluster.control-resource-monitoring"; + public static final boolean + DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING = false; +} + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 3f80065a82c..6e399ee7410 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -422,6 +422,7 @@ public interface MRJobConfig { /** Enable job recovery.*/ public static final String MR_AM_JOB_RECOVERY_ENABLE = MR_AM_PREFIX + "job.recovery.enable"; + public static final boolean MR_AM_JOB_RECOVERY_ENABLE_DEFAULT = true; /** * Limit on the number of reducers that can be preempted to ensure that at @@ -664,6 +665,8 @@ public interface MRJobConfig { public static final String WORKFLOW_ADJACENCY_PREFIX_PATTERN = "^mapreduce\\.workflow\\.adjacency\\..+"; + public static final String WORKFLOW_TAGS = "mapreduce.workflow.tags"; + /** * The maximum number of application attempts. * It is a application-specific setting. 
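The MRJobConfig hunk above introduces a workflow-tags key alongside the new recovery-enable default. As a minimal sketch (not part of the patch, and with an illustrative job setup only), this is how a submitter could attach comma-separated tags so they are recorded in the JobSubmittedEvent that the patch extends; the "tag1,tag2" style value mirrors what TestJobImpl exercises.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class WorkflowTagsExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Comma-separated workflow tags, as exercised by TestJobImpl ("tag1,tag2").
    conf.set(MRJobConfig.WORKFLOW_TAGS, "etl,nightly");
    // AM job recovery now defaults to MR_AM_JOB_RECOVERY_ENABLE_DEFAULT (true);
    // setting it explicitly is only needed to opt out.
    conf.setBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true);

    Job job = Job.getInstance(conf, "workflow-tags-example");
    // ... mapper/reducer/input/output configuration omitted ...
    // On submission the tags are copied into the JobSubmittedEvent (see the
    // Events.avpr and JobSubmittedEvent hunks) and become visible to history
    // consumers via JobSubmittedEvent#getWorkflowTags().
  }
}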
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java index 83bdbe6f4a6..24f820e4301 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java @@ -75,6 +75,31 @@ public class JobSubmittedEvent implements HistoryEvent { Map jobACLs, String jobQueueName, String workflowId, String workflowName, String workflowNodeName, String workflowAdjacencies) { + this(id, jobName, userName, submitTime, jobConfPath, jobACLs, + jobQueueName, workflowId, workflowName, workflowNodeName, + workflowAdjacencies, ""); + } + + /** + * Create an event to record job submission + * @param id The job Id of the job + * @param jobName Name of the job + * @param userName Name of the user who submitted the job + * @param submitTime Time of submission + * @param jobConfPath Path of the Job Configuration file + * @param jobACLs The configured acls for the job. + * @param jobQueueName The job-queue to which this job was submitted to + * @param workflowId The Id of the workflow + * @param workflowName The name of the workflow + * @param workflowNodeName The node name of the workflow + * @param workflowAdjacencies The adjacencies of the workflow + * @param workflowTags Comma-separated tags for the workflow + */ + public JobSubmittedEvent(JobID id, String jobName, String userName, + long submitTime, String jobConfPath, + Map jobACLs, String jobQueueName, + String workflowId, String workflowName, String workflowNodeName, + String workflowAdjacencies, String workflowTags) { datum.jobid = new Utf8(id.toString()); datum.jobName = new Utf8(jobName); datum.userName = new Utf8(userName); @@ -101,6 +126,9 @@ public class JobSubmittedEvent implements HistoryEvent { if (workflowAdjacencies != null) { datum.workflowAdjacencies = new Utf8(workflowAdjacencies); } + if (workflowTags != null) { + datum.workflowTags = new Utf8(workflowTags); + } } JobSubmittedEvent() {} @@ -168,6 +196,13 @@ public class JobSubmittedEvent implements HistoryEvent { } return null; } + /** Get the workflow tags */ + public String getWorkflowTags() { + if (datum.workflowTags != null) { + return datum.workflowTags.toString(); + } + return null; + } /** Get the event type */ public EventType getEventType() { return EventType.JOB_SUBMITTED; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java index 004d5b0a6f2..a1887f72d44 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java @@ -869,4 +869,9 @@ public class HistoryFileManager extends AbstractService { } } } + // for test + @VisibleForTesting + void setMaxHistoryAge(long 
newValue){ + maxHistoryAge=newValue; + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java index e5358dcb3e1..9a4f137da77 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java @@ -287,7 +287,7 @@ public class JobInfo { avgShuffleTime += (attempt.getShuffleFinishTime() - attempt .getLaunchTime()); avgMergeTime += attempt.getSortFinishTime() - - attempt.getLaunchTime(); + - attempt.getShuffleFinishTime(); avgReduceTime += (attempt.getFinishTime() - attempt .getShuffleFinishTime()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestCompletedTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestCompletedTask.java index 23e636d0dfc..0206e95f1f0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestCompletedTask.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestCompletedTask.java @@ -21,46 +21,75 @@ package org.apache.hadoop.mapreduce.v2.hs; import java.util.Map; import java.util.TreeMap; +import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; +import org.apache.hadoop.mapreduce.v2.api.records.Phase; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskReport; import org.apache.hadoop.mapreduce.v2.hs.CompletedTask; -import org.junit.Assert; import org.junit.Test; -import org.mockito.Mockito; +import static org.mockito.Mockito.*; +import static org.junit.Assert.*; public class TestCompletedTask{ - @Test + @Test (timeout=5000) public void testTaskStartTimes() { - TaskId taskId = Mockito.mock(TaskId.class); - TaskInfo taskInfo = Mockito.mock(TaskInfo.class); + TaskId taskId = mock(TaskId.class); + TaskInfo taskInfo = mock(TaskInfo.class); Map taskAttempts = new TreeMap(); TaskAttemptID id = new TaskAttemptID("0", 0, TaskType.MAP, 0, 0); - TaskAttemptInfo info = Mockito.mock(TaskAttemptInfo.class); - Mockito.when(info.getAttemptId()).thenReturn(id); - Mockito.when(info.getStartTime()).thenReturn(10l); + TaskAttemptInfo info = mock(TaskAttemptInfo.class); + when(info.getAttemptId()).thenReturn(id); + when(info.getStartTime()).thenReturn(10l); taskAttempts.put(id, info); id = new TaskAttemptID("1", 0, TaskType.MAP, 1, 1); - info = Mockito.mock(TaskAttemptInfo.class); - Mockito.when(info.getAttemptId()).thenReturn(id); - Mockito.when(info.getStartTime()).thenReturn(20l); + info = mock(TaskAttemptInfo.class); + when(info.getAttemptId()).thenReturn(id); + when(info.getStartTime()).thenReturn(20l); 
taskAttempts.put(id, info); - Mockito.when(taskInfo.getAllTaskAttempts()).thenReturn(taskAttempts); + when(taskInfo.getAllTaskAttempts()).thenReturn(taskAttempts); CompletedTask task = new CompletedTask(taskId, taskInfo); TaskReport report = task.getReport(); // Make sure the startTime returned by report is the lesser of the // attempy launch times - Assert.assertTrue(report.getStartTime() == 10); + assertTrue(report.getStartTime() == 10); + } + /** + * test some methods of CompletedTaskAttempt + */ + @Test (timeout=5000) + public void testCompletedTaskAttempt(){ + + TaskAttemptInfo attemptInfo= mock(TaskAttemptInfo.class); + when(attemptInfo.getRackname()).thenReturn("Rackname"); + when(attemptInfo.getShuffleFinishTime()).thenReturn(11L); + when(attemptInfo.getSortFinishTime()).thenReturn(12L); + when(attemptInfo.getShufflePort()).thenReturn(10); + + JobID jobId= new JobID("12345",0); + TaskID taskId =new TaskID(jobId,TaskType.REDUCE, 0); + TaskAttemptID taskAttemptId= new TaskAttemptID(taskId, 0); + when(attemptInfo.getAttemptId()).thenReturn(taskAttemptId); + + + CompletedTaskAttempt taskAttemt= new CompletedTaskAttempt(null,attemptInfo); + assertEquals( "Rackname", taskAttemt.getNodeRackName()); + assertEquals( Phase.CLEANUP, taskAttemt.getPhase()); + assertTrue( taskAttemt.isFinished()); + assertEquals( 11L, taskAttemt.getShuffleFinishTime()); + assertEquals( 12L, taskAttemt.getSortFinishTime()); + assertEquals( 10, taskAttemt.getShufflePort()); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java index 837f9e17b73..0c4f3d7779e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java @@ -45,7 +45,9 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; +import org.apache.hadoop.mapred.TaskCompletionEvent; +import static org.junit.Assert.*; import static org.mockito.Mockito.*; @RunWith(value = Parameterized.class) @@ -79,7 +81,7 @@ public class TestJobHistoryEntities { } /* Verify some expected values based on the history file */ - @Test (timeout=10000) + @Test (timeout=100000) public void testCompletedJob() throws Exception { HistoryFileInfo info = mock(HistoryFileInfo.class); when(info.getConfFile()).thenReturn(fullConfPath); @@ -92,11 +94,11 @@ public class TestJobHistoryEntities { assertEquals(1, completedJob.getAMInfos().size()); assertEquals(10, completedJob.getCompletedMaps()); assertEquals(1, completedJob.getCompletedReduces()); - assertEquals(11, completedJob.getTasks().size()); + assertEquals(12, completedJob.getTasks().size()); //Verify tasks loaded at this point. 
assertEquals(true, completedJob.tasksLoaded.get()); assertEquals(10, completedJob.getTasks(TaskType.MAP).size()); - assertEquals(1, completedJob.getTasks(TaskType.REDUCE).size()); + assertEquals(2, completedJob.getTasks(TaskType.REDUCE).size()); assertEquals("user", completedJob.getUserName()); assertEquals(JobState.SUCCEEDED, completedJob.getState()); JobReport jobReport = completedJob.getReport(); @@ -117,7 +119,7 @@ public class TestJobHistoryEntities { Map mapTasks = completedJob.getTasks(TaskType.MAP); Map reduceTasks = completedJob.getTasks(TaskType.REDUCE); assertEquals(10, mapTasks.size()); - assertEquals(1, reduceTasks.size()); + assertEquals(2, reduceTasks.size()); Task mt1 = mapTasks.get(mt1Id); assertEquals(1, mt1.getAttempts().size()); @@ -132,7 +134,7 @@ public class TestJobHistoryEntities { assertEquals(TaskState.SUCCEEDED, rt1Report.getTaskState()); assertEquals(rt1Id, rt1Report.getTaskId()); } - + @Test (timeout=10000) public void testCompletedTaskAttempt() throws Exception { HistoryFileInfo info = mock(HistoryFileInfo.class); @@ -168,4 +170,45 @@ public class TestJobHistoryEntities { assertEquals(45454, rta1Report.getNodeManagerPort()); assertEquals(9999, rta1Report.getNodeManagerHttpPort()); } + /** + * Simple test of some methods of CompletedJob + * @throws Exception + */ + @Test (timeout=30000) + public void testGetTaskAttemptCompletionEvent() throws Exception{ + HistoryFileInfo info = mock(HistoryFileInfo.class); + when(info.getConfFile()).thenReturn(fullConfPath); + completedJob = + new CompletedJob(conf, jobId, fulleHistoryPath, loadTasks, "user", + info, jobAclsManager); + TaskCompletionEvent[] events= completedJob.getMapAttemptCompletionEvents(0,1000); + assertEquals(10, completedJob.getMapAttemptCompletionEvents(0,10).length); + int currentEventId=0; + for (TaskCompletionEvent taskAttemptCompletionEvent : events) { + int eventId= taskAttemptCompletionEvent.getEventId(); + assertTrue(eventId>=currentEventId); + currentEventId=eventId; + } + assertNull(completedJob.loadConfFile() ); + // job name + assertEquals("Sleep job",completedJob.getName()); + // queue name + assertEquals("default",completedJob.getQueueName()); + // progress + assertEquals(1.0, completedJob.getProgress(),0.001); + // 12 rows in answer + assertEquals(12,completedJob.getTaskAttemptCompletionEvents(0,1000).length); + // select first 10 rows + assertEquals(10,completedJob.getTaskAttemptCompletionEvents(0,10).length); + // select 5-10 rows include 5th + assertEquals(7,completedJob.getTaskAttemptCompletionEvents(5,10).length); + + // without errors + assertEquals(1,completedJob.getDiagnostics().size()); + assertEquals("",completedJob.getDiagnostics().get(0)); + + assertEquals(0, completedJob.getJobACLs().size()); + + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java index 073a17bde17..ccf2120ee50 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java @@ -19,6 +19,9 @@ package org.apache.hadoop.mapreduce.v2.hs; import java.io.ByteArrayOutputStream; +import static 
org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + import java.io.IOException; import java.io.PrintStream; import java.util.Arrays; @@ -54,6 +57,9 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.mapreduce.v2.api.records.impl.pb.JobIdPBImpl; +import org.apache.hadoop.mapreduce.v2.api.records.impl.pb.TaskIdPBImpl; import org.apache.hadoop.mapreduce.v2.app.MRApp; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; @@ -65,7 +71,9 @@ import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory; import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo; +import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; import org.apache.hadoop.net.DNSToSwitchMapping; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.service.Service; import org.apache.hadoop.yarn.util.BuilderUtils; @@ -80,12 +88,12 @@ public class TestJobHistoryParsing { private static final String RACK_NAME = "/MyRackName"; - private ByteArrayOutputStream outContent = new ByteArrayOutputStream(); + private ByteArrayOutputStream outContent = new ByteArrayOutputStream(); public static class MyResolver implements DNSToSwitchMapping { @Override public List resolve(List names) { - return Arrays.asList(new String[]{RACK_NAME}); + return Arrays.asList(new String[] { RACK_NAME }); } @Override @@ -93,14 +101,14 @@ public class TestJobHistoryParsing { } } - @Test (timeout=50000) + @Test(timeout = 50000) public void testJobInfo() throws Exception { JobInfo info = new JobInfo(); Assert.assertEquals("NORMAL", info.getPriority()); info.printAll(); } - @Test (timeout=50000) + @Test(timeout = 300000) public void testHistoryParsing() throws Exception { LOG.info("STARTING testHistoryParsing()"); try { @@ -109,8 +117,8 @@ public class TestJobHistoryParsing { LOG.info("FINISHED testHistoryParsing()"); } } - - @Test (timeout=50000) + + @Test(timeout = 50000) public void testHistoryParsingWithParseErrors() throws Exception { LOG.info("STARTING testHistoryParsingWithParseErrors()"); try { @@ -119,18 +127,18 @@ public class TestJobHistoryParsing { LOG.info("FINISHED testHistoryParsingWithParseErrors()"); } } - - private static String getJobSummary(FileContext fc, Path path) throws IOException { + + private static String getJobSummary(FileContext fc, Path path) + throws IOException { Path qPath = fc.makeQualified(path); FSDataInputStream in = fc.open(qPath); String jobSummaryString = in.readUTF(); in.close(); return jobSummaryString; } - + private void checkHistoryParsing(final int numMaps, final int numReduces, - final int numSuccessfulMaps) - throws Exception { + final int numSuccessfulMaps) throws Exception { Configuration conf = new Configuration(); conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name")); long amStartTimeEst = System.currentTimeMillis(); @@ -138,9 +146,8 @@ public class TestJobHistoryParsing { CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, MyResolver.class, DNSToSwitchMapping.class); 
RackResolver.init(conf); - MRApp app = - new MRAppWithHistory(numMaps, numReduces, true, - this.getClass().getName(), true); + MRApp app = new MRAppWithHistory(numMaps, numReduces, true, this.getClass() + .getName(), true); app.submit(conf); Job job = app.getContext().getAllJobs().values().iterator().next(); JobId jobId = job.getID(); @@ -152,7 +159,7 @@ public class TestJobHistoryParsing { String jobhistoryDir = JobHistoryUtils .getHistoryIntermediateDoneDirForUser(conf); - + FileContext fc = null; try { fc = FileContext.getFileContext(conf); @@ -160,7 +167,7 @@ public class TestJobHistoryParsing { LOG.info("Can not get FileContext", ioe); throw (new Exception("Can not get File Context")); } - + if (numMaps == numSuccessfulMaps) { String summaryFileName = JobHistoryUtils .getIntermediateSummaryFileName(jobId); @@ -185,20 +192,22 @@ public class TestJobHistoryParsing { Long.parseLong(jobSummaryElements.get("submitTime")) != 0); Assert.assertTrue("launchTime should not be 0", Long.parseLong(jobSummaryElements.get("launchTime")) != 0); - Assert.assertTrue("firstMapTaskLaunchTime should not be 0", - Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0); Assert - .assertTrue( - "firstReduceTaskLaunchTime should not be 0", - Long.parseLong(jobSummaryElements.get("firstReduceTaskLaunchTime")) != 0); + .assertTrue( + "firstMapTaskLaunchTime should not be 0", + Long.parseLong(jobSummaryElements.get("firstMapTaskLaunchTime")) != 0); + Assert + .assertTrue("firstReduceTaskLaunchTime should not be 0", + Long.parseLong(jobSummaryElements + .get("firstReduceTaskLaunchTime")) != 0); Assert.assertTrue("finishTime should not be 0", Long.parseLong(jobSummaryElements.get("finishTime")) != 0); Assert.assertEquals("Mismatch in num map slots", numSuccessfulMaps, Integer.parseInt(jobSummaryElements.get("numMaps"))); Assert.assertEquals("Mismatch in num reduce slots", numReduces, Integer.parseInt(jobSummaryElements.get("numReduces"))); - Assert.assertEquals("User does not match", System.getProperty("user.name"), - jobSummaryElements.get("user")); + Assert.assertEquals("User does not match", + System.getProperty("user.name"), jobSummaryElements.get("user")); Assert.assertEquals("Queue does not match", "default", jobSummaryElements.get("queue")); Assert.assertEquals("Status does not match", "SUCCEEDED", @@ -210,8 +219,8 @@ public class TestJobHistoryParsing { HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId); JobInfo jobInfo; long numFinishedMaps; - - synchronized(fileInfo) { + + synchronized (fileInfo) { Path historyFilePath = fileInfo.getHistoryFile(); FSDataInputStream in = null; LOG.info("JobHistoryFile is: " + historyFilePath); @@ -228,11 +237,11 @@ public class TestJobHistoryParsing { if (numMaps == numSuccessfulMaps) { reader = realReader; } else { - final AtomicInteger numFinishedEvents = new AtomicInteger(0); // Hack! + final AtomicInteger numFinishedEvents = new AtomicInteger(0); // Hack! 
Mockito.when(reader.getNextEvent()).thenAnswer( new Answer() { - public HistoryEvent answer(InvocationOnMock invocation) - throws IOException { + public HistoryEvent answer(InvocationOnMock invocation) + throws IOException { HistoryEvent event = realReader.getNextEvent(); if (event instanceof TaskFinishedEvent) { numFinishedEvents.incrementAndGet(); @@ -244,22 +253,20 @@ public class TestJobHistoryParsing { throw new IOException("test"); } } - } - ); + }); } jobInfo = parser.parse(reader); - numFinishedMaps = - computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps); + numFinishedMaps = computeFinishedMaps(jobInfo, numMaps, numSuccessfulMaps); if (numFinishedMaps != numMaps) { Exception parseException = parser.getParseException(); - Assert.assertNotNull("Didn't get expected parse exception", + Assert.assertNotNull("Didn't get expected parse exception", parseException); } } - + Assert.assertEquals("Incorrect username ", System.getProperty("user.name"), jobInfo.getUsername()); Assert.assertEquals("Incorrect jobName ", "test", jobInfo.getJobname()); @@ -267,7 +274,7 @@ public class TestJobHistoryParsing { jobInfo.getJobQueueName()); Assert .assertEquals("incorrect conf path", "test", jobInfo.getJobConfPath()); - Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, + Assert.assertEquals("incorrect finishedMap ", numSuccessfulMaps, numFinishedMaps); Assert.assertEquals("incorrect finishedReduces ", numReduces, jobInfo.getFinishedReduces()); @@ -275,8 +282,8 @@ public class TestJobHistoryParsing { jobInfo.getUberized()); Map allTasks = jobInfo.getAllTasks(); int totalTasks = allTasks.size(); - Assert.assertEquals("total number of tasks is incorrect ", - (numMaps+numReduces), totalTasks); + Assert.assertEquals("total number of tasks is incorrect ", + (numMaps + numReduces), totalTasks); // Verify aminfo Assert.assertEquals(1, jobInfo.getAMInfos().size()); @@ -306,8 +313,7 @@ public class TestJobHistoryParsing { // Deep compare Job and JobInfo for (Task task : job.getTasks().values()) { - TaskInfo taskInfo = allTasks.get( - TypeConverter.fromYarn(task.getID())); + TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID())); Assert.assertNotNull("TaskInfo not found", taskInfo); for (TaskAttempt taskAttempt : task.getAttempts().values()) { TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get( @@ -318,27 +324,32 @@ public class TestJobHistoryParsing { if (numMaps == numSuccessfulMaps) { Assert.assertEquals(MRApp.NM_HOST, taskAttemptInfo.getHostname()); Assert.assertEquals(MRApp.NM_PORT, taskAttemptInfo.getPort()); - + // Verify rack-name - Assert.assertEquals("rack-name is incorrect", taskAttemptInfo - .getRackname(), RACK_NAME); + Assert.assertEquals("rack-name is incorrect", + taskAttemptInfo.getRackname(), RACK_NAME); } } } - + // test output for HistoryViewer - PrintStream stdps=System.out; + PrintStream stdps = System.out; try { System.setOut(new PrintStream(outContent)); HistoryViewer viewer = new HistoryViewer(fc.makeQualified( fileInfo.getHistoryFile()).toString(), conf, true); viewer.print(); - - for (TaskInfo taskInfo : allTasks.values()) { - - String test= (taskInfo.getTaskStatus()==null?"":taskInfo.getTaskStatus())+" "+taskInfo.getTaskType()+" task list for "+taskInfo.getTaskId().getJobID(); - Assert.assertTrue(outContent.toString().indexOf(test)>0); - Assert.assertTrue(outContent.toString().indexOf(taskInfo.getTaskId().toString())>0); + + for (TaskInfo taskInfo : allTasks.values()) { + + String test = (taskInfo.getTaskStatus() == null ? 
"" : taskInfo + .getTaskStatus()) + + " " + + taskInfo.getTaskType() + + " task list for " + taskInfo.getTaskId().getJobID(); + Assert.assertTrue(outContent.toString().indexOf(test) > 0); + Assert.assertTrue(outContent.toString().indexOf( + taskInfo.getTaskId().toString()) > 0); } } finally { System.setOut(stdps); @@ -363,186 +374,180 @@ public class TestJobHistoryParsing { } return numFinishedMaps; } - - @Test (timeout=50000) + + @Test(timeout = 30000) public void testHistoryParsingForFailedAttempts() throws Exception { LOG.info("STARTING testHistoryParsingForFailedAttempts"); try { - Configuration conf = new Configuration(); - conf - .setClass( - CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, - MyResolver.class, DNSToSwitchMapping.class); - RackResolver.init(conf); - MRApp app = new MRAppWithHistoryWithFailedAttempt(2, 1, true, this.getClass().getName(), - true); - app.submit(conf); - Job job = app.getContext().getAllJobs().values().iterator().next(); - JobId jobId = job.getID(); - app.waitForState(job, JobState.SUCCEEDED); - - // make sure all events are flushed - app.waitForState(Service.STATE.STOPPED); + Configuration conf = new Configuration(); + conf.setClass( + CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, + MyResolver.class, DNSToSwitchMapping.class); + RackResolver.init(conf); + MRApp app = new MRAppWithHistoryWithFailedAttempt(2, 1, true, this + .getClass().getName(), true); + app.submit(conf); + Job job = app.getContext().getAllJobs().values().iterator().next(); + JobId jobId = job.getID(); + app.waitForState(job, JobState.SUCCEEDED); - String jobhistoryDir = JobHistoryUtils - .getHistoryIntermediateDoneDirForUser(conf); - JobHistory jobHistory = new JobHistory(); - jobHistory.init(conf); + // make sure all events are flushed + app.waitForState(Service.STATE.STOPPED); - JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId) - .getJobIndexInfo(); - String jobhistoryFileName = FileNameIndexUtils - .getDoneFileName(jobIndexInfo); + String jobhistoryDir = JobHistoryUtils + .getHistoryIntermediateDoneDirForUser(conf); + JobHistory jobHistory = new JobHistory(); + jobHistory.init(conf); - Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); - FSDataInputStream in = null; - FileContext fc = null; - try { - fc = FileContext.getFileContext(conf); - in = fc.open(fc.makeQualified(historyFilePath)); - } catch (IOException ioe) { - LOG.info("Can not open history file: " + historyFilePath, ioe); - throw (new Exception("Can not open History File")); - } + JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId) + .getJobIndexInfo(); + String jobhistoryFileName = FileNameIndexUtils + .getDoneFileName(jobIndexInfo); - JobHistoryParser parser = new JobHistoryParser(in); - JobInfo jobInfo = parser.parse(); - Exception parseException = parser.getParseException(); - Assert.assertNull("Caught an expected exception " + parseException, - parseException); - int noOffailedAttempts = 0; - Map allTasks = jobInfo.getAllTasks(); - for (Task task : job.getTasks().values()) { - TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID())); - for (TaskAttempt taskAttempt : task.getAttempts().values()) { - TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get( - TypeConverter.fromYarn((taskAttempt.getID()))); - // Verify rack-name for all task attempts - Assert.assertEquals("rack-name is incorrect", taskAttemptInfo - .getRackname(), RACK_NAME); - if (taskAttemptInfo.getTaskStatus().equals("FAILED")) { - 
noOffailedAttempts++; + Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); + FSDataInputStream in = null; + FileContext fc = null; + try { + fc = FileContext.getFileContext(conf); + in = fc.open(fc.makeQualified(historyFilePath)); + } catch (IOException ioe) { + LOG.info("Can not open history file: " + historyFilePath, ioe); + throw (new Exception("Can not open History File")); + } + + JobHistoryParser parser = new JobHistoryParser(in); + JobInfo jobInfo = parser.parse(); + Exception parseException = parser.getParseException(); + Assert.assertNull("Caught an expected exception " + parseException, + parseException); + int noOffailedAttempts = 0; + Map allTasks = jobInfo.getAllTasks(); + for (Task task : job.getTasks().values()) { + TaskInfo taskInfo = allTasks.get(TypeConverter.fromYarn(task.getID())); + for (TaskAttempt taskAttempt : task.getAttempts().values()) { + TaskAttemptInfo taskAttemptInfo = taskInfo.getAllTaskAttempts().get( + TypeConverter.fromYarn((taskAttempt.getID()))); + // Verify rack-name for all task attempts + Assert.assertEquals("rack-name is incorrect", + taskAttemptInfo.getRackname(), RACK_NAME); + if (taskAttemptInfo.getTaskStatus().equals("FAILED")) { + noOffailedAttempts++; + } } } - } - Assert.assertEquals("No of Failed tasks doesn't match.", 2, noOffailedAttempts); + Assert.assertEquals("No of Failed tasks doesn't match.", 2, + noOffailedAttempts); } finally { LOG.info("FINISHED testHistoryParsingForFailedAttempts"); } } - - @Test (timeout=5000) + + @Test(timeout = 60000) public void testCountersForFailedTask() throws Exception { LOG.info("STARTING testCountersForFailedTask"); try { - Configuration conf = new Configuration(); - conf - .setClass( - CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, - MyResolver.class, DNSToSwitchMapping.class); - RackResolver.init(conf); - MRApp app = new MRAppWithHistoryWithFailedTask(2, 1, true, - this.getClass().getName(), true); - app.submit(conf); - Job job = app.getContext().getAllJobs().values().iterator().next(); - JobId jobId = job.getID(); - app.waitForState(job, JobState.FAILED); + Configuration conf = new Configuration(); + conf.setClass( + CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, + MyResolver.class, DNSToSwitchMapping.class); + RackResolver.init(conf); + MRApp app = new MRAppWithHistoryWithFailedTask(2, 1, true, this + .getClass().getName(), true); + app.submit(conf); + Job job = app.getContext().getAllJobs().values().iterator().next(); + JobId jobId = job.getID(); + app.waitForState(job, JobState.FAILED); - // make sure all events are flushed - app.waitForState(Service.STATE.STOPPED); + // make sure all events are flushed + app.waitForState(Service.STATE.STOPPED); - String jobhistoryDir = JobHistoryUtils - .getHistoryIntermediateDoneDirForUser(conf); - JobHistory jobHistory = new JobHistory(); - jobHistory.init(conf); + String jobhistoryDir = JobHistoryUtils + .getHistoryIntermediateDoneDirForUser(conf); + JobHistory jobHistory = new JobHistory(); + jobHistory.init(conf); - JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId) - .getJobIndexInfo(); - String jobhistoryFileName = FileNameIndexUtils - .getDoneFileName(jobIndexInfo); + JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId) + .getJobIndexInfo(); + String jobhistoryFileName = FileNameIndexUtils + .getDoneFileName(jobIndexInfo); - Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); - FSDataInputStream in = null; - FileContext fc = null; - try { - fc = 
FileContext.getFileContext(conf); - in = fc.open(fc.makeQualified(historyFilePath)); - } catch (IOException ioe) { - LOG.info("Can not open history file: " + historyFilePath, ioe); - throw (new Exception("Can not open History File")); - } + Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); + FSDataInputStream in = null; + FileContext fc = null; + try { + fc = FileContext.getFileContext(conf); + in = fc.open(fc.makeQualified(historyFilePath)); + } catch (IOException ioe) { + LOG.info("Can not open history file: " + historyFilePath, ioe); + throw (new Exception("Can not open History File")); + } - JobHistoryParser parser = new JobHistoryParser(in); - JobInfo jobInfo = parser.parse(); - Exception parseException = parser.getParseException(); - Assert.assertNull("Caught an expected exception " + parseException, - parseException); - for (Map.Entry entry : jobInfo.getAllTasks().entrySet()) { - TaskId yarnTaskID = TypeConverter.toYarn(entry.getKey()); - CompletedTask ct = new CompletedTask(yarnTaskID, entry.getValue()); - Assert.assertNotNull("completed task report has null counters", - ct.getReport().getCounters()); - //Make sure all the completedTask has counters, and the counters are not empty - Assert.assertTrue(ct.getReport().getCounters() - .getAllCounterGroups().size() > 0); - } + JobHistoryParser parser = new JobHistoryParser(in); + JobInfo jobInfo = parser.parse(); + Exception parseException = parser.getParseException(); + Assert.assertNull("Caught an expected exception " + parseException, + parseException); + for (Map.Entry entry : jobInfo.getAllTasks().entrySet()) { + TaskId yarnTaskID = TypeConverter.toYarn(entry.getKey()); + CompletedTask ct = new CompletedTask(yarnTaskID, entry.getValue()); + Assert.assertNotNull("completed task report has null counters", ct + .getReport().getCounters()); + } } finally { LOG.info("FINISHED testCountersForFailedTask"); } } - @Test (timeout=50000) + @Test(timeout = 50000) public void testScanningOldDirs() throws Exception { LOG.info("STARTING testScanningOldDirs"); try { - Configuration conf = new Configuration(); - conf - .setClass( - CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, - MyResolver.class, DNSToSwitchMapping.class); - RackResolver.init(conf); - MRApp app = - new MRAppWithHistory(1, 1, true, - this.getClass().getName(), true); - app.submit(conf); - Job job = app.getContext().getAllJobs().values().iterator().next(); - JobId jobId = job.getID(); - LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString()); - app.waitForState(job, JobState.SUCCEEDED); + Configuration conf = new Configuration(); + conf.setClass( + CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, + MyResolver.class, DNSToSwitchMapping.class); + RackResolver.init(conf); + MRApp app = new MRAppWithHistory(1, 1, true, this.getClass().getName(), + true); + app.submit(conf); + Job job = app.getContext().getAllJobs().values().iterator().next(); + JobId jobId = job.getID(); + LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString()); + app.waitForState(job, JobState.SUCCEEDED); - // make sure all events are flushed - app.waitForState(Service.STATE.STOPPED); + // make sure all events are flushed + app.waitForState(Service.STATE.STOPPED); - HistoryFileManagerForTest hfm = new HistoryFileManagerForTest(); - hfm.init(conf); - HistoryFileInfo fileInfo = hfm.getFileInfo(jobId); - Assert.assertNotNull("Unable to locate job history", fileInfo); + HistoryFileManagerForTest hfm = new HistoryFileManagerForTest(); + 
hfm.init(conf); + HistoryFileInfo fileInfo = hfm.getFileInfo(jobId); + Assert.assertNotNull("Unable to locate job history", fileInfo); - // force the manager to "forget" the job - hfm.deleteJobFromJobListCache(fileInfo); - final int msecPerSleep = 10; - int msecToSleep = 10 * 1000; - while (fileInfo.isMovePending() && msecToSleep > 0) { - Assert.assertTrue(!fileInfo.didMoveFail()); - msecToSleep -= msecPerSleep; - Thread.sleep(msecPerSleep); - } - Assert.assertTrue("Timeout waiting for history move", msecToSleep > 0); + // force the manager to "forget" the job + hfm.deleteJobFromJobListCache(fileInfo); + final int msecPerSleep = 10; + int msecToSleep = 10 * 1000; + while (fileInfo.isMovePending() && msecToSleep > 0) { + Assert.assertTrue(!fileInfo.didMoveFail()); + msecToSleep -= msecPerSleep; + Thread.sleep(msecPerSleep); + } + Assert.assertTrue("Timeout waiting for history move", msecToSleep > 0); - fileInfo = hfm.getFileInfo(jobId); - Assert.assertNotNull("Unable to locate old job history", fileInfo); - } finally { + fileInfo = hfm.getFileInfo(jobId); + Assert.assertNotNull("Unable to locate old job history", fileInfo); + } finally { LOG.info("FINISHED testScanningOldDirs"); } } static class MRAppWithHistoryWithFailedAttempt extends MRAppWithHistory { - public MRAppWithHistoryWithFailedAttempt(int maps, int reduces, boolean autoComplete, - String testName, boolean cleanOnStart) { + public MRAppWithHistoryWithFailedAttempt(int maps, int reduces, + boolean autoComplete, String testName, boolean cleanOnStart) { super(maps, reduces, autoComplete, testName, cleanOnStart); } - + @SuppressWarnings("unchecked") @Override protected void attemptLaunched(TaskAttemptId attemptID) { @@ -558,8 +563,8 @@ public class TestJobHistoryParsing { static class MRAppWithHistoryWithFailedTask extends MRAppWithHistory { - public MRAppWithHistoryWithFailedTask(int maps, int reduces, boolean autoComplete, - String testName, boolean cleanOnStart) { + public MRAppWithHistoryWithFailedTask(int maps, int reduces, + boolean autoComplete, String testName, boolean cleanOnStart) { super(maps, reduces, autoComplete, testName, cleanOnStart); } @@ -587,4 +592,133 @@ public class TestJobHistoryParsing { t.testHistoryParsing(); t.testHistoryParsingForFailedAttempts(); } + + /** + * Test clean old history files. Files should be deleted after 1 week by + * default. 
+   */
+  @Test(timeout = 15000)
+  public void testDeleteFileInfo() throws Exception {
+    LOG.info("STARTING testDeleteFileInfo");
+    try {
+      Configuration conf = new Configuration();
+
+      conf.setClass(
+          CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
+          MyResolver.class, DNSToSwitchMapping.class);
+
+      RackResolver.init(conf);
+      MRApp app = new MRAppWithHistory(1, 1, true, this.getClass().getName(),
+          true);
+      app.submit(conf);
+      Job job = app.getContext().getAllJobs().values().iterator().next();
+      JobId jobId = job.getID();
+
+      app.waitForState(job, JobState.SUCCEEDED);
+
+      // make sure all events are flushed
+      app.waitForState(Service.STATE.STOPPED);
+
+      HistoryFileManager hfm = new HistoryFileManager();
+      hfm.init(conf);
+      HistoryFileInfo fileInfo = hfm.getFileInfo(jobId);
+      hfm.initExisting();
+      // wait for the history files to move from the done_intermediate
+      // directory to the done directory
+      while (fileInfo.isMovePending()) {
+        Thread.sleep(300);
+      }
+
+      Assert.assertNotNull(hfm.jobListCache.values());
+
+      // try to remove fileInfo
+      hfm.clean();
+      // check that fileInfo is not deleted yet
+      Assert.assertFalse(fileInfo.isDeleted());
+      // set a negative max history age so the file becomes eligible for deletion
+      hfm.setMaxHistoryAge(-1);
+      hfm.clean();
+      // now the file should be deleted
+      Assert.assertTrue("file should be deleted ", fileInfo.isDeleted());
+
+    } finally {
+      LOG.info("FINISHED testDeleteFileInfo");
+    }
+  }
+
+  /**
+   * Simple test of some methods of JobHistory
+   */
+  @Test(timeout = 20000)
+  public void testJobHistoryMethods() throws Exception {
+    LOG.info("STARTING testJobHistoryMethods");
+    try {
+      Configuration configuration = new Configuration();
+      configuration
+          .setClass(
+              CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY,
+              MyResolver.class, DNSToSwitchMapping.class);
+
+      RackResolver.init(configuration);
+      MRApp app = new MRAppWithHistory(1, 1, true, this.getClass().getName(),
+          true);
+      app.submit(configuration);
+      Job job = app.getContext().getAllJobs().values().iterator().next();
+      JobId jobId = job.getID();
+      LOG.info("JOBID is " + TypeConverter.fromYarn(jobId).toString());
+      app.waitForState(job, JobState.SUCCEEDED);
+
+      JobHistory jobHistory = new JobHistory();
+      jobHistory.init(configuration);
+      // Method getAllJobs
+      Assert.assertEquals(1, jobHistory.getAllJobs().size());
+      // and with ApplicationId
+      Assert.assertEquals(1, jobHistory.getAllJobs(app.getAppID()).size());
+
+      JobsInfo jobsinfo = jobHistory.getPartialJobs(0L, 10L, null, "default",
+          0L, System.currentTimeMillis() + 1, 0L,
+          System.currentTimeMillis() + 1, JobState.SUCCEEDED);
+
+      Assert.assertEquals(1, jobsinfo.getJobs().size());
+      Assert.assertNotNull(jobHistory.getApplicationAttemptId());
+      // test Application Id
+      Assert.assertEquals("application_0_0000", jobHistory.getApplicationID()
+          .toString());
+      Assert
+          .assertEquals("Job History Server", jobHistory.getApplicationName());
+      // not implemented; should return null
+      Assert.assertNull(jobHistory.getEventHandler());
+      // not implemented; should return null
+      Assert.assertNull(jobHistory.getClock());
+      // not implemented; should return null
+      Assert.assertNull(jobHistory.getClusterInfo());
+
+    } finally {
+      LOG.info("FINISHED testJobHistoryMethods");
+    }
+  }
+
+  /**
+   * Simple test of PartialJob
+   */
+  @Test(timeout = 1000)
+  public void testPartialJob() throws Exception {
+    JobId jobId = new JobIdPBImpl();
+    jobId.setId(0);
+    JobIndexInfo jii = new JobIndexInfo(0L, System.currentTimeMillis(), "user",
+        "jobName", jobId, 3, 2, "JobStatus");
+    PartialJob test = new PartialJob(jii, jobId);
+
+    Assert.assertEquals(1.0f,
test.getProgress(), 0.001f); + assertNull(test.getAllCounters()); + assertNull(test.getTasks()); + assertNull(test.getTasks(TaskType.MAP)); + assertNull(test.getTask(new TaskIdPBImpl())); + + assertNull(test.getTaskAttemptCompletionEvents(0, 100)); + assertNull(test.getMapAttemptCompletionEvents(0, 100)); + assertTrue(test.checkAccess(UserGroupInformation.getCurrentUser(), null)); + assertNull(test.getAMInfos()); + + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java new file mode 100644 index 00000000000..faf3aebd259 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryServer.java @@ -0,0 +1,209 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.hs; + + +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.mapreduce.TaskCounter; +import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDiagnosticsRequest; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDiagnosticsResponse; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptCompletionEventsRequest; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptCompletionEventsResponse; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptReportRequest; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskAttemptReportResponse; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskReportRequest; +import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetTaskReportResponse; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.mapreduce.v2.app.MRApp; +import org.apache.hadoop.mapreduce.v2.app.job.Job; +import org.apache.hadoop.mapreduce.v2.app.job.Task; +import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory; +import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryParsing.MyResolver; +import org.apache.hadoop.net.DNSToSwitchMapping; +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.service.Service; +import org.apache.hadoop.yarn.service.Service.STATE; +import org.apache.hadoop.yarn.util.RackResolver; +import org.junit.After; +import org.junit.Test; + +import static org.junit.Assert.*; + +/* +test JobHistoryServer protocols.... + */ +public class TestJobHistoryServer { + private static RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + + + + JobHistoryServer historyServer=null; + // simple test init/start/stop JobHistoryServer. Status should change. + + @Test (timeout= 50000 ) + public void testStartStopServer() throws Exception { + + historyServer = new JobHistoryServer(); + Configuration config = new Configuration(); + historyServer.init(config); + assertEquals(STATE.INITED, historyServer.getServiceState()); + assertEquals(3, historyServer.getServices().size()); + historyServer.start(); + assertEquals(STATE.STARTED, historyServer.getServiceState()); + historyServer.stop(); + assertEquals(STATE.STOPPED, historyServer.getServiceState()); + assertNotNull(historyServer.getClientService()); + HistoryClientService historyService = historyServer.getClientService(); + assertNotNull(historyService.getClientHandler().getConnectAddress()); + + + + } + + + + //Test reports of JobHistoryServer. 
History server should get log files from MRApp and read them + + @Test (timeout= 50000 ) + public void testReports() throws Exception { + Configuration config = new Configuration(); + config + .setClass( + CommonConfigurationKeysPublic.NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, + MyResolver.class, DNSToSwitchMapping.class); + + RackResolver.init(config); + MRApp app = new MRAppWithHistory(1, 1, true, this.getClass().getName(), + true); + app.submit(config); + Job job = app.getContext().getAllJobs().values().iterator().next(); + app.waitForState(job, JobState.SUCCEEDED); + + historyServer = new JobHistoryServer(); + + historyServer.init(config); + historyServer.start(); + + // search JobHistory service + JobHistory jobHistory= null; + for (Service service : historyServer.getServices() ) { + if (service instanceof JobHistory) { + jobHistory = (JobHistory) service; + } + }; + + Map jobs= jobHistory.getAllJobs(); + + assertEquals(1, jobs.size()); + assertEquals("job_0_0000",jobs.keySet().iterator().next().toString()); + + + Task task = job.getTasks().values().iterator().next(); + TaskAttempt attempt = task.getAttempts().values().iterator().next(); + + HistoryClientService historyService = historyServer.getClientService(); + MRClientProtocol protocol = historyService.getClientHandler(); + + GetTaskAttemptReportRequest gtarRequest = recordFactory + .newRecordInstance(GetTaskAttemptReportRequest.class); + // test getTaskAttemptReport + TaskAttemptId taId = attempt.getID(); + taId.setTaskId(task.getID()); + taId.getTaskId().setJobId(job.getID()); + gtarRequest.setTaskAttemptId(taId); + GetTaskAttemptReportResponse response = protocol + .getTaskAttemptReport(gtarRequest); + assertEquals("container_0_0000_01_000000", response.getTaskAttemptReport() + .getContainerId().toString()); + assertTrue(response.getTaskAttemptReport().getDiagnosticInfo().isEmpty()); + // counters + assertNotNull(response.getTaskAttemptReport().getCounters() + .getCounter(TaskCounter.PHYSICAL_MEMORY_BYTES)); + assertEquals(taId.toString(), response.getTaskAttemptReport() + .getTaskAttemptId().toString()); + // test getTaskReport + GetTaskReportRequest request = recordFactory + .newRecordInstance(GetTaskReportRequest.class); + TaskId taskId = task.getID(); + taskId.setJobId(job.getID()); + request.setTaskId(taskId); + GetTaskReportResponse reportResponse = protocol.getTaskReport(request); + assertEquals("", reportResponse.getTaskReport().getDiagnosticsList() + .iterator().next()); + // progress + assertEquals(1.0f, reportResponse.getTaskReport().getProgress(), 0.01); + // report has corrected taskId + assertEquals(taskId.toString(), reportResponse.getTaskReport().getTaskId() + .toString()); + // Task state should be SUCCEEDED + assertEquals(TaskState.SUCCEEDED, reportResponse.getTaskReport() + .getTaskState()); + // test getTaskAttemptCompletionEvents + GetTaskAttemptCompletionEventsRequest taskAttemptRequest = recordFactory + .newRecordInstance(GetTaskAttemptCompletionEventsRequest.class); + taskAttemptRequest.setJobId(job.getID()); + GetTaskAttemptCompletionEventsResponse taskAttemptCompletionEventsResponse = protocol + .getTaskAttemptCompletionEvents(taskAttemptRequest); + assertEquals(0, taskAttemptCompletionEventsResponse.getCompletionEventCount()); + + // test getDiagnostics + GetDiagnosticsRequest diagnosticRequest = recordFactory + .newRecordInstance(GetDiagnosticsRequest.class); + diagnosticRequest.setTaskAttemptId(taId); + GetDiagnosticsResponse diagnosticResponse = protocol + .getDiagnostics(diagnosticRequest); + // 
the response is expected to contain a single empty diagnostic string
+    assertEquals(1, diagnosticResponse.getDiagnosticsCount());
+    assertEquals("", diagnosticResponse.getDiagnostics(0));
+
+  }
+  // test main method
+  @Test (timeout = 60000)
+  public void testMainMethod() throws Exception {
+
+    ExitUtil.disableSystemExit();
+    try {
+      JobHistoryServer.main(new String[0]);
+
+    } catch (ExitUtil.ExitException e) {
+      assertEquals(0, e.status);
+      ExitUtil.resetFirstExitException();
+      fail();
+    }
+  }
+
+  @After
+  public void stop() {
+    if (historyServer != null && !STATE.STOPPED.equals(historyServer.getServiceState())) {
+      historyServer.stop();
+    }
+  }
+}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java
new file mode 100644
index 00000000000..c59672fa719
--- /dev/null
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.mapreduce.v2.hs.webapp.dao; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobACLsManager; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo; +import org.apache.hadoop.mapreduce.v2.hs.CompletedJob; +import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEntities; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.junit.Test; + +public class TestJobInfo { + + @Test(timeout = 10000) + public void testAverageMergeTime() throws IOException { + String historyFileName = + "job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist"; + String confFileName = + "job_1329348432655_0001_conf.xml"; + Configuration conf = new Configuration(); + JobACLsManager jobAclsMgr = new JobACLsManager(conf); + Path fulleHistoryPath = + new Path(TestJobHistoryEntities.class.getClassLoader() + .getResource(historyFileName) + .getFile()); + Path fullConfPath = + new Path(TestJobHistoryEntities.class.getClassLoader() + .getResource(confFileName) + .getFile()); + + HistoryFileInfo info = mock(HistoryFileInfo.class); + when(info.getConfFile()).thenReturn(fullConfPath); + + JobId jobId = MRBuilderUtils.newJobId(1329348432655l, 1, 1); + CompletedJob completedJob = + new CompletedJob(conf, jobId, fulleHistoryPath, true, "user", + info, jobAclsMgr); + JobInfo jobInfo = new JobInfo(completedJob); + // There are 2 tasks with merge time of 45 and 55 respectively. So average + // merge time should be 50. 
+ Assert.assertEquals(50L, jobInfo.getAvgMergeTime().longValue()); + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist index 484971898e0..017e52c53e2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist @@ -15,6 +15,7 @@ Avro-Json {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_m_000008","taskType":"MAP","startTime":1329348448388,"splitLocations":""}}} {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_m_000009","taskType":"MAP","startTime":1329348448388,"splitLocations":""}}} {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_r_000000","taskType":"REDUCE","startTime":1329348448388,"splitLocations":""}}} + {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_r_000001","taskType":"REDUCE","startTime":1329348448388,"splitLocations":""}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000000","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000000_0","startTime":1329348450485,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000002"}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000002","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000002_0","startTime":1329348450537,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000004"}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000004","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000004_0","startTime":1329348450538,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000006"}}} @@ -40,6 +41,7 @@ Avro-Json {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000008","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000008_0","startTime":1329348462765,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000010"}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000009","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000009_0","startTime":1329348462792,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000011"}}} 
{"type":"REDUCE_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_r_000000","taskType":"REDUCE","attemptId":"attempt_1329348432655_0001_r_000000_0","startTime":1329348464995,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000014"}}} + {"type":"REDUCE_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_r_000001","taskType":"REDUCE","attemptId":"attempt_1329348432655_0001_r_000001_0","startTime":1329348464995,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000014"}}} {"type":"MAP_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinished":{"taskid":"task_1329348432655_0001_m_000007","attemptId":"attempt_1329348432655_0001_m_000007_0","taskType":"MAP","taskStatus":"SUCCEEDED","mapFinishTime":1329348465534,"finishTime":1329348465965,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... (1) ms left","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":194},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":185327616},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":713089024},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes 
Read","value":0}]}]},"clockSplits":[3464,18,19,18,18,19,18,18,19,18,18,19],"cpuUsages":[26,27,27,26,27,27,26,27,27,26,27,27],"vMemKbytes":[29015,87046,145078,203109,261140,319171,377203,435234,493266,551297,609328,667360],"physMemKbytes":[7541,22623,37705,52786,67869,82950,98033,113114,128197,143279,158360,173443]}}} {"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_m_000007","taskType":"MAP","finishTime":1329348465965,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":194},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":185327616},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":713089024},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]}}}} {"type":"MAP_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinished":{"taskid":"task_1329348432655_0001_m_000009","attemptId":"attempt_1329348432655_0001_m_000009_0","taskType":"MAP","taskStatus":"SUCCEEDED","mapFinishTime":1329348465986,"finishTime":1329348466363,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... 
(1) ms left","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":23},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":330},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":182169600},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":705945600},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]},"clockSplits":[3223,21,21,21,21,21,20,21,21,21,21,21],"cpuUsages":[27,28,27,28,27,28,27,28,27,28,27,28],"vMemKbytes":[28725,86175,143625,201074,258525,315974,373425,430874,488325,545775,603224,660675],"physMemKbytes":[7412,22237,37062,51887,66712,81537,96362,111187,126012,140837,155662,170487]}}} @@ -47,5 +49,7 @@ Avro-Json {"type":"MAP_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinished":{"taskid":"task_1329348432655_0001_m_000008","attemptId":"attempt_1329348432655_0001_m_000008_0","taskType":"MAP","taskStatus":"SUCCEEDED","mapFinishTime":1329348467231,"finishTime":1329348467421,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... 
(1) ms left","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":12},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":181297152},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":705019904},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]},"clockSplits":[4483,15,16,15,16,15,15,16,15,16,15,16],"cpuUsages":[26,27,27,26,27,27,26,27,27,26,27,27],"vMemKbytes":[28686,86061,143436,200810,258185,315560,372935,430309,487684,545059,602433,659808],"physMemKbytes":[7377,22131,36885,51638,66393,81146,95901,110654,125409,140163,154916,169671]}}} {"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_m_000008","taskType":"MAP","finishTime":1329348467421,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write 
operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":12},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":181297152},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":705019904},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]}}}} {"type":"REDUCE_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.ReduceAttemptFinished":{"taskid":"task_1329348432655_0001_r_000000","attemptId":"attempt_1329348432655_0001_r_000000_0","taskType":"REDUCE","taskStatus":"SUCCEEDED","shuffleFinishTime":1329348468462,"sortFinishTime":1329348468517,"finishTime":1329348468600,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... 
(1) ms left > reduce","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]},"clockSplits":[3530,6,7,6,7,6,6,7,6,7,6,7],"cpuUsages":[89,89,89,89,89,90,89,89,89,89,89,90],"vMemKbytes":[29070,87211,145352,203493,261634,319775,377916,436057,494198,552339,610480,668621],"physMemKbytes":[3367,10104,16841,23577,30314,37051,43788,50524,57261,63998,70734,77471]}}} + 
{"type":"REDUCE_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.ReduceAttemptFinished":{"taskid":"task_1329348432655_0001_r_000001","attemptId":"attempt_1329348432655_0001_r_000001_0","taskType":"REDUCE","taskStatus":"SUCCEEDED","shuffleFinishTime":1329348468462,"sortFinishTime":1329348468507,"finishTime":1329348468600,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... (1) ms left > reduce","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes 
Written","value":0}]}]},"clockSplits":[3530,6,7,6,7,6,6,7,6,7,6,7],"cpuUsages":[89,89,89,89,89,90,89,89,89,89,89,90],"vMemKbytes":[29070,87211,145352,203493,261634,319775,377916,436057,494198,552339,610480,668621],"physMemKbytes":[3367,10104,16841,23577,30314,37051,43788,50524,57261,63998,70734,77471]}}} {"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_r_000000","taskType":"REDUCE","finishTime":1329348468600,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]}}}} + 
{"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_r_000001","taskType":"REDUCE","finishTime":1329348468600,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]}}}} {"type":"JOB_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.JobFinished":{"jobid":"job_1329348432655_0001","finishTime":1329348468601,"finishedMaps":10,"finishedReduces":1,"failedMaps":0,"failedReduces":0,"totalCounters":{"name":"TOTAL_COUNTERS","groups":[{"name":"Shuffle Errors","displayName":"Shuffle 
Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":1386},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":528584},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":480},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":10},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.JobCounter","displayName":"Job Counters ","counts":[{"name":"TOTAL_LAUNCHED_MAPS","displayName":"Launched map tasks","value":10},{"name":"TOTAL_LAUNCHED_REDUCES","displayName":"Launched reduce tasks","value":1},{"name":"OTHER_LOCAL_MAPS","displayName":"Other local map tasks","value":10},{"name":"SLOTS_MILLIS_MAPS","displayName":"Total time spent by all maps in occupied slots (ms)","value":0},{"name":"SLOTS_MILLIS_REDUCES","displayName":"Total time spent by all reduces in occupied slots (ms)","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":10},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":10},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":40},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":120},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":480},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":20},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":2256},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":4460},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":1923493888},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":7773462528},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total 
committed heap usage (bytes)","value":1778515968}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]},"mapCounters":{"name":"MAP_COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":1200},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":480510},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":480},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":10},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":10},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":10},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":40},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":120},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":480},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":2242},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":3390},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":1840713728},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":7059025920},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":1717960704}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]},"reduceCounters":{"name":"REDUCE_COUNTERS","groups":[{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes 
read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]}}}} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 6a72917476c..5903a4aaac6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -19,6 +19,7 @@ package org.apache.hadoop.mapred; import java.io.IOException; +import java.net.InetSocketAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -87,6 +88,10 @@ public class ResourceMgrDelegate extends YarnClientImpl { return oldMetrics; } + InetSocketAddress getConnectAddress() { + return rmAddress; + } + @SuppressWarnings("rawtypes") public Token getDelegationToken(Text renewer) throws IOException, InterruptedException { diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index e8fd18a4c8a..241258ac222 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -60,6 +60,7 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequ import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.token.Token; @@ -81,6 +82,7 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.security.client.RMTokenSelector; import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ProtoUtils; @@ -90,7 +92,7 @@ import com.google.common.annotations.VisibleForTesting; /** * This class enables the current JobClient (0.22 hadoop) to run on YARN. */ -@SuppressWarnings({ "rawtypes", "unchecked" }) +@SuppressWarnings("unchecked") public class YARNRunner implements ClientProtocol { private static final Log LOG = LogFactory.getLog(YARNRunner.class); @@ -101,14 +103,6 @@ public class YARNRunner implements ClientProtocol { private Configuration conf; private final FileContext defaultFileContext; - /* usually is false unless the jobclient get delegation token is - * called. This is a hack wherein we do return a token from RM - * on getDelegationtoken but due to the restricted api on jobclient - * we just add a job history DT token when submitting a job. - */ - private static final boolean DEFAULT_HS_DELEGATION_TOKEN_REQUIRED = - false; - /** * Yarn runner incapsulates the client interface of * yarn @@ -185,6 +179,28 @@ public class YARNRunner implements ClientProtocol { return resMgrDelegate.getClusterMetrics(); } + @VisibleForTesting + void addHistoyToken(Credentials ts) throws IOException, InterruptedException { + /* check if we have a hsproxy, if not, no need */ + MRClientProtocol hsProxy = clientCache.getInitializedHSProxy(); + if (UserGroupInformation.isSecurityEnabled() && (hsProxy != null)) { + /* + * note that get delegation token was called. 
Again this is hack for oozie + * to make sure we add history server delegation tokens to the credentials + */ + RMTokenSelector tokenSelector = new RMTokenSelector(); + Text service = SecurityUtil.buildTokenService(resMgrDelegate + .getConnectAddress()); + if (tokenSelector.selectToken(service, ts.getAllTokens()) != null) { + Text hsService = SecurityUtil.buildTokenService(hsProxy + .getConnectAddress()); + if (ts.getToken(hsService) == null) { + ts.addToken(hsService, getDelegationTokenFromHS(hsProxy)); + } + } + } + } + @VisibleForTesting Token getDelegationTokenFromHS(MRClientProtocol hsProxy) throws IOException, InterruptedException { @@ -263,18 +279,8 @@ public class YARNRunner implements ClientProtocol { public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts) throws IOException, InterruptedException { - /* check if we have a hsproxy, if not, no need */ - MRClientProtocol hsProxy = clientCache.getInitializedHSProxy(); - if (hsProxy != null) { - // JobClient will set this flag if getDelegationToken is called, if so, get - // the delegation tokens for the HistoryServer also. - if (conf.getBoolean(JobClient.HS_DELEGATION_TOKEN_REQUIRED, - DEFAULT_HS_DELEGATION_TOKEN_REQUIRED)) { - Token hsDT = getDelegationTokenFromHS(hsProxy); - ts.addToken(hsDT.getService(), hsDT); - } - } - + addHistoyToken(ts); + // Upload only in security mode: TODO Path applicationTokensFile = new Path(jobSubmitDir, MRJobConfig.APPLICATION_TOKENS_FILE); @@ -462,16 +468,14 @@ public class YARNRunner implements ClientProtocol { // Setup ContainerLaunchContext for AM container ContainerLaunchContext amContainer = BuilderUtils - .newContainerLaunchContext(null, UserGroupInformation - .getCurrentUser().getShortUserName(), capability, localResources, + .newContainerLaunchContext(UserGroupInformation + .getCurrentUser().getShortUserName(), localResources, environment, vargsFinal, null, securityTokens, acls); // Set up the ApplicationSubmissionContext ApplicationSubmissionContext appContext = recordFactory.newRecordInstance(ApplicationSubmissionContext.class); appContext.setApplicationId(applicationId); // ApplicationId - appContext.setUser( // User name - UserGroupInformation.getCurrentUser().getShortUserName()); appContext.setQueue( // Queue name jobConf.get(JobContext.QUEUE_NAME, YarnConfiguration.DEFAULT_QUEUE_NAME)); @@ -484,7 +488,7 @@ public class YARNRunner implements ClientProtocol { appContext.setMaxAppAttempts( conf.getInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS)); - + appContext.setResource(capability); return appContext; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java index a13f8ed3bd5..a8c99e5be4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java @@ -99,6 +99,7 @@ public class TestDFSIO implements Tool { " [genericOptions]" + " -read [-random | -backward | -skip [-skipSize Size]] |" + " -write | -append | -clean" + + " [-compression codecClassName]" + " [-nrFiles N]" + " [-size Size[B|KB|MB|GB|TB]]" + " [-resFile resultFileName] [-bufferSize Bytes]"; diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java index 5675742cfd1..601268a7e78 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java @@ -20,8 +20,10 @@ package org.apache.hadoop.mapred; import static org.mockito.Matchers.any; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -30,6 +32,7 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; import java.util.List; @@ -39,28 +42,24 @@ import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.ClientCache; -import org.apache.hadoop.mapred.ClientServiceDelegate; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.Master; -import org.apache.hadoop.mapred.ResourceMgrDelegate; -import org.apache.hadoop.mapred.YARNRunner; +import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.JobPriority; import org.apache.hadoop.mapreduce.JobStatus.State; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; +import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequest; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenResponse; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -69,21 +68,27 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; +import 
org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.DelegationToken; import org.apache.hadoop.yarn.api.records.QueueInfo; -import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; +import org.apache.hadoop.yarn.util.BuilderUtils; +import org.apache.hadoop.yarn.util.Records; import org.apache.log4j.Appender; import org.apache.log4j.Layout; import org.apache.log4j.Logger; @@ -146,7 +151,7 @@ public class TestYARNRunner extends TestCase { } - @Test + @Test(timeout=20000) public void testJobKill() throws Exception { clientDelegate = mock(ClientServiceDelegate.class); when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new @@ -171,7 +176,7 @@ public class TestYARNRunner extends TestCase { verify(clientDelegate).killJob(jobId); } - @Test + @Test(timeout=20000) public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))). 
thenReturn(appId); @@ -193,7 +198,7 @@ public class TestYARNRunner extends TestCase { } } - @Test + @Test(timeout=20000) public void testResourceMgrDelegate() throws Exception { /* we not want a mock of resource mgr delegate */ final ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class); @@ -259,8 +264,88 @@ public class TestYARNRunner extends TestCase { delegate.getQueueAclsForCurrentUser(); verify(clientRMProtocol).getQueueUserAcls(any(GetQueueUserAclsInfoRequest.class)); } - - @Test + + @Test(timeout=20000) + public void testGetHSDelegationToken() throws Exception { + try { + Configuration conf = new Configuration(); + + // Setup mock service + InetSocketAddress mockRmAddress = new InetSocketAddress("localhost", 4444); + Text rmTokenSevice = SecurityUtil.buildTokenService(mockRmAddress); + + InetSocketAddress mockHsAddress = new InetSocketAddress("localhost", 9200); + Text hsTokenSevice = SecurityUtil.buildTokenService(mockHsAddress); + + // Setup mock rm token + RMDelegationTokenIdentifier tokenIdentifier = new RMDelegationTokenIdentifier( + new Text("owner"), new Text("renewer"), new Text("real")); + Token token = new Token( + new byte[0], new byte[0], tokenIdentifier.getKind(), rmTokenSevice); + token.setKind(RMDelegationTokenIdentifier.KIND_NAME); + + // Setup mock history token + DelegationToken historyToken = BuilderUtils.newDelegationToken( + new byte[0], MRDelegationTokenIdentifier.KIND_NAME.toString(), + new byte[0], hsTokenSevice.toString()); + GetDelegationTokenResponse getDtResponse = Records + .newRecord(GetDelegationTokenResponse.class); + getDtResponse.setDelegationToken(historyToken); + + // mock services + MRClientProtocol mockHsProxy = mock(MRClientProtocol.class); + doReturn(mockHsAddress).when(mockHsProxy).getConnectAddress(); + doReturn(getDtResponse).when(mockHsProxy).getDelegationToken( + any(GetDelegationTokenRequest.class)); + + ResourceMgrDelegate rmDelegate = mock(ResourceMgrDelegate.class); + doReturn(mockRmAddress).when(rmDelegate).getConnectAddress(); + + ClientCache clientCache = mock(ClientCache.class); + doReturn(mockHsProxy).when(clientCache).getInitializedHSProxy(); + + Credentials creds = new Credentials(); + + YARNRunner yarnRunner = new YARNRunner(conf, rmDelegate, clientCache); + + // No HS token if no RM token + yarnRunner.addHistoyToken(creds); + verify(mockHsProxy, times(0)).getDelegationToken( + any(GetDelegationTokenRequest.class)); + + // No HS token if RM token, but secirity disabled. + creds.addToken(new Text("rmdt"), token); + yarnRunner.addHistoyToken(creds); + verify(mockHsProxy, times(0)).getDelegationToken( + any(GetDelegationTokenRequest.class)); + + conf.set(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION, + "kerberos"); + UserGroupInformation.setConfiguration(conf); + creds = new Credentials(); + + // No HS token if no RM token, security enabled + yarnRunner.addHistoyToken(creds); + verify(mockHsProxy, times(0)).getDelegationToken( + any(GetDelegationTokenRequest.class)); + + // HS token if RM token present, security enabled + creds.addToken(new Text("rmdt"), token); + yarnRunner.addHistoyToken(creds); + verify(mockHsProxy, times(1)).getDelegationToken( + any(GetDelegationTokenRequest.class)); + + // No additional call to get HS token if RM and HS token present + yarnRunner.addHistoyToken(creds); + verify(mockHsProxy, times(1)).getDelegationToken( + any(GetDelegationTokenRequest.class)); + } finally { + // Back to defaults. 
+ UserGroupInformation.setConfiguration(new Configuration()); + } + } + + @Test(timeout=20000) public void testHistoryServerToken() throws Exception { //Set the master principal in the config conf.set(YarnConfiguration.RM_PRINCIPAL,"foo@LOCAL"); @@ -303,7 +388,7 @@ public class TestYARNRunner extends TestCase { }); } - @Test + @Test(timeout=20000) public void testAMAdminCommandOpts() throws Exception { JobConf jobConf = new JobConf(); @@ -366,7 +451,7 @@ public class TestYARNRunner extends TestCase { assertTrue("AM admin command opts is after user command opts.", adminIndex < userIndex); } } - @Test + @Test(timeout=20000) public void testWarnCommandOpts() throws Exception { Logger logger = Logger.getLogger(YARNRunner.class); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java index 8edf4f15d93..10d7a71e1b8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java @@ -20,16 +20,13 @@ package org.apache.hadoop.mapreduce.v2; import java.io.File; import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.LocalContainerLauncher; import org.apache.hadoop.mapred.ShuffleHandler; import org.apache.hadoop.mapreduce.MRConfig; @@ -76,6 +73,15 @@ public class MiniMRYarnCluster extends MiniYARNCluster { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(), "apps_staging_dir/").getAbsolutePath()); } + + // By default, VMEM monitoring disabled, PMEM monitoring enabled. 
+ if (!conf.getBoolean( + MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING, + MRConfig.DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) { + conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); + conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); + } + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); try { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java index cddfea60862..2d9a500280b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java @@ -38,6 +38,12 @@ public class ExampleDriver { try { pgd.addClass("wordcount", WordCount.class, "A map/reduce program that counts the words in the input files."); + pgd.addClass("wordmean", WordMean.class, + "A map/reduce program that counts the average length of the words in the input files."); + pgd.addClass("wordmedian", WordMedian.class, + "A map/reduce program that counts the median length of the words in the input files."); + pgd.addClass("wordstandarddeviation", WordStandardDeviation.class, + "A map/reduce program that counts the standard deviation of the length of the words in the input files."); pgd.addClass("aggregatewordcount", AggregateWordCount.class, "An Aggregate based map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, diff --git a/hadoop-tools/hadoop-gridmix/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-gridmix/dev-support/findbugs-exclude.xml new file mode 100644 index 00000000000..92458d4a213 --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/dev-support/findbugs-exclude.xml @@ -0,0 +1,33 @@ + + + + + + + + + + + + + + + + + + diff --git a/hadoop-tools/hadoop-gridmix/pom.xml b/hadoop-tools/hadoop-gridmix/pom.xml index 15ed600324b..1aeaa73b759 100644 --- a/hadoop-tools/hadoop-gridmix/pom.xml +++ b/hadoop-tools/hadoop-gridmix/pom.xml @@ -91,10 +91,25 @@ test-jar test + + org.mockito + mockito-all + test + + + org.codehaus.mojo + findbugs-maven-plugin + + true + true + ${basedir}/dev-support/findbugs-exclude.xml + Max + + org.apache.maven.plugins maven-antrun-plugin @@ -115,6 +130,15 @@ + + org.apache.rat + apache-rat-plugin + + + src/test/resources/data/* + + + org.apache.maven.plugins maven-jar-plugin diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java index 4bdc001c550..4620cfc5a29 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/Gridmix.java @@ -36,7 +36,9 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.mapred.gridmix.GenerateData.DataStatistics; import org.apache.hadoop.mapred.gridmix.Statistics.JobStats; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; @@ -693,7 +695,7 @@ public 
class Gridmix extends Configured implements Tool { try { res = ToolRunner.run(new Configuration(), new Gridmix(argv), argv); } finally { - System.exit(res); + ExitUtil.terminate(res); } } @@ -800,6 +802,10 @@ public class Gridmix extends Configured implements Tool { */ void abort(); } - + // it is need for tests + protected Summarizer getSummarizer() { + return summarizer; + } + } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java index 64e0c7a0bea..341c49a9c99 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/JobMonitor.java @@ -86,9 +86,7 @@ class JobMonitor implements Gridmix.Component { * Add a running job's status to the polling queue. */ public void add(JobStats job) throws InterruptedException { - synchronized (runningJobs) { runningJobs.put(job); - } } /** @@ -147,12 +145,10 @@ class JobMonitor implements Gridmix.Component { boolean shutdown; while (true) { try { - synchronized (runningJobs) { - synchronized (mJobs) { - graceful = JobMonitor.this.graceful; - shutdown = JobMonitor.this.shutdown; - runningJobs.drainTo(mJobs); - } + synchronized (mJobs) { + graceful = JobMonitor.this.graceful; + shutdown = JobMonitor.this.shutdown; + runningJobs.drainTo(mJobs); } // shutdown conditions; either shutdown requested and all jobs @@ -160,11 +156,9 @@ class JobMonitor implements Gridmix.Component { // submitted jobs not in the monitored set if (shutdown) { if (!graceful) { - synchronized (runningJobs) { - while (!runningJobs.isEmpty()) { - synchronized (mJobs) { - runningJobs.drainTo(mJobs); - } + while (!runningJobs.isEmpty()) { + synchronized (mJobs) { + runningJobs.drainTo(mJobs); } } break; diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java index 3301cbdf888..af554ff0b1f 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SerialJobFactory.java @@ -175,4 +175,8 @@ public class SerialJobFactory extends JobFactory { LOG.info(" Starting Serial submission "); this.rThread.start(); } + // it is need for test + void setDistCacheEmulator(DistributedCacheEmulator e) { + jobCreator.setDistCacheEmulator(e); + } } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java index a9f2999dd3c..6c2a0480bb8 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/SleepJob.java @@ -84,7 +84,7 @@ public class SleepJob extends GridmixJob { String[] hosts) throws IOException { super(conf, submissionMillis, jobdesc, outRoot, ugi, seq); this.fakeLocations = numLocations; - this.hosts = hosts; + this.hosts = hosts.clone(); this.selector = (fakeLocations > 0)? 
new Selector(hosts.length, (float) fakeLocations / hosts.length, rand.get()) : null; this.mapTasksOnly = conf.getBoolean(SLEEPJOB_MAPTASK_ONLY, false); @@ -289,9 +289,9 @@ public class SleepJob extends GridmixJob { this.id = id; this.sleepDuration = sleepDuration; nSpec = reduceDurations.length; - this.reduceDurations = reduceDurations; + this.reduceDurations = reduceDurations.clone(); this.nMaps = nMaps; - this.locations = locations; + this.locations = locations.clone(); } @Override @@ -349,7 +349,7 @@ public class SleepJob extends GridmixJob { @Override public String[] getLocations() { - return locations; + return locations.clone(); } } diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java index b9971dc2af5..4cfe1b533a2 100644 --- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java +++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java @@ -93,7 +93,7 @@ implements ResourceUsageEmulatorPlugin { private static final float DEFAULT_HEAP_LOAD_RATIO = 0.1F; - public static int ONE_MB = 1024 * 1024; + public static final int ONE_MB = 1024 * 1024; /** * Defines the core heap usage emulation algorithm. This engine is expected @@ -129,7 +129,8 @@ implements ResourceUsageEmulatorPlugin { public static class DefaultHeapUsageEmulator implements HeapUsageEmulatorCore { // store the unit loads in a list - protected static ArrayList heapSpace = new ArrayList(); + protected static final ArrayList heapSpace = + new ArrayList(); /** * Increase heap usage by current process by the given amount. diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java new file mode 100644 index 00000000000..77d7f0743b1 --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/CommonJobTest.java @@ -0,0 +1,384 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapred.gridmix; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.IOException; +import java.text.DecimalFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobID; +import org.apache.hadoop.mapred.TaskReport; +import org.apache.hadoop.mapreduce.Counter; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.TaskCounter; +import org.apache.hadoop.mapreduce.TaskType; +import org.apache.hadoop.tools.rumen.TaskInfo; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.tools.rumen.JobStory; +import org.apache.hadoop.util.ToolRunner; + +public class CommonJobTest { + public static final Log LOG = LogFactory.getLog(Gridmix.class); + + protected static int NJOBS = 2; + protected static final long GENDATA = 1; // in megabytes + protected static GridmixJobSubmissionPolicy policy = GridmixJobSubmissionPolicy.REPLAY; + private static File workspace = new File("target" + File.separator + + TestGridmixSubmission.class.getName() + "-test"); + + static class DebugGridmix extends Gridmix { + + private JobFactory factory; + private TestMonitor monitor; + + @Override + protected JobMonitor createJobMonitor(Statistics stats, Configuration conf) + throws IOException { + monitor = new TestMonitor(3, stats); + return monitor; + } + + @Override + protected JobFactory createJobFactory(JobSubmitter submitter, + String traceIn, Path scratchDir, Configuration conf, + CountDownLatch startFlag, UserResolver userResolver) throws IOException { + factory = DebugJobFactory.getFactory(submitter, scratchDir, NJOBS, conf, + startFlag, userResolver); + return factory; + } + + public void checkMonitor() throws Exception { + monitor.verify(((DebugJobFactory.Debuggable) factory).getSubmitted()); + } + } + + static class TestMonitor extends JobMonitor { + private final BlockingQueue retiredJobs; + private final int expected; + static final long SLOPBYTES = 1024; + + public TestMonitor(int expected, Statistics stats) { + super(3, TimeUnit.SECONDS, stats, 1); + this.expected = expected; + retiredJobs = new LinkedBlockingQueue(); + } + + @Override + protected void onSuccess(Job job) { + LOG.info(" Job Success " + job); + retiredJobs.add(job); + } + + @Override + protected void onFailure(Job job) 
{ + fail("Job failure: " + job); + } + + public void verify(ArrayList submitted) throws Exception { + assertEquals("Bad job count", expected, retiredJobs.size()); + + final ArrayList succeeded = new ArrayList(); + assertEquals("Bad job count", expected, retiredJobs.drainTo(succeeded)); + final HashMap sub = new HashMap(); + for (JobStory spec : submitted) { + sub.put(spec.getJobID().toString(), spec); + } + for (Job job : succeeded) { + final String jobName = job.getJobName(); + Configuration configuration = job.getConfiguration(); + if (GenerateData.JOB_NAME.equals(jobName)) { + RemoteIterator rit = GridmixTestUtils.dfs + .listFiles(new Path("/"), true); + while (rit.hasNext()) { + System.out.println(rit.next().toString()); + } + final Path in = new Path("foo").makeQualified( + GridmixTestUtils.dfs.getUri(), + GridmixTestUtils.dfs.getWorkingDirectory()); + // data was compressed. All files = compressed test size+ logs= 1000000/2 + logs + final ContentSummary generated = GridmixTestUtils.dfs + .getContentSummary(in); + assertEquals(550000, generated.getLength(), 10000); + + Counter counter = job.getCounters() + .getGroup("org.apache.hadoop.mapreduce.FileSystemCounter") + .findCounter("HDFS_BYTES_WRITTEN"); + + assertEquals(generated.getLength(), counter.getValue()); + + continue; + } else if (GenerateDistCacheData.JOB_NAME.equals(jobName)) { + continue; + } + + final String originalJobId = configuration.get(Gridmix.ORIGINAL_JOB_ID); + final JobStory spec = sub.get(originalJobId); + assertNotNull("No spec for " + jobName, spec); + assertNotNull("No counters for " + jobName, job.getCounters()); + final String originalJobName = spec.getName(); + System.out.println("originalJobName=" + originalJobName + + ";GridmixJobName=" + jobName + ";originalJobID=" + originalJobId); + assertTrue("Original job name is wrong.", + originalJobName.equals(configuration.get(Gridmix.ORIGINAL_JOB_NAME))); + + // Gridmix job seqNum contains 6 digits + int seqNumLength = 6; + String jobSeqNum = new DecimalFormat("000000").format(configuration.getInt( + GridmixJob.GRIDMIX_JOB_SEQ, -1)); + // Original job name is of the format MOCKJOB<6 digit sequence number> + // because MockJob jobNames are of this format. 
+ assertTrue(originalJobName.substring( + originalJobName.length() - seqNumLength).equals(jobSeqNum)); + + assertTrue("Gridmix job name is not in the expected format.", + jobName.equals(GridmixJob.JOB_NAME_PREFIX + jobSeqNum)); + final FileStatus stat = GridmixTestUtils.dfs.getFileStatus(new Path( + GridmixTestUtils.DEST, "" + Integer.valueOf(jobSeqNum))); + assertEquals("Wrong owner for " + jobName, spec.getUser(), + stat.getOwner()); + final int nMaps = spec.getNumberMaps(); + final int nReds = spec.getNumberReduces(); + + final JobClient client = new JobClient( + GridmixTestUtils.mrvl.getConfig()); + final TaskReport[] mReports = client.getMapTaskReports(JobID + .downgrade(job.getJobID())); + assertEquals("Mismatched map count", nMaps, mReports.length); + check(TaskType.MAP, spec, mReports, 0, 0, SLOPBYTES, nReds); + + final TaskReport[] rReports = client.getReduceTaskReports(JobID + .downgrade(job.getJobID())); + assertEquals("Mismatched reduce count", nReds, rReports.length); + check(TaskType.REDUCE, spec, rReports, nMaps * SLOPBYTES, 2 * nMaps, 0, + 0); + + } + + } + // Verify if correct job queue is used + private void check(final TaskType type, JobStory spec, + final TaskReport[] runTasks, long extraInputBytes, + int extraInputRecords, long extraOutputBytes, int extraOutputRecords) + throws Exception { + + long[] runInputRecords = new long[runTasks.length]; + long[] runInputBytes = new long[runTasks.length]; + long[] runOutputRecords = new long[runTasks.length]; + long[] runOutputBytes = new long[runTasks.length]; + long[] specInputRecords = new long[runTasks.length]; + long[] specInputBytes = new long[runTasks.length]; + long[] specOutputRecords = new long[runTasks.length]; + long[] specOutputBytes = new long[runTasks.length]; + + for (int i = 0; i < runTasks.length; ++i) { + final TaskInfo specInfo; + final Counters counters = runTasks[i].getCounters(); + switch (type) { + case MAP: + runInputBytes[i] = counters.findCounter("FileSystemCounters", + "HDFS_BYTES_READ").getValue() + - counters.findCounter(TaskCounter.SPLIT_RAW_BYTES).getValue(); + runInputRecords[i] = (int) counters.findCounter( + TaskCounter.MAP_INPUT_RECORDS).getValue(); + runOutputBytes[i] = counters + .findCounter(TaskCounter.MAP_OUTPUT_BYTES).getValue(); + runOutputRecords[i] = (int) counters.findCounter( + TaskCounter.MAP_OUTPUT_RECORDS).getValue(); + + specInfo = spec.getTaskInfo(TaskType.MAP, i); + specInputRecords[i] = specInfo.getInputRecords(); + specInputBytes[i] = specInfo.getInputBytes(); + specOutputRecords[i] = specInfo.getOutputRecords(); + specOutputBytes[i] = specInfo.getOutputBytes(); + + LOG.info(String.format(type + " SPEC: %9d -> %9d :: %5d -> %5d\n", + specInputBytes[i], specOutputBytes[i], specInputRecords[i], + specOutputRecords[i])); + LOG.info(String.format(type + " RUN: %9d -> %9d :: %5d -> %5d\n", + runInputBytes[i], runOutputBytes[i], runInputRecords[i], + runOutputRecords[i])); + break; + case REDUCE: + runInputBytes[i] = 0; + runInputRecords[i] = (int) counters.findCounter( + TaskCounter.REDUCE_INPUT_RECORDS).getValue(); + runOutputBytes[i] = counters.findCounter("FileSystemCounters", + "HDFS_BYTES_WRITTEN").getValue(); + runOutputRecords[i] = (int) counters.findCounter( + TaskCounter.REDUCE_OUTPUT_RECORDS).getValue(); + + specInfo = spec.getTaskInfo(TaskType.REDUCE, i); + // There is no reliable counter for reduce input bytes. The + // variable-length encoding of intermediate records and other noise + // make this quantity difficult to estimate. 
The shuffle and spec + // input bytes are included in debug output for reference, but are + // not checked + specInputBytes[i] = 0; + specInputRecords[i] = specInfo.getInputRecords(); + specOutputRecords[i] = specInfo.getOutputRecords(); + specOutputBytes[i] = specInfo.getOutputBytes(); + LOG.info(String.format(type + " SPEC: (%9d) -> %9d :: %5d -> %5d\n", + specInfo.getInputBytes(), specOutputBytes[i], + specInputRecords[i], specOutputRecords[i])); + LOG.info(String + .format(type + " RUN: (%9d) -> %9d :: %5d -> %5d\n", counters + .findCounter(TaskCounter.REDUCE_SHUFFLE_BYTES).getValue(), + runOutputBytes[i], runInputRecords[i], runOutputRecords[i])); + break; + default: + fail("Unexpected type: " + type); + } + } + + // Check input bytes + Arrays.sort(specInputBytes); + Arrays.sort(runInputBytes); + for (int i = 0; i < runTasks.length; ++i) { + assertTrue("Mismatched " + type + " input bytes " + specInputBytes[i] + + "/" + runInputBytes[i], + eqPlusMinus(runInputBytes[i], specInputBytes[i], extraInputBytes)); + } + + // Check input records + Arrays.sort(specInputRecords); + Arrays.sort(runInputRecords); + for (int i = 0; i < runTasks.length; ++i) { + assertTrue( + "Mismatched " + type + " input records " + specInputRecords[i] + + "/" + runInputRecords[i], + eqPlusMinus(runInputRecords[i], specInputRecords[i], + extraInputRecords)); + } + + // Check output bytes + Arrays.sort(specOutputBytes); + Arrays.sort(runOutputBytes); + for (int i = 0; i < runTasks.length; ++i) { + assertTrue( + "Mismatched " + type + " output bytes " + specOutputBytes[i] + "/" + + runOutputBytes[i], + eqPlusMinus(runOutputBytes[i], specOutputBytes[i], extraOutputBytes)); + } + + // Check output records + Arrays.sort(specOutputRecords); + Arrays.sort(runOutputRecords); + for (int i = 0; i < runTasks.length; ++i) { + assertTrue( + "Mismatched " + type + " output records " + specOutputRecords[i] + + "/" + runOutputRecords[i], + eqPlusMinus(runOutputRecords[i], specOutputRecords[i], + extraOutputRecords)); + } + + } + + private static boolean eqPlusMinus(long a, long b, long x) { + final long diff = Math.abs(a - b); + return diff <= x; + } + + } + + protected void doSubmission(String jobCreatorName, boolean defaultOutputPath) + throws Exception { + final Path in = new Path("foo").makeQualified( + GridmixTestUtils.dfs.getUri(), + GridmixTestUtils.dfs.getWorkingDirectory()); + final Path out = GridmixTestUtils.DEST.makeQualified( + GridmixTestUtils.dfs.getUri(), + GridmixTestUtils.dfs.getWorkingDirectory()); + final Path root = new Path(workspace.getAbsolutePath()); + if (!workspace.exists()) { + assertTrue(workspace.mkdirs()); + } + Configuration conf = null; + + try { + ArrayList argsList = new ArrayList(); + + argsList.add("-D" + FilePool.GRIDMIX_MIN_FILE + "=0"); + argsList.add("-D" + Gridmix.GRIDMIX_USR_RSV + "=" + + EchoUserResolver.class.getName()); + if (jobCreatorName != null) { + argsList.add("-D" + JobCreator.GRIDMIX_JOB_TYPE + "=" + jobCreatorName); + } + + // Set the config property gridmix.output.directory only if + // defaultOutputPath is false. If defaultOutputPath is true, then + // let us allow gridmix to use the path foo/gridmix/ as output dir. 
+ if (!defaultOutputPath) { + argsList.add("-D" + Gridmix.GRIDMIX_OUT_DIR + "=" + out); + } + argsList.add("-generate"); + argsList.add(String.valueOf(GENDATA) + "m"); + argsList.add(in.toString()); + argsList.add("-"); // ignored by DebugGridmix + + String[] argv = argsList.toArray(new String[argsList.size()]); + + DebugGridmix client = new DebugGridmix(); + conf = GridmixTestUtils.mrvl.getConfig(); + + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setEnum(GridmixJobSubmissionPolicy.JOB_SUBMISSION_POLICY, policy); + + conf.setBoolean(GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true); + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + conf.set(MRJobConfig.USER_NAME, ugi.getUserName()); + + // allow synthetic users to create home directories + GridmixTestUtils.dfs.mkdirs(root, new FsPermission((short) 777)); + GridmixTestUtils.dfs.setPermission(root, new FsPermission((short) 777)); + + int res = ToolRunner.run(conf, client, argv); + assertEquals("Client exited with nonzero status", 0, res); + client.checkMonitor(); + } catch (Exception e) { + e.printStackTrace(); + } finally { + in.getFileSystem(conf).delete(in, true); + out.getFileSystem(conf).delete(out, true); + root.getFileSystem(conf).delete(root, true); + } + } +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobFactory.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobFactory.java index 413dfd99074..99b4571b7e0 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobFactory.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobFactory.java @@ -29,13 +29,13 @@ import java.util.concurrent.CountDownLatch; /** * Component generating random job traces for testing on a single node. 
*/ -class DebugJobFactory { +public class DebugJobFactory { interface Debuggable { ArrayList getSubmitted(); } - public static JobFactory getFactory( + public static JobFactory getFactory( JobSubmitter submitter, Path scratch, int numJobs, Configuration conf, CountDownLatch startFlag, UserResolver resolver) throws IOException { GridmixJobSubmissionPolicy policy = GridmixJobSubmissionPolicy.getPolicy( diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java index fca29afce23..8b57308d9b3 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java @@ -216,7 +216,7 @@ public class DebugJobProducer implements JobStoryProducer { if (user == null) { user = String.format("foobar%d", id); } - GridmixTestUtils.createHomeAndStagingDirectory(user, (JobConf)conf); + GridmixTestUtils.createHomeAndStagingDirectory(user, conf); return user; } @@ -300,7 +300,7 @@ public class DebugJobProducer implements JobStoryProducer { @Override public String getQueueName() { - String qName = "q" + ((id % 2) + 1); + String qName = "default"; return qName; } diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java index 49f17097fed..50865b53da9 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/GridmixTestUtils.java @@ -4,55 +4,76 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.mapred.MiniMRCluster; -import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.MiniMRClientCluster; +import org.apache.hadoop.mapred.MiniMRClientClusterFactory; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.ShellBasedUnixGroupsMapping; -import org.apache.hadoop.security.Groups; import java.io.IOException; /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at *

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. */ public class GridmixTestUtils { private static final Log LOG = LogFactory.getLog(GridmixTestUtils.class); static final Path DEST = new Path("/gridmix"); static FileSystem dfs = null; static MiniDFSCluster dfsCluster = null; - static MiniMRCluster mrCluster = null; + static MiniMRClientCluster mrvl = null; + protected static final String GRIDMIX_USE_QUEUE_IN_TRACE = + "gridmix.job-submission.use-queue-in-trace"; + protected static final String GRIDMIX_DEFAULT_QUEUE = + "gridmix.job-submission.default-queue"; - public static void initCluster() throws IOException { + public static void initCluster(Class caller) throws IOException { Configuration conf = new Configuration(); - conf.set("mapred.queue.names", "default,q1,q2"); - dfsCluster = new MiniDFSCluster(conf, 3, true, null); +// conf.set("mapred.queue.names", "default,q1,q2"); + conf.set("mapred.queue.names", "default"); + conf.set("yarn.scheduler.capacity.root.queues", "default"); + conf.set("yarn.scheduler.capacity.root.default.capacity", "100.0"); + + + conf.setBoolean(GRIDMIX_USE_QUEUE_IN_TRACE, false); + conf.set(GRIDMIX_DEFAULT_QUEUE, "default"); + + + dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(true) + .build();// MiniDFSCluster(conf, 3, true, null); dfs = dfsCluster.getFileSystem(); conf.set(JTConfig.JT_RETIREJOBS, "false"); - mrCluster = new MiniMRCluster(3, dfs.getUri().toString(), 1, null, null, - new JobConf(conf)); + mrvl = MiniMRClientClusterFactory.create(caller, 2, conf); + + conf = mrvl.getConfig(); + String[] files = conf.getStrings(MRJobConfig.CACHE_FILES); + if (files != null) { + String[] timestamps = new String[files.length]; + for (int i = 0; i < files.length; i++) { + timestamps[i] = Long.toString(System.currentTimeMillis()); + } + conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timestamps); + } + } public static void shutdownCluster() throws IOException { - if (mrCluster != null) { - mrCluster.shutdown(); + if (mrvl != null) { + mrvl.stop(); } if (dfsCluster != null) { dfsCluster.shutdown(); @@ -61,23 +82,25 @@ public class GridmixTestUtils { /** * Methods to generate the home directory for dummy users. 
- * + * * @param conf */ - public static void createHomeAndStagingDirectory(String user, JobConf conf) { + public static void createHomeAndStagingDirectory(String user, + Configuration conf) { try { FileSystem fs = dfsCluster.getFileSystem(); String path = "/user/" + user; Path homeDirectory = new Path(path); - if(fs.exists(homeDirectory)) { - fs.delete(homeDirectory,true); - } - LOG.info("Creating Home directory : " + homeDirectory); - fs.mkdirs(homeDirectory); - changePermission(user,homeDirectory, fs); - Path stagingArea = - new Path(conf.get("mapreduce.jobtracker.staging.root.dir", - "/tmp/hadoop/mapred/staging")); + if (!fs.exists(homeDirectory)) { + LOG.info("Creating Home directory : " + homeDirectory); + fs.mkdirs(homeDirectory); + changePermission(user, homeDirectory, fs); + + } + changePermission(user, homeDirectory, fs); + Path stagingArea = new Path( + conf.get("mapreduce.jobtracker.staging.root.dir", + "/tmp/hadoop/mapred/staging")); LOG.info("Creating Staging root directory : " + stagingArea); fs.mkdirs(stagingArea); fs.setPermission(stagingArea, new FsPermission((short) 0777)); @@ -87,7 +110,7 @@ public class GridmixTestUtils { } static void changePermission(String user, Path homeDirectory, FileSystem fs) - throws IOException { + throws IOException { fs.setOwner(homeDirectory, user, ""); } } diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java new file mode 100644 index 00000000000..597bea2f10f --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java @@ -0,0 +1,430 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapred.gridmix; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.MapContext; +import org.apache.hadoop.mapreduce.MapReduceTestUtil; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.mapreduce.task.MapContextImpl; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Validate emulation of distributed cache load in gridmix simulated jobs. + * + */ +public class TestDistCacheEmulation { + + private DistributedCacheEmulator dce = null; + + @BeforeClass + public static void init() throws IOException { + GridmixTestUtils.initCluster(TestDistCacheEmulation.class); + File target=new File("target"+File.separator+TestDistCacheEmulation.class.getName()); + if(!target.exists()){ + assertTrue(target.mkdirs()); + } + + } + + @AfterClass + public static void shutDown() throws IOException { + GridmixTestUtils.shutdownCluster(); + } + + /** + * Validate the dist cache files generated by GenerateDistCacheData job. + * + * @param jobConf + * configuration of GenerateDistCacheData job. 
+ * @param sortedFileSizes + * array of sorted distributed cache file sizes + * @throws IOException + * @throws FileNotFoundException + */ + private void validateDistCacheData(Configuration jobConf, + long[] sortedFileSizes) throws FileNotFoundException, IOException { + Path distCachePath = dce.getDistributedCacheDir(); + String filesListFile = jobConf + .get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST); + FileSystem fs = FileSystem.get(jobConf); + + // Validate the existence of Distributed Cache files list file directly + // under distributed cache directory + Path listFile = new Path(filesListFile); + assertTrue("Path of Distributed Cache files list file is wrong.", + distCachePath.equals(listFile.getParent().makeQualified(fs.getUri(), fs.getWorkingDirectory()))); + + // Delete the dist cache files list file + assertTrue( + "Failed to delete distributed Cache files list file " + listFile, + fs.delete(listFile,true)); + + List fileSizes = new ArrayList(); + for (long size : sortedFileSizes) { + fileSizes.add(size); + } + // validate dist cache files after deleting the 'files list file' + validateDistCacheFiles(fileSizes, distCachePath); + } + + /** + * Validate private/public distributed cache files. + * + * @param filesSizesExpected + * list of sizes of expected dist cache files + * @param distCacheDir + * the distributed cache dir to be validated + * @throws IOException + * @throws FileNotFoundException + */ + private void validateDistCacheFiles(List filesSizesExpected, Path distCacheDir) + throws FileNotFoundException, IOException { + // RemoteIterator iter = + FileStatus[] statuses = GridmixTestUtils.dfs.listStatus(distCacheDir); + int numFiles = filesSizesExpected.size(); + assertEquals("Number of files under distributed cache dir is wrong.", + numFiles, statuses.length); + for (int i = 0; i < numFiles; i++) { + FileStatus stat = statuses[i]; + assertTrue("File size of distributed cache file " + + stat.getPath().toUri().getPath() + " is wrong.", + filesSizesExpected.remove(stat.getLen())); + + FsPermission perm = stat.getPermission(); + assertEquals("Wrong permissions for distributed cache file " + + stat.getPath().toUri().getPath(), new FsPermission((short) 0644), + perm); + } + } + + /** + * Configures 5 HDFS-based dist cache files and 1 local-FS-based dist cache + * file in the given Configuration object conf. + * + * @param conf + * configuration where dist cache config properties are to be set + * @return array of sorted HDFS-based distributed cache file sizes + * @throws IOException + */ + private long[] configureDummyDistCacheFiles(Configuration conf) + throws IOException { + String user = UserGroupInformation.getCurrentUser().getShortUserName(); + conf.set("user.name", user); + + // Set some dummy dist cache files in gridmix configuration so that they go + // into the configuration of JobStory objects. 
+    String[] distCacheFiles = { "hdfs:///tmp/file1.txt",
+        "/tmp/" + user + "/.staging/job_1/file2.txt",
+        "hdfs:///user/user1/file3.txt", "/home/user2/file4.txt",
+        "subdir1/file5.txt", "subdir2/file6.gz" };
+
+    String[] fileSizes = { "400", "2500", "700", "1200", "1500", "500" };
+
+    String[] visibilities = { "true", "false", "false", "true", "true", "false" };
+    String[] timeStamps = { "1234", "2345", "34567", "5434", "125", "134" };
+
+    // DistributedCache.setCacheFiles(fileCaches, conf);
+    conf.setStrings(MRJobConfig.CACHE_FILES, distCacheFiles);
+    conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, fileSizes);
+    conf.setStrings(JobContext.CACHE_FILE_VISIBILITIES, visibilities);
+    conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timeStamps);
+
+    // local FS based dist cache file whose path contains /.staging is
+    // not created on HDFS. So file size 2500 is not added to sortedFileSizes.
+    long[] sortedFileSizes = new long[] { 1500, 1200, 700, 500, 400 };
+    return sortedFileSizes;
+  }
+
+  /**
+   * Runs setupGenerateDistCacheData() on a new DistributedCacheEmulator and
+   * returns the jobConf. Fills the array sortedFileSizes so that it can
+   * be used for validation. The exit code returned by
+   * setupGenerateDistCacheData() is also validated.
+   *
+   * @param generate
+   *          true if -generate option is specified
+   * @param sortedFileSizes
+   *          sorted HDFS-based distributed cache file sizes
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  private Configuration runSetupGenerateDistCacheData(boolean generate,
+      long[] sortedFileSizes) throws IOException, InterruptedException {
+    Configuration conf = new Configuration();
+    long[] fileSizes = configureDummyDistCacheFiles(conf);
+    System.arraycopy(fileSizes, 0, sortedFileSizes, 0, fileSizes.length);
+
+    // Job stories of all 3 jobs will have the same dist cache files in their
+    // configurations
+    final int numJobs = 3;
+    DebugJobProducer jobProducer = new DebugJobProducer(numJobs, conf);
+
+    Configuration jobConf = GridmixTestUtils.mrvl.getConfig();
+    Path ioPath = new Path("testSetupGenerateDistCacheData")
+        .makeQualified(GridmixTestUtils.dfs.getUri(),GridmixTestUtils.dfs.getWorkingDirectory());
+    FileSystem fs = FileSystem.get(jobConf);
+    if (fs.exists(ioPath)) {
+      fs.delete(ioPath, true);
+    }
+    FileSystem.mkdirs(fs, ioPath, new FsPermission((short) 0777));
+
+    dce = createDistributedCacheEmulator(jobConf, ioPath, generate);
+    int exitCode = dce.setupGenerateDistCacheData(jobProducer);
+    int expectedExitCode = generate ? 0
+        : Gridmix.MISSING_DIST_CACHE_FILES_ERROR;
+    assertEquals("setupGenerateDistCacheData failed.", expectedExitCode,
+        exitCode);
+
+    // reset back
+    resetDistCacheConfigProperties(jobConf);
+    return jobConf;
+  }
+
+  /**
+   * Reset the config properties related to Distributed Cache in the given job
+   * configuration jobConf.
+ * + * @param jobConf + * job configuration + */ + private void resetDistCacheConfigProperties(Configuration jobConf) { + // reset current/latest property names + jobConf.setStrings(MRJobConfig.CACHE_FILES, ""); + jobConf.setStrings(MRJobConfig.CACHE_FILES_SIZES, ""); + jobConf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, ""); + jobConf.setStrings(JobContext.CACHE_FILE_VISIBILITIES, ""); + // reset old property names + jobConf.setStrings("mapred.cache.files", ""); + jobConf.setStrings("mapred.cache.files.filesizes", ""); + jobConf.setStrings("mapred.cache.files.visibilities", ""); + jobConf.setStrings("mapred.cache.files.timestamps", ""); + } + + /** + * Validate GenerateDistCacheData job if it creates dist cache files properly. + * + * @throws Exception + */ + @Test (timeout=200000) + public void testGenerateDistCacheData() throws Exception { + long[] sortedFileSizes = new long[5]; + Configuration jobConf = runSetupGenerateDistCacheData(true, sortedFileSizes); + GridmixJob gridmixJob = new GenerateDistCacheData(jobConf); + Job job = gridmixJob.call(); + assertEquals("Number of reduce tasks in GenerateDistCacheData is not 0.", + 0, job.getNumReduceTasks()); + assertTrue("GenerateDistCacheData job failed.", + job.waitForCompletion(false)); + validateDistCacheData(jobConf, sortedFileSizes); + } + + /** + * Validate setupGenerateDistCacheData by validating

  • permissions of the + * distributed cache directories and
  • content of the generated sequence + * file. This includes validation of dist cache file paths and their file + * sizes. + */ + private void validateSetupGenDC(Configuration jobConf, long[] sortedFileSizes) + throws IOException, InterruptedException { + // build things needed for validation + long sumOfFileSizes = 0; + for (int i = 0; i < sortedFileSizes.length; i++) { + sumOfFileSizes += sortedFileSizes[i]; + } + + FileSystem fs = FileSystem.get(jobConf); + assertEquals("Number of distributed cache files to be generated is wrong.", + sortedFileSizes.length, + jobConf.getInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, -1)); + assertEquals("Total size of dist cache files to be generated is wrong.", + sumOfFileSizes, + jobConf.getLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, -1)); + Path filesListFile = new Path( + jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST)); + FileStatus stat = fs.getFileStatus(filesListFile); + assertEquals("Wrong permissions of dist Cache files list file " + + filesListFile, new FsPermission((short) 0644), stat.getPermission()); + + InputSplit split = new FileSplit(filesListFile, 0, stat.getLen(), + (String[]) null); + TaskAttemptContext taskContext = MapReduceTestUtil + .createDummyMapTaskAttemptContext(jobConf); + RecordReader reader = new GenerateDistCacheData.GenDCDataFormat() + .createRecordReader(split, taskContext); + MapContext mapContext = new MapContextImpl( + jobConf, taskContext.getTaskAttemptID(), reader, null, null, + MapReduceTestUtil.createDummyReporter(), split); + reader.initialize(split, mapContext); + + // start validating setupGenerateDistCacheData + doValidateSetupGenDC(reader, fs, sortedFileSizes); + } + + /** + * Validate setupGenerateDistCacheData by validating
  • permissions of the + * distributed cache directory and
  • content of the generated sequence file. + * This includes validation of dist cache file paths and their file sizes. + */ + private void doValidateSetupGenDC( + RecordReader reader, FileSystem fs, + long[] sortedFileSizes) throws IOException, InterruptedException { + + // Validate permissions of dist cache directory + Path distCacheDir = dce.getDistributedCacheDir(); + assertEquals( + "Wrong permissions for distributed cache dir " + distCacheDir, + fs.getFileStatus(distCacheDir).getPermission().getOtherAction() + .and(FsAction.EXECUTE), FsAction.EXECUTE); + + // Validate the content of the sequence file generated by + // dce.setupGenerateDistCacheData(). + LongWritable key = new LongWritable(); + BytesWritable val = new BytesWritable(); + for (int i = 0; i < sortedFileSizes.length; i++) { + assertTrue("Number of files written to the sequence file by " + + "setupGenerateDistCacheData is less than the expected.", + reader.nextKeyValue()); + key = reader.getCurrentKey(); + val = reader.getCurrentValue(); + long fileSize = key.get(); + String file = new String(val.getBytes(), 0, val.getLength()); + + // Dist Cache files should be sorted based on file size. + assertEquals("Dist cache file size is wrong.", sortedFileSizes[i], + fileSize); + + // Validate dist cache file path. + + // parent dir of dist cache file + Path parent = new Path(file).getParent().makeQualified(fs.getUri(),fs.getWorkingDirectory()); + // should exist in dist cache dir + assertTrue("Public dist cache file path is wrong.", + distCacheDir.equals(parent)); + } + } + + /** + * Test if DistributedCacheEmulator's setup of GenerateDistCacheData is + * working as expected. + * + * @throws IOException + * @throws InterruptedException + */ + @Test (timeout=20000) + public void testSetupGenerateDistCacheData() throws IOException, + InterruptedException { + long[] sortedFileSizes = new long[5]; + Configuration jobConf = runSetupGenerateDistCacheData(true, sortedFileSizes); + validateSetupGenDC(jobConf, sortedFileSizes); + + // Verify if correct exit code is seen when -generate option is missing and + // distributed cache files are missing in the expected path. + runSetupGenerateDistCacheData(false, sortedFileSizes); + } + + /** + * Create DistributedCacheEmulator object and do the initialization by calling + * init() on it with dummy trace. Also configure the pseudo local FS. + */ + private DistributedCacheEmulator createDistributedCacheEmulator( + Configuration conf, Path ioPath, boolean generate) throws IOException { + DistributedCacheEmulator dce = new DistributedCacheEmulator(conf, ioPath); + JobCreator jobCreator = JobCreator.getPolicy(conf, JobCreator.LOADJOB); + jobCreator.setDistCacheEmulator(dce); + dce.init("dummytrace", jobCreator, generate); + return dce; + } + + /** + * Test the configuration property for disabling/enabling emulation of + * distributed cache load. 
+ */ + @Test (timeout=2000) + public void testDistCacheEmulationConfigurability() throws IOException { + Configuration jobConf = GridmixTestUtils.mrvl.getConfig(); + Path ioPath = new Path("testDistCacheEmulationConfigurability") + .makeQualified(GridmixTestUtils.dfs.getUri(),GridmixTestUtils.dfs.getWorkingDirectory()); + FileSystem fs = FileSystem.get(jobConf); + FileSystem.mkdirs(fs, ioPath, new FsPermission((short) 0777)); + + // default config + dce = createDistributedCacheEmulator(jobConf, ioPath, false); + assertTrue("Default configuration of " + + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE + + " is wrong.", dce.shouldEmulateDistCacheLoad()); + + // config property set to false + jobConf.setBoolean( + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE, false); + dce = createDistributedCacheEmulator(jobConf, ioPath, false); + assertFalse("Disabling of emulation of distributed cache load by setting " + + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE + + " to false is not working.", dce.shouldEmulateDistCacheLoad()); + } +/** + * test method configureDistCacheFiles + * + */ + @Test (timeout=2000) + public void testDistCacheEmulator() throws Exception { + + Configuration conf = new Configuration(); + configureDummyDistCacheFiles(conf); + File ws = new File("target" + File.separator + this.getClass().getName()); + Path ioPath = new Path(ws.getAbsolutePath()); + + DistributedCacheEmulator dce = new DistributedCacheEmulator(conf, ioPath); + JobConf jobConf = new JobConf(conf); + jobConf.setUser(UserGroupInformation.getCurrentUser().getShortUserName()); + File fin=new File("src"+File.separator+"test"+File.separator+"resources"+File.separator+"data"+File.separator+"wordcount.json"); + dce.init(fin.getAbsolutePath(), JobCreator.LOADJOB, true); + dce.configureDistCacheFiles(conf, jobConf); + + String[] caches=conf.getStrings(MRJobConfig.CACHE_FILES); + String[] tmpfiles=conf.getStrings("tmpfiles"); + // this method should fill caches AND tmpfiles from MRJobConfig.CACHE_FILES property + assertEquals(6, ((caches==null?0:caches.length)+(tmpfiles==null?0:tmpfiles.length))); + } +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java new file mode 100644 index 00000000000..ef1265b903f --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridMixClasses.java @@ -0,0 +1,989 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.mapred.gridmix; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CountDownLatch; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.CustomOutputCommitter; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PositionedReadable; +import org.apache.hadoop.fs.Seekable; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobContext; +import org.apache.hadoop.mapred.RawKeyValueIterator; +import org.apache.hadoop.mapred.gridmix.GridmixKey.Spec; +import org.apache.hadoop.mapred.gridmix.SleepJob.SleepReducer; +import org.apache.hadoop.mapred.gridmix.SleepJob.SleepSplit; +import org.apache.hadoop.mapreduce.Counter; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.MapContext; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.RecordReader; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.ReduceContext; +import org.apache.hadoop.mapreduce.StatusReporter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.Mapper.Context; +import org.apache.hadoop.mapreduce.counters.GenericCounter; +import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit; +import org.apache.hadoop.mapreduce.lib.map.WrappedMapper; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer; +import org.apache.hadoop.mapreduce.task.MapContextImpl; +import org.apache.hadoop.mapreduce.task.ReduceContextImpl; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl.DummyReporter; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.tools.rumen.JobStory; +import org.apache.hadoop.tools.rumen.JobStoryProducer; +import org.apache.hadoop.tools.rumen.ResourceUsageMetrics; +import org.apache.hadoop.tools.rumen.ZombieJobProducer; +import org.apache.hadoop.util.Progress; +import org.junit.Assert; +import org.junit.Test; + +import static org.mockito.Mockito.*; + +import static org.junit.Assert.*; + +public class TestGridMixClasses { + private static final Log LOG = LogFactory.getLog(TestGridMixClasses.class); + + /* + * simple test LoadSplit (getters,copy, write, read...) 
+ */ + @Test (timeout=1000) + public void testLoadSplit() throws Exception { + + LoadSplit test = getLoadSplit(); + + ByteArrayOutputStream data = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(data); + test.write(out); + LoadSplit copy = new LoadSplit(); + copy.readFields(new DataInputStream(new ByteArrayInputStream(data + .toByteArray()))); + + // data should be the same + assertEquals(test.getId(), copy.getId()); + assertEquals(test.getMapCount(), copy.getMapCount()); + assertEquals(test.getInputRecords(), copy.getInputRecords()); + + assertEquals(test.getOutputBytes()[0], copy.getOutputBytes()[0]); + assertEquals(test.getOutputRecords()[0], copy.getOutputRecords()[0]); + assertEquals(test.getReduceBytes(0), copy.getReduceBytes(0)); + assertEquals(test.getReduceRecords(0), copy.getReduceRecords(0)); + assertEquals(test.getMapResourceUsageMetrics().getCumulativeCpuUsage(), + copy.getMapResourceUsageMetrics().getCumulativeCpuUsage()); + assertEquals(test.getReduceResourceUsageMetrics(0).getCumulativeCpuUsage(), + copy.getReduceResourceUsageMetrics(0).getCumulativeCpuUsage()); + + } + + /* + * simple test GridmixSplit (copy, getters, write, read..) + */ + @Test (timeout=1000) + public void testGridmixSplit() throws Exception { + Path[] files = {new Path("one"), new Path("two")}; + long[] start = {1, 2}; + long[] lengths = {100, 200}; + String[] locations = {"locOne", "loctwo"}; + + CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths, + locations); + ResourceUsageMetrics metrics = new ResourceUsageMetrics(); + metrics.setCumulativeCpuUsage(200); + + double[] reduceBytes = {8.1d, 8.2d}; + double[] reduceRecords = {9.1d, 9.2d}; + long[] reduceOutputBytes = {101L, 102L}; + long[] reduceOutputRecords = {111L, 112L}; + + GridmixSplit test = new GridmixSplit(cfSplit, 2, 3, 4L, 5L, 6L, 7L, + reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords); + + ByteArrayOutputStream data = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(data); + test.write(out); + GridmixSplit copy = new GridmixSplit(); + copy.readFields(new DataInputStream(new ByteArrayInputStream(data + .toByteArray()))); + + // data should be the same + assertEquals(test.getId(), copy.getId()); + assertEquals(test.getMapCount(), copy.getMapCount()); + assertEquals(test.getInputRecords(), copy.getInputRecords()); + + assertEquals(test.getOutputBytes()[0], copy.getOutputBytes()[0]); + assertEquals(test.getOutputRecords()[0], copy.getOutputRecords()[0]); + assertEquals(test.getReduceBytes(0), copy.getReduceBytes(0)); + assertEquals(test.getReduceRecords(0), copy.getReduceRecords(0)); + + } + + /* + * test LoadMapper loadMapper should write to writer record for each reduce + */ + @SuppressWarnings({"rawtypes", "unchecked"}) + @Test (timeout=10000) + public void testLoadMapper() throws Exception { + + Configuration conf = new Configuration(); + conf.setInt(JobContext.NUM_REDUCES, 2); + + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); + + TaskAttemptID taskId = new TaskAttemptID(); + RecordReader reader = new FakeRecordReader(); + + LoadRecordGkGrWriter writer = new LoadRecordGkGrWriter(); + + OutputCommitter committer = new CustomOutputCommitter(); + StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter(); + LoadSplit split = getLoadSplit(); + + MapContext mapContext = new MapContextImpl( + conf, taskId, reader, writer, committer, reporter, split); + // context + 
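+    // WrappedMapper exposes this MapContext through the Mapper.Context
+    // interface that LoadMapper.run() expects below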
Context ctx = new WrappedMapper() + .getMapContext(mapContext); + + reader.initialize(split, ctx); + ctx.getConfiguration().setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); + CompressionEmulationUtil.setCompressionEmulationEnabled( + ctx.getConfiguration(), true); + + LoadJob.LoadMapper mapper = new LoadJob.LoadMapper(); + // setup, map, clean + mapper.run(ctx); + + Map data = writer.getData(); + // check result + assertEquals(2, data.size()); + + } + + private LoadSplit getLoadSplit() throws Exception { + + Path[] files = {new Path("one"), new Path("two")}; + long[] start = {1, 2}; + long[] lengths = {100, 200}; + String[] locations = {"locOne", "loctwo"}; + + CombineFileSplit cfSplit = new CombineFileSplit(files, start, lengths, + locations); + ResourceUsageMetrics metrics = new ResourceUsageMetrics(); + metrics.setCumulativeCpuUsage(200); + ResourceUsageMetrics[] rMetrics = {metrics}; + + double[] reduceBytes = {8.1d, 8.2d}; + double[] reduceRecords = {9.1d, 9.2d}; + long[] reduceOutputBytes = {101L, 102L}; + long[] reduceOutputRecords = {111L, 112L}; + + return new LoadSplit(cfSplit, 2, 1, 4L, 5L, 6L, 7L, + reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords, + metrics, rMetrics); + } + + private class FakeRecordLLReader extends + RecordReader { + + int counter = 10; + + @Override + public void initialize(InputSplit split, TaskAttemptContext context) + throws IOException, InterruptedException { + + } + + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + counter--; + return counter > 0; + } + + @Override + public LongWritable getCurrentKey() throws IOException, + InterruptedException { + + return new LongWritable(counter); + } + + @Override + public LongWritable getCurrentValue() throws IOException, + InterruptedException { + return new LongWritable(counter * 10); + } + + @Override + public float getProgress() throws IOException, InterruptedException { + return counter / 10.0f; + } + + @Override + public void close() throws IOException { + // restore data + counter = 10; + } + } + + private class FakeRecordReader extends + RecordReader { + + int counter = 10; + + @Override + public void initialize(InputSplit split, TaskAttemptContext context) + throws IOException, InterruptedException { + + } + + @Override + public boolean nextKeyValue() throws IOException, InterruptedException { + counter--; + return counter > 0; + } + + @Override + public NullWritable getCurrentKey() throws IOException, + InterruptedException { + + return NullWritable.get(); + } + + @Override + public GridmixRecord getCurrentValue() throws IOException, + InterruptedException { + return new GridmixRecord(100, 100L); + } + + @Override + public float getProgress() throws IOException, InterruptedException { + return counter / 10.0f; + } + + @Override + public void close() throws IOException { + // restore data + counter = 10; + } + } + + private class LoadRecordGkGrWriter extends + RecordWriter { + private Map data = new HashMap(); + + @Override + public void write(GridmixKey key, GridmixRecord value) throws IOException, + InterruptedException { + data.put(key, value); + } + + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + } + + public Map getData() { + return data; + } + + } + + private class LoadRecordGkNullWriter extends + RecordWriter { + private Map data = new HashMap(); + + @Override + public void write(GridmixKey key, NullWritable value) throws IOException, + InterruptedException { + data.put(key, value); 
+ } + + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + } + + public Map getData() { + return data; + } + + } + + private class LoadRecordWriter extends + RecordWriter { + private Map data = new HashMap(); + + @Override + public void write(NullWritable key, GridmixRecord value) + throws IOException, InterruptedException { + data.put(key, value); + } + + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + } + + public Map getData() { + return data; + } + + } + + /* + * test LoadSortComparator + */ + @Test (timeout=1000) + public void testLoadJobLoadSortComparator() throws Exception { + LoadJob.LoadSortComparator test = new LoadJob.LoadSortComparator(); + + ByteArrayOutputStream data = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(data); + WritableUtils.writeVInt(dos, 2); + WritableUtils.writeVInt(dos, 1); + WritableUtils.writeVInt(dos, 4); + WritableUtils.writeVInt(dos, 7); + WritableUtils.writeVInt(dos, 4); + + byte[] b1 = data.toByteArray(); + + byte[] b2 = data.toByteArray(); + + // the same data should be equals + assertEquals(0, test.compare(b1, 0, 1, b2, 0, 1)); + b2[2] = 5; + // compare like GridMixKey first byte: shift count -1=4-5 + assertEquals(-1, test.compare(b1, 0, 1, b2, 0, 1)); + b2[2] = 2; + // compare like GridMixKey first byte: shift count 2=4-2 + assertEquals(2, test.compare(b1, 0, 1, b2, 0, 1)); + // compare arrays by first byte witch offset (2-1) because 4==4 + b2[2] = 4; + assertEquals(1, test.compare(b1, 0, 1, b2, 1, 1)); + + } + + /* + * test SpecGroupingComparator + */ + @Test (timeout=1000) + public void testGridmixJobSpecGroupingComparator() throws Exception { + GridmixJob.SpecGroupingComparator test = new GridmixJob.SpecGroupingComparator(); + + ByteArrayOutputStream data = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(data); + WritableUtils.writeVInt(dos, 2); + WritableUtils.writeVInt(dos, 1); + // 0: REDUCE SPEC + WritableUtils.writeVInt(dos, 0); + WritableUtils.writeVInt(dos, 7); + WritableUtils.writeVInt(dos, 4); + + byte[] b1 = data.toByteArray(); + + byte[] b2 = data.toByteArray(); + + // the same object should be equals + assertEquals(0, test.compare(b1, 0, 1, b2, 0, 1)); + b2[2] = 1; + // for Reduce + assertEquals(-1, test.compare(b1, 0, 1, b2, 0, 1)); + // by Reduce spec + b2[2] = 1; // 1: DATA SPEC + assertEquals(-1, test.compare(b1, 0, 1, b2, 0, 1)); + // compare GridmixKey the same objects should be equals + assertEquals(0, test.compare(new GridmixKey(GridmixKey.DATA, 100, 2), + new GridmixKey(GridmixKey.DATA, 100, 2))); + // REDUSE SPEC + assertEquals(-1, test.compare( + new GridmixKey(GridmixKey.REDUCE_SPEC, 100, 2), new GridmixKey( + GridmixKey.DATA, 100, 2))); + assertEquals(1, test.compare(new GridmixKey(GridmixKey.DATA, 100, 2), + new GridmixKey(GridmixKey.REDUCE_SPEC, 100, 2))); + // only DATA + assertEquals(2, test.compare(new GridmixKey(GridmixKey.DATA, 102, 2), + new GridmixKey(GridmixKey.DATA, 100, 2))); + + } + + /* + * test CompareGridmixJob only equals and compare + */ + @Test (timeout=10000) + public void testCompareGridmixJob() throws Exception { + Configuration conf = new Configuration(); + Path outRoot = new Path("target"); + JobStory jobDesc = mock(JobStory.class); + when(jobDesc.getName()).thenReturn("JobName"); + when(jobDesc.getJobConf()).thenReturn(new JobConf(conf)); + UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + GridmixJob j1 = new 
LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 0); + GridmixJob j2 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 0); + GridmixJob j3 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 1); + GridmixJob j4 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 1); + + assertTrue(j1.equals(j2)); + assertEquals(0, j1.compareTo(j2)); + // Only one parameter matters + assertFalse(j1.equals(j3)); + // compare id and submissionMillis + assertEquals(-1, j1.compareTo(j3)); + assertEquals(-1, j1.compareTo(j4)); + + } + + /* + * test ReadRecordFactory. should read all data from inputstream + */ + @Test (timeout=1000) + public void testReadRecordFactory() throws Exception { + + // RecordFactory factory, InputStream src, Configuration conf + RecordFactory rf = new FakeRecordFactory(); + FakeInputStream input = new FakeInputStream(); + ReadRecordFactory test = new ReadRecordFactory(rf, input, + new Configuration()); + GridmixKey key = new GridmixKey(GridmixKey.DATA, 100, 2); + GridmixRecord val = new GridmixRecord(200, 2); + while (test.next(key, val)) { + + } + // should be read 10* (GridmixKey.size +GridmixRecord.value) + assertEquals(3000, input.getCounter()); + // should be -1 because all data readed; + assertEquals(-1, rf.getProgress(), 0.01); + + test.close(); + } + + private class FakeRecordFactory extends RecordFactory { + + private int counter = 10; + + @Override + public void close() throws IOException { + + } + + @Override + public boolean next(GridmixKey key, GridmixRecord val) throws IOException { + counter--; + return counter >= 0; + } + + @Override + public float getProgress() throws IOException { + return counter; + } + + } + + private class FakeInputStream extends InputStream implements Seekable, + PositionedReadable { + private long counter; + + @Override + public int read() throws IOException { + return 0; + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + int realLen = len - off; + counter += realLen; + for (int i = 0; i < b.length; i++) { + b[i] = 0; + } + return realLen; + } + + public long getCounter() { + return counter; + } + + @Override + public void seek(long pos) throws IOException { + + } + + @Override + public long getPos() throws IOException { + return counter; + } + + @Override + public boolean seekToNewSource(long targetPos) throws IOException { + return false; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + return 0; + } + + @Override + public void readFully(long position, byte[] buffer, int offset, int length) + throws IOException { + + } + + @Override + public void readFully(long position, byte[] buffer) throws IOException { + + } + } + + private class FakeFSDataInputStream extends FSDataInputStream { + + public FakeFSDataInputStream(InputStream in) throws IOException { + super(in); + + } + + } + + /* + * test LoadRecordReader. It class reads data from some files. 
+ */ + @Test (timeout=1000) + public void testLoadJobLoadRecordReader() throws Exception { + LoadJob.LoadRecordReader test = new LoadJob.LoadRecordReader(); + Configuration conf = new Configuration(); + + FileSystem fs1 = mock(FileSystem.class); + when(fs1.open((Path) anyObject())).thenReturn( + new FakeFSDataInputStream(new FakeInputStream())); + Path p1 = mock(Path.class); + when(p1.getFileSystem((JobConf) anyObject())).thenReturn(fs1); + + FileSystem fs2 = mock(FileSystem.class); + when(fs2.open((Path) anyObject())).thenReturn( + new FakeFSDataInputStream(new FakeInputStream())); + Path p2 = mock(Path.class); + when(p2.getFileSystem((JobConf) anyObject())).thenReturn(fs2); + + Path[] paths = {p1, p2}; + + long[] start = {0, 0}; + long[] lengths = {1000, 1000}; + String[] locations = {"temp1", "temp2"}; + CombineFileSplit cfsplit = new CombineFileSplit(paths, start, lengths, + locations); + double[] reduceBytes = {100, 100}; + double[] reduceRecords = {2, 2}; + long[] reduceOutputBytes = {500, 500}; + long[] reduceOutputRecords = {2, 2}; + ResourceUsageMetrics metrics = new ResourceUsageMetrics(); + ResourceUsageMetrics[] rMetrics = {new ResourceUsageMetrics(), + new ResourceUsageMetrics()}; + LoadSplit input = new LoadSplit(cfsplit, 2, 3, 1500L, 2L, 3000L, 2L, + reduceBytes, reduceRecords, reduceOutputBytes, reduceOutputRecords, + metrics, rMetrics); + TaskAttemptID taskId = new TaskAttemptID(); + TaskAttemptContext ctx = new TaskAttemptContextImpl(conf, taskId); + test.initialize(input, ctx); + GridmixRecord gr = test.getCurrentValue(); + int counter = 0; + while (test.nextKeyValue()) { + gr = test.getCurrentValue(); + if (counter == 0) { + // read first file + assertEquals(0.5, test.getProgress(), 0.001); + } else if (counter == 1) { + // read second file + assertEquals(1.0, test.getProgress(), 0.001); + } + // + assertEquals(1000, gr.getSize()); + counter++; + } + assertEquals(1000, gr.getSize()); + // Two files have been read + assertEquals(2, counter); + + test.close(); + } + + /* + * test LoadReducer + */ + + @Test (timeout=1000) + public void testLoadJobLoadReducer() throws Exception { + LoadJob.LoadReducer test = new LoadJob.LoadReducer(); + + Configuration conf = new Configuration(); + conf.setInt(JobContext.NUM_REDUCES, 2); + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setBoolean(FileOutputFormat.COMPRESS, true); + + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); + TaskAttemptID taskid = new TaskAttemptID(); + + RawKeyValueIterator input = new FakeRawKeyValueIterator(); + + Counter counter = new GenericCounter(); + Counter inputValueCounter = new GenericCounter(); + LoadRecordWriter output = new LoadRecordWriter(); + + OutputCommitter committer = new CustomOutputCommitter(); + + StatusReporter reporter = new DummyReporter(); + RawComparator comparator = new FakeRawComparator(); + + ReduceContext reduceContext = new ReduceContextImpl( + conf, taskid, input, counter, inputValueCounter, output, committer, + reporter, comparator, GridmixKey.class, GridmixRecord.class); + // read for previous data + reduceContext.nextKeyValue(); + org.apache.hadoop.mapreduce.Reducer.Context context = new WrappedReducer() + .getReducerContext(reduceContext); + + // test.setup(context); + test.run(context); + // have been readed 9 records (-1 for previous) + assertEquals(9, counter.getValue()); + assertEquals(10, inputValueCounter.getValue()); + assertEquals(1, output.getData().size()); + 
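+    // inspect the single record emitted by the reducer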
GridmixRecord record = output.getData().values().iterator() + .next(); + + assertEquals(1593, record.getSize()); + } + + protected class FakeRawKeyValueIterator implements RawKeyValueIterator { + + int counter = 10; + + @Override + public DataInputBuffer getKey() throws IOException { + ByteArrayOutputStream dt = new ByteArrayOutputStream(); + GridmixKey key = new GridmixKey(GridmixKey.REDUCE_SPEC, 10 * counter, 1L); + Spec spec = new Spec(); + spec.rec_in = counter; + spec.rec_out = counter; + spec.bytes_out = counter * 100; + + key.setSpec(spec); + key.write(new DataOutputStream(dt)); + DataInputBuffer result = new DataInputBuffer(); + byte[] b = dt.toByteArray(); + result.reset(b, 0, b.length); + return result; + } + + @Override + public DataInputBuffer getValue() throws IOException { + ByteArrayOutputStream dt = new ByteArrayOutputStream(); + GridmixRecord key = new GridmixRecord(100, 1); + key.write(new DataOutputStream(dt)); + DataInputBuffer result = new DataInputBuffer(); + byte[] b = dt.toByteArray(); + result.reset(b, 0, b.length); + return result; + } + + @Override + public boolean next() throws IOException { + counter--; + return counter >= 0; + } + + @Override + public void close() throws IOException { + + } + + @Override + public Progress getProgress() { + return null; + } + + } + + private class FakeRawComparator implements RawComparator { + + @Override + public int compare(GridmixKey o1, GridmixKey o2) { + return o1.compareTo(o2); + } + + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + if ((l1 - s1) != (l2 - s2)) { + return (l1 - s1) - (l2 - s2); + } + int len = l1 - s1; + for (int i = 0; i < len; i++) { + if (b1[s1 + i] != b2[s2 + i]) { + return b1[s1 + i] - b2[s2 + i]; + } + } + return 0; + } + + } + + /* + * test SerialJobFactory + */ + @Test (timeout=40000) + public void testSerialReaderThread() throws Exception { + + Configuration conf = new Configuration(); + File fin = new File("src" + File.separator + "test" + File.separator + + "resources" + File.separator + "data" + File.separator + + "wordcount2.json"); + // read couple jobs from wordcount2.json + JobStoryProducer jobProducer = new ZombieJobProducer(new Path( + fin.getAbsolutePath()), null, conf); + CountDownLatch startFlag = new CountDownLatch(1); + UserResolver resolver = new SubmitterUserResolver(); + FakeJobSubmitter submitter = new FakeJobSubmitter(); + File ws = new File("target" + File.separator + this.getClass().getName()); + if (!ws.exists()) { + Assert.assertTrue(ws.mkdirs()); + } + + SerialJobFactory jobFactory = new SerialJobFactory(submitter, jobProducer, + new Path(ws.getAbsolutePath()), conf, startFlag, resolver); + + Path ioPath = new Path(ws.getAbsolutePath()); + jobFactory.setDistCacheEmulator(new DistributedCacheEmulator(conf, ioPath)); + Thread test = jobFactory.createReaderThread(); + test.start(); + Thread.sleep(1000); + // SerialReaderThread waits startFlag + assertEquals(0, submitter.getJobs().size()); + // start! 
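+    // releasing the latch lets the reader thread begin submitting jobs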
+ startFlag.countDown(); + while (test.isAlive()) { + Thread.sleep(1000); + jobFactory.update(null); + } + // submitter was called twice + assertEquals(2, submitter.getJobs().size()); + } + + private class FakeJobSubmitter extends JobSubmitter { + // counter for submitted jobs + private List jobs = new ArrayList(); + + public FakeJobSubmitter() { + super(null, 1, 1, null, null); + + } + + @Override + public void add(GridmixJob job) throws InterruptedException { + jobs.add(job); + } + + public List getJobs() { + return jobs; + } + } + + /* + * test SleepMapper + */ + @SuppressWarnings({"unchecked", "rawtypes"}) + @Test (timeout=10000) + public void testSleepMapper() throws Exception { + SleepJob.SleepMapper test = new SleepJob.SleepMapper(); + + Configuration conf = new Configuration(); + conf.setInt(JobContext.NUM_REDUCES, 2); + + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); + TaskAttemptID taskId = new TaskAttemptID(); + FakeRecordLLReader reader = new FakeRecordLLReader(); + LoadRecordGkNullWriter writer = new LoadRecordGkNullWriter(); + OutputCommitter committer = new CustomOutputCommitter(); + StatusReporter reporter = new TaskAttemptContextImpl.DummyReporter(); + SleepSplit split = getSleepSplit(); + MapContext mapcontext = new MapContextImpl( + conf, taskId, reader, writer, committer, reporter, split); + Context context = new WrappedMapper() + .getMapContext(mapcontext); + + long start = System.currentTimeMillis(); + LOG.info("start:" + start); + LongWritable key = new LongWritable(start + 2000); + LongWritable value = new LongWritable(start + 2000); + // should slip 2 sec + test.map(key, value, context); + LOG.info("finish:" + System.currentTimeMillis()); + assertTrue(System.currentTimeMillis() >= (start + 2000)); + + test.cleanup(context); + assertEquals(1, writer.getData().size()); + } + + private SleepSplit getSleepSplit() throws Exception { + + String[] locations = {"locOne", "loctwo"}; + + long[] reduceDurations = {101L, 102L}; + + return new SleepSplit(0, 2000L, reduceDurations, 2, locations); + } + + /* + * test SleepReducer + */ + @Test (timeout=1000) + public void testSleepReducer() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(JobContext.NUM_REDUCES, 2); + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setBoolean(FileOutputFormat.COMPRESS, true); + + CompressionEmulationUtil.setCompressionEmulationEnabled(conf, true); + conf.setBoolean(MRJobConfig.MAP_OUTPUT_COMPRESS, true); + TaskAttemptID taskId = new TaskAttemptID(); + + RawKeyValueIterator input = new FakeRawKeyValueReducerIterator(); + + Counter counter = new GenericCounter(); + Counter inputValueCounter = new GenericCounter(); + RecordWriter output = new LoadRecordReduceWriter(); + + OutputCommitter committer = new CustomOutputCommitter(); + + StatusReporter reporter = new DummyReporter(); + RawComparator comparator = new FakeRawComparator(); + + ReduceContext reducecontext = new ReduceContextImpl( + conf, taskId, input, counter, inputValueCounter, output, committer, + reporter, comparator, GridmixKey.class, NullWritable.class); + org.apache.hadoop.mapreduce.Reducer.Context context = new WrappedReducer() + .getReducerContext(reducecontext); + + SleepReducer test = new SleepReducer(); + long start = System.currentTimeMillis(); + test.setup(context); + long sleeper = context.getCurrentKey().getReduceOutputBytes(); + // status has been changed + assertEquals("Sleeping... 
" + sleeper + " ms left", context.getStatus()); + // should sleep 0.9 sec + + assertTrue(System.currentTimeMillis() >= (start + sleeper)); + test.cleanup(context); + // status has been changed again + + assertEquals("Slept for " + sleeper, context.getStatus()); + + } + + private class LoadRecordReduceWriter extends + RecordWriter { + + @Override + public void write(NullWritable key, NullWritable value) throws IOException, + InterruptedException { + } + + @Override + public void close(TaskAttemptContext context) throws IOException, + InterruptedException { + } + + } + + protected class FakeRawKeyValueReducerIterator implements RawKeyValueIterator { + + int counter = 10; + + @Override + public DataInputBuffer getKey() throws IOException { + ByteArrayOutputStream dt = new ByteArrayOutputStream(); + GridmixKey key = new GridmixKey(GridmixKey.REDUCE_SPEC, 10 * counter, 1L); + Spec spec = new Spec(); + spec.rec_in = counter; + spec.rec_out = counter; + spec.bytes_out = counter * 100; + + key.setSpec(spec); + key.write(new DataOutputStream(dt)); + DataInputBuffer result = new DataInputBuffer(); + byte[] b = dt.toByteArray(); + result.reset(b, 0, b.length); + return result; + } + + @Override + public DataInputBuffer getValue() throws IOException { + ByteArrayOutputStream dt = new ByteArrayOutputStream(); + NullWritable key = NullWritable.get(); + key.write(new DataOutputStream(dt)); + DataInputBuffer result = new DataInputBuffer(); + byte[] b = dt.toByteArray(); + result.reset(b, 0, b.length); + return result; + } + + @Override + public boolean next() throws IOException { + counter--; + return counter >= 0; + } + + @Override + public void close() throws IOException { + + } + + @Override + public Progress getProgress() { + return null; + } + + } +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java new file mode 100644 index 00000000000..f1800c177aa --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSubmission.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.mapred.gridmix; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.tools.rumen.JobStory; +import org.apache.hadoop.tools.rumen.JobStoryProducer; +import org.apache.hadoop.util.ExitUtil; +import org.apache.log4j.Level; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.InputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.util.zip.GZIPInputStream; + +import static org.junit.Assert.*; + +public class TestGridmixSubmission extends CommonJobTest { + private static File inSpace = new File("src" + File.separator + "test" + + File.separator + "resources" + File.separator + "data"); + + + static { + ((Log4JLogger) LogFactory.getLog("org.apache.hadoop.mapred.gridmix")) + .getLogger().setLevel(Level.DEBUG); + } + + + @BeforeClass + public static void init() throws IOException { + GridmixTestUtils.initCluster(TestGridmixSubmission.class); + + System.setProperty("src.test.data", inSpace.getAbsolutePath()); + } + + @AfterClass + public static void shutDown() throws IOException { + GridmixTestUtils.shutdownCluster(); + } + + /** + * Verifies that the given {@code JobStory} corresponds to the checked-in + * WordCount {@code JobStory}. The verification is effected via JUnit + * assertions. + * + * @param js the candidate JobStory. + */ + private void verifyWordCountJobStory(JobStory js) { + assertNotNull("Null JobStory", js); + String expectedJobStory = "WordCount:johndoe:default:1285322645148:3:1"; + String actualJobStory = js.getName() + ":" + js.getUser() + ":" + + js.getQueueName() + ":" + js.getSubmissionTime() + ":" + + js.getNumberMaps() + ":" + js.getNumberReduces(); + assertEquals("Unexpected JobStory", expectedJobStory, actualJobStory); + } + + /** + * Expands a file compressed using {@code gzip}. + * + * @param fs the {@code FileSystem} corresponding to the given file. + * @param in the path to the compressed file. + * @param out the path to the uncompressed output. + * @throws Exception if there was an error during the operation. + */ + private void expandGzippedTrace(FileSystem fs, Path in, Path out) + throws Exception { + byte[] buff = new byte[4096]; + GZIPInputStream gis = new GZIPInputStream(fs.open(in)); + FSDataOutputStream fsdOs = fs.create(out); + int numRead; + while ((numRead = gis.read(buff, 0, buff.length)) != -1) { + fsdOs.write(buff, 0, numRead); + } + gis.close(); + fsdOs.close(); + } + + /** + * Tests the reading of traces in GridMix3. These traces are generated by + * Rumen and are in the JSON format. The traces can optionally be compressed + * and uncompressed traces can also be passed to GridMix3 via its standard + * input stream. The testing is effected via JUnit assertions. + * + * @throws Exception if there was an error. 
+ */ + @Test (timeout=20000) + public void testTraceReader() throws Exception { + Configuration conf = new Configuration(); + FileSystem lfs = FileSystem.getLocal(conf); + Path rootInputDir = new Path(System.getProperty("src.test.data")); + rootInputDir = rootInputDir.makeQualified(lfs.getUri(), + lfs.getWorkingDirectory()); + Path rootTempDir = new Path(System.getProperty("test.build.data", + System.getProperty("java.io.tmpdir")), "testTraceReader"); + rootTempDir = rootTempDir.makeQualified(lfs.getUri(), + lfs.getWorkingDirectory()); + Path inputFile = new Path(rootInputDir, "wordcount.json.gz"); + Path tempFile = new Path(rootTempDir, "gridmix3-wc.json"); + + InputStream origStdIn = System.in; + InputStream tmpIs = null; + try { + DebugGridmix dgm = new DebugGridmix(); + JobStoryProducer jsp = dgm.createJobStoryProducer(inputFile.toString(), + conf); + + LOG.info("Verifying JobStory from compressed trace..."); + verifyWordCountJobStory(jsp.getNextJob()); + + expandGzippedTrace(lfs, inputFile, tempFile); + jsp = dgm.createJobStoryProducer(tempFile.toString(), conf); + LOG.info("Verifying JobStory from uncompressed trace..."); + verifyWordCountJobStory(jsp.getNextJob()); + + tmpIs = lfs.open(tempFile); + System.setIn(tmpIs); + LOG.info("Verifying JobStory from trace in standard input..."); + jsp = dgm.createJobStoryProducer("-", conf); + verifyWordCountJobStory(jsp.getNextJob()); + } finally { + System.setIn(origStdIn); + if (tmpIs != null) { + tmpIs.close(); + } + lfs.delete(rootTempDir, true); + } + } + + @Test (timeout=500000) + public void testReplaySubmit() throws Exception { + policy = GridmixJobSubmissionPolicy.REPLAY; + LOG.info(" Replay started at " + System.currentTimeMillis()); + doSubmission(null, false); + LOG.info(" Replay ended at " + System.currentTimeMillis()); + + } + + @Test (timeout=500000) + public void testStressSubmit() throws Exception { + policy = GridmixJobSubmissionPolicy.STRESS; + LOG.info(" Stress started at " + System.currentTimeMillis()); + doSubmission(null, false); + LOG.info(" Stress ended at " + System.currentTimeMillis()); + } + + // test empty request should be hint message + @Test (timeout=100000) + public void testMain() throws Exception { + + SecurityManager securityManager = System.getSecurityManager(); + + final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + final PrintStream out = new PrintStream(bytes); + final PrintStream oldOut = System.out; + System.setErr(out); + ExitUtil.disableSystemExit(); + try { + String[] argv = new String[0]; + DebugGridmix.main(argv); + + } catch (ExitUtil.ExitException e) { + assertEquals("ExitException", e.getMessage()); + ExitUtil.resetFirstExitException(); + } finally { + System.setErr(oldOut); + System.setSecurityManager(securityManager); + } + String print = bytes.toString(); + // should be printed tip in std error stream + assertTrue(print + .contains("Usage: gridmix [-generate ] [-users URI] [-Dname=value ...] ")); + assertTrue(print.contains("e.g. 
gridmix -generate 100m foo -")); + } + + +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java index 61e5ea05777..2f735988b71 100644 --- a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestGridmixSummary.java @@ -133,7 +133,7 @@ public class TestGridmixSummary { /** * A fake {@link JobFactory}. */ - @SuppressWarnings("unchecked") + @SuppressWarnings("rawtypes") private static class FakeJobFactory extends JobFactory { /** * A fake {@link JobStoryProducer} for {@link FakeJobFactory}. @@ -167,7 +167,7 @@ public class TestGridmixSummary { * Test {@link ExecutionSummarizer}. */ @Test - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked", "rawtypes" }) public void testExecutionSummarizer() throws IOException { Configuration conf = new Configuration(); @@ -360,7 +360,6 @@ public class TestGridmixSummary { * Test {@link ClusterSummarizer}. */ @Test - @SuppressWarnings("deprecation") public void testClusterSummarizer() throws IOException { ClusterSummarizer cs = new ClusterSummarizer(); Configuration conf = new Configuration(); @@ -374,13 +373,13 @@ public class TestGridmixSummary { assertEquals("JT name mismatch", jt, cs.getJobTrackerInfo()); assertEquals("NN name mismatch", nn, cs.getNamenodeInfo()); - ClusterStats cstats = ClusterStats.getClusterStats(); + ClusterStats cStats = ClusterStats.getClusterStats(); conf.set(JTConfig.JT_IPC_ADDRESS, "local"); conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "local"); JobClient jc = new JobClient(conf); - cstats.setClusterMetric(jc.getClusterStatus()); + cStats.setClusterMetric(jc.getClusterStatus()); - cs.update(cstats); + cs.update(cStats); // test assertEquals("Cluster summary test failed!", 1, cs.getMaxMapTasks()); diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestLoadJob.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestLoadJob.java new file mode 100644 index 00000000000..69c3a793736 --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestLoadJob.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapred.gridmix; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.log4j.Level; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; + +/* + Test LoadJob: Gridmix sends data to the job, which should then run to completion without exceptions. + */ +public class TestLoadJob extends CommonJobTest { + + public static final Log LOG = LogFactory.getLog(Gridmix.class); + + static { + ((Log4JLogger) LogFactory.getLog("org.apache.hadoop.mapred.gridmix")) + .getLogger().setLevel(Level.DEBUG); + ((Log4JLogger) LogFactory.getLog(StressJobFactory.class)).getLogger() + .setLevel(Level.DEBUG); + } + + + @BeforeClass + public static void init() throws IOException { + GridmixTestUtils.initCluster(TestLoadJob.class); + } + + @AfterClass + public static void shutDown() throws IOException { + GridmixTestUtils.shutdownCluster(); + } + + + /* + * test serial policy with LoadJob. Task should execute without exceptions + */ + @Test (timeout=500000) + public void testSerialSubmit() throws Exception { + policy = GridmixJobSubmissionPolicy.SERIAL; + LOG.info("Serial started at " + System.currentTimeMillis()); + doSubmission(JobCreator.LOADJOB.name(), false); + + LOG.info("Serial ended at " + System.currentTimeMillis()); + } + + /* + * test replay policy with LoadJob + */ + @Test (timeout=500000) + public void testReplaySubmit() throws Exception { + policy = GridmixJobSubmissionPolicy.REPLAY; + LOG.info(" Replay started at " + System.currentTimeMillis()); + doSubmission(JobCreator.LOADJOB.name(), false); + + LOG.info(" Replay ended at " + System.currentTimeMillis()); + } + + +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestSleepJob.java b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestSleepJob.java new file mode 100644 index 00000000000..8f58e9d66d2 --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/java/org/apache/hadoop/mapred/gridmix/TestSleepJob.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapred.gridmix; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.tools.rumen.JobStory; +import org.apache.log4j.Level; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; + +import static org.junit.Assert.*; + +public class TestSleepJob extends CommonJobTest { + + public static final Log LOG = LogFactory.getLog(Gridmix.class); + + static { + ((Log4JLogger) LogFactory.getLog("org.apache.hadoop.mapred.gridmix")) + .getLogger().setLevel(Level.DEBUG); + } + + static GridmixJobSubmissionPolicy policy = GridmixJobSubmissionPolicy.REPLAY; + + @BeforeClass + public static void init() throws IOException { + GridmixTestUtils.initCluster(TestSleepJob.class); + } + + @AfterClass + public static void shutDown() throws IOException { + GridmixTestUtils.shutdownCluster(); + } + + + /* + * test RandomLocation + */ + @Test + public void testRandomLocation() throws Exception { + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + + testRandomLocation(1, 10, ugi); + testRandomLocation(2, 10, ugi); + } + + @Test + public void testMapTasksOnlySleepJobs() throws Exception { + Configuration configuration = GridmixTestUtils.mrvl.getConfig(); + + DebugJobProducer jobProducer = new DebugJobProducer(5, configuration); + configuration.setBoolean(SleepJob.SLEEPJOB_MAPTASK_ONLY, true); + + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + JobStory story; + int seq = 1; + while ((story = jobProducer.getNextJob()) != null) { + GridmixJob gridmixJob = JobCreator.SLEEPJOB.createGridmixJob(configuration, 0, + story, new Path("ignored"), ugi, seq++); + gridmixJob.buildSplits(null); + Job job = gridmixJob.call(); + assertEquals(0, job.getNumReduceTasks()); + } + jobProducer.close(); + assertEquals(6, seq); + } + + // test Serial submit + @Test + public void testSerialSubmit() throws Exception { + // set policy + policy = GridmixJobSubmissionPolicy.SERIAL; + LOG.info("Serial started at " + System.currentTimeMillis()); + doSubmission(JobCreator.SLEEPJOB.name(), false); + LOG.info("Serial ended at " + System.currentTimeMillis()); + } + + @Test + public void testReplaySubmit() throws Exception { + policy = GridmixJobSubmissionPolicy.REPLAY; + LOG.info(" Replay started at " + System.currentTimeMillis()); + doSubmission(JobCreator.SLEEPJOB.name(), false); + LOG.info(" Replay ended at " + System.currentTimeMillis()); + } + + @Test + public void testStressSubmit() throws Exception { + policy = GridmixJobSubmissionPolicy.STRESS; + LOG.info(" Stress started at " + System.currentTimeMillis()); + doSubmission(JobCreator.SLEEPJOB.name(), false); + LOG.info(" Stress ended at " + System.currentTimeMillis()); + } + + private void testRandomLocation(int locations, int njobs, + UserGroupInformation ugi) throws Exception { +
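// Generates sleep jobs via DebugJobProducer and verifies that each input split reports exactly 'locations' locations. +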
Configuration configuration = new Configuration(); + + DebugJobProducer jobProducer = new DebugJobProducer(njobs, configuration); + Configuration jconf = GridmixTestUtils.mrvl.getConfig(); + jconf.setInt(JobCreator.SLEEPJOB_RANDOM_LOCATIONS, locations); + + JobStory story; + int seq = 1; + while ((story = jobProducer.getNextJob()) != null) { + GridmixJob gridmixJob = JobCreator.SLEEPJOB.createGridmixJob(jconf, 0, + story, new Path("ignored"), ugi, seq++); + gridmixJob.buildSplits(null); + List splits = new SleepJob.SleepInputFormat() + .getSplits(gridmixJob.getJob()); + for (InputSplit split : splits) { + assertEquals(locations, split.getLocations().length); + } + } + jobProducer.close(); + } + +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/resources/data/wordcount.json b/hadoop-tools/hadoop-gridmix/src/test/resources/data/wordcount.json new file mode 100644 index 00000000000..1b7ccf860b6 --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/resources/data/wordcount.json @@ -0,0 +1,414 @@ +{ + "priority" : "NORMAL", + "jobID" : "job_201009241532_0001", + "user" : "johndoe", + "jobName" : "WordCount", + "mapTasks" : [ { + "startTime" : 1285322651360, + "taskID" : "task_201009241532_0001_m_000000", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322651366, + "finishTime" : 1285322658262, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000000_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 704270, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 48266, + "mapInputRecords" : 13427, + "mapOutputBytes" : 1182333, + "mapOutputRecords" : 126063, + "combineInputRecords" : 126063, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 6612, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322660778, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 704270, + "inputRecords" : 13427, + "outputBytes" : 48266, + "outputRecords" : 126063 + }, { + "startTime" : 1285322651361, + "taskID" : "task_201009241532_0001_m_000001", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322651378, + "finishTime" : 1285322657906, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000001_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 577214, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 58143, + "mapInputRecords" : 13015, + "mapOutputBytes" : 985534, + "mapOutputRecords" : 108400, + "combineInputRecords" : 108400, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 8214, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322660781, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 577214, + "inputRecords" : 13015, + "outputBytes" : 58143, + "outputRecords" : 108400 + }, { + "startTime" : 1285322660789, + "taskID" : "task_201009241532_0001_m_000002", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322660807, + "finishTime" : 1285322664865, + "result" : "SUCCESS", + "attemptID" : 
"attempt_201009241532_0001_m_000002_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 163907, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 21510, + "mapInputRecords" : 3736, + "mapOutputBytes" : 275796, + "mapOutputRecords" : 30528, + "combineInputRecords" : 30528, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 3040, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322666805, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 163907, + "inputRecords" : 3736, + "outputBytes" : 21510, + "outputRecords" : 30528 + } ], + "finishTime" : 1285322675837, + "reduceTasks" : [ { + "startTime" : 1285322660790, + "taskID" : "task_201009241532_0001_r_000000", + "taskType" : "REDUCE", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322660807, + "finishTime" : 1285322670759, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_r_000000_0", + "shuffleFinished" : 1285322667962, + "sortFinished" : 1285322668146, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : 122793, + "fileBytesRead" : 111026, + "fileBytesWritten" : 111026, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : 0, + "reduceInputGroups" : 11713, + "reduceInputRecords" : 17866, + "reduceShuffleBytes" : 127823, + "reduceOutputRecords" : 11713, + "spilledRecords" : 17866, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322672821, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : 127823, + "inputRecords" : 17866, + "outputBytes" : 122793, + "outputRecords" : 11713 + } ], + "submitTime" : 1285322645148, + "launchTime" : 1285322645614, + "totalMaps" : 3, + "totalReduces" : 1, + "otherTasks" : [ { + "startTime" : 1285322648294, + "taskID" : "task_201009241532_0001_m_000004", + "taskType" : "SETUP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322648482, + "finishTime" : 1285322649588, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000004_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : -1, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : -1, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 0, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322651351, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : -1, + "inputRecords" : -1, + "outputBytes" : -1, + "outputRecords" : -1 + }, { + "startTime" : 1285322672829, + "taskID" : "task_201009241532_0001_m_000003", + "taskType" : "CLEANUP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322672838, + "finishTime" : 1285322673971, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000003_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : -1, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : -1, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + 
"reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 0, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322675835, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : -1, + "inputRecords" : -1, + "outputBytes" : -1, + "outputRecords" : -1 + } ], + "computonsPerMapInputByte" : -1, + "computonsPerMapOutputByte" : -1, + "computonsPerReduceInputByte" : -1, + "computonsPerReduceOutputByte" : -1, + "heapMegabytes" : 1024, + "outcome" : "SUCCESS", + "jobtype" : "JAVA", + "directDependantJobs" : [ ], + "successfulMapAttemptCDFs" : [ { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 6896, + "minimum" : 4058, + "rankings" : [ { + "datum" : 4058, + "relativeRanking" : 0.05 + }, { + "datum" : 4058, + "relativeRanking" : 0.1 + }, { + "datum" : 4058, + "relativeRanking" : 0.15 + }, { + "datum" : 4058, + "relativeRanking" : 0.2 + }, { + "datum" : 4058, + "relativeRanking" : 0.25 + }, { + "datum" : 4058, + "relativeRanking" : 0.3 + }, { + "datum" : 4058, + "relativeRanking" : 0.35 + }, { + "datum" : 4058, + "relativeRanking" : 0.4 + }, { + "datum" : 4058, + "relativeRanking" : 0.45 + }, { + "datum" : 4058, + "relativeRanking" : 0.5 + }, { + "datum" : 4058, + "relativeRanking" : 0.55 + }, { + "datum" : 4058, + "relativeRanking" : 0.6 + }, { + "datum" : 4058, + "relativeRanking" : 0.65 + }, { + "datum" : 6528, + "relativeRanking" : 0.7 + }, { + "datum" : 6528, + "relativeRanking" : 0.75 + }, { + "datum" : 6528, + "relativeRanking" : 0.8 + }, { + "datum" : 6528, + "relativeRanking" : 0.85 + }, { + "datum" : 6528, + "relativeRanking" : 0.9 + }, { + "datum" : 6528, + "relativeRanking" : 0.95 + } ], + "numberValues" : 3 + } ], + "failedMapAttemptCDFs" : [ { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + } ], + "successfulReduceAttemptCDF" : { + "maximum" : 9952, + "minimum" : 9952, + "rankings" : [ { + "datum" : 9952, + "relativeRanking" : 0.05 + }, { + "datum" : 9952, + "relativeRanking" : 0.1 + }, { + "datum" : 9952, + "relativeRanking" : 0.15 + }, { + "datum" : 9952, + "relativeRanking" : 0.2 + }, { + "datum" : 9952, + "relativeRanking" : 0.25 + }, { + "datum" : 9952, + "relativeRanking" : 0.3 + }, { + "datum" : 9952, + "relativeRanking" : 0.35 + }, { + "datum" : 9952, + "relativeRanking" : 0.4 + }, { + "datum" : 9952, + "relativeRanking" : 0.45 + }, { + "datum" : 9952, + "relativeRanking" : 0.5 + }, { + "datum" : 9952, + "relativeRanking" : 0.55 + }, { + "datum" : 9952, + "relativeRanking" : 0.6 + }, { + "datum" : 9952, + "relativeRanking" : 0.65 + }, { + "datum" : 9952, + "relativeRanking" : 0.7 + }, { + "datum" : 9952, + "relativeRanking" : 0.75 + }, { + "datum" : 9952, + "relativeRanking" : 0.8 + }, { + "datum" : 9952, + "relativeRanking" : 0.85 + }, { + "datum" : 9952, + "relativeRanking" : 0.9 + }, { + "datum" : 9952, + 
"relativeRanking" : 0.95 + } ], + "numberValues" : 1 + }, + "failedReduceAttemptCDF" : { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, + "mapperTriesToSucceed" : [ 1.0 ], + "failedMapperFraction" : 0.0, + "relativeTime" : 0, + "queue" : "default", + "clusterMapMB" : -1, + "clusterReduceMB" : -1, + "jobMapMB" : 1024, + "jobReduceMB" : 1024 +} diff --git a/hadoop-tools/hadoop-gridmix/src/test/resources/data/wordcount2.json b/hadoop-tools/hadoop-gridmix/src/test/resources/data/wordcount2.json new file mode 100644 index 00000000000..87fcfb92ee9 --- /dev/null +++ b/hadoop-tools/hadoop-gridmix/src/test/resources/data/wordcount2.json @@ -0,0 +1,828 @@ +{ + "priority" : "NORMAL", + "jobID" : "job_201009241532_0001", + "user" : "johndoe", + "jobName" : "WordCount", + "mapTasks" : [ { + "startTime" : 1285322651360, + "taskID" : "task_201009241532_0001_m_000000", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322651366, + "finishTime" : 1285322658262, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000000_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 704270, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 48266, + "mapInputRecords" : 13427, + "mapOutputBytes" : 1182333, + "mapOutputRecords" : 126063, + "combineInputRecords" : 126063, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 6612, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322660778, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 704270, + "inputRecords" : 13427, + "outputBytes" : 48266, + "outputRecords" : 126063 + }, { + "startTime" : 1285322651361, + "taskID" : "task_201009241532_0001_m_000001", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322651378, + "finishTime" : 1285322657906, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000001_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 577214, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 58143, + "mapInputRecords" : 13015, + "mapOutputBytes" : 985534, + "mapOutputRecords" : 108400, + "combineInputRecords" : 108400, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 8214, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322660781, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 577214, + "inputRecords" : 13015, + "outputBytes" : 58143, + "outputRecords" : 108400 + }, { + "startTime" : 1285322660789, + "taskID" : "task_201009241532_0001_m_000002", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322660807, + "finishTime" : 1285322664865, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000002_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 163907, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 21510, + "mapInputRecords" : 3736, + "mapOutputBytes" : 275796, + "mapOutputRecords" : 30528, + "combineInputRecords" : 30528, 
+ "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 3040, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322666805, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 163907, + "inputRecords" : 3736, + "outputBytes" : 21510, + "outputRecords" : 30528 + } ], + "finishTime" : 1285322675837, + "reduceTasks" : [ { + "startTime" : 1285322660790, + "taskID" : "task_201009241532_0001_r_000000", + "taskType" : "REDUCE", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322660807, + "finishTime" : 1285322670759, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_r_000000_0", + "shuffleFinished" : 1285322667962, + "sortFinished" : 1285322668146, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : 122793, + "fileBytesRead" : 111026, + "fileBytesWritten" : 111026, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : 0, + "reduceInputGroups" : 11713, + "reduceInputRecords" : 17866, + "reduceShuffleBytes" : 127823, + "reduceOutputRecords" : 11713, + "spilledRecords" : 17866, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322672821, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : 127823, + "inputRecords" : 17866, + "outputBytes" : 122793, + "outputRecords" : 11713 + } ], + "submitTime" : 1285322645148, + "launchTime" : 1285322645614, + "totalMaps" : 3, + "totalReduces" : 1, + "otherTasks" : [ { + "startTime" : 1285322648294, + "taskID" : "task_201009241532_0001_m_000004", + "taskType" : "SETUP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322648482, + "finishTime" : 1285322649588, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000004_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : -1, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : -1, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 0, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322651351, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : -1, + "inputRecords" : -1, + "outputBytes" : -1, + "outputRecords" : -1 + }, { + "startTime" : 1285322672829, + "taskID" : "task_201009241532_0001_m_000003", + "taskType" : "CLEANUP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322672838, + "finishTime" : 1285322673971, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000003_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : -1, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : -1, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 0, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322675835, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : -1, + "inputRecords" : -1, + "outputBytes" : -1, + "outputRecords" : -1 + } ], + "computonsPerMapInputByte" : 
-1, + "computonsPerMapOutputByte" : -1, + "computonsPerReduceInputByte" : -1, + "computonsPerReduceOutputByte" : -1, + "heapMegabytes" : 1024, + "outcome" : "SUCCESS", + "jobtype" : "JAVA", + "directDependantJobs" : [ ], + "successfulMapAttemptCDFs" : [ { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 6896, + "minimum" : 4058, + "rankings" : [ { + "datum" : 4058, + "relativeRanking" : 0.05 + }, { + "datum" : 4058, + "relativeRanking" : 0.1 + }, { + "datum" : 4058, + "relativeRanking" : 0.15 + }, { + "datum" : 4058, + "relativeRanking" : 0.2 + }, { + "datum" : 4058, + "relativeRanking" : 0.25 + }, { + "datum" : 4058, + "relativeRanking" : 0.3 + }, { + "datum" : 4058, + "relativeRanking" : 0.35 + }, { + "datum" : 4058, + "relativeRanking" : 0.4 + }, { + "datum" : 4058, + "relativeRanking" : 0.45 + }, { + "datum" : 4058, + "relativeRanking" : 0.5 + }, { + "datum" : 4058, + "relativeRanking" : 0.55 + }, { + "datum" : 4058, + "relativeRanking" : 0.6 + }, { + "datum" : 4058, + "relativeRanking" : 0.65 + }, { + "datum" : 6528, + "relativeRanking" : 0.7 + }, { + "datum" : 6528, + "relativeRanking" : 0.75 + }, { + "datum" : 6528, + "relativeRanking" : 0.8 + }, { + "datum" : 6528, + "relativeRanking" : 0.85 + }, { + "datum" : 6528, + "relativeRanking" : 0.9 + }, { + "datum" : 6528, + "relativeRanking" : 0.95 + } ], + "numberValues" : 3 + } ], + "failedMapAttemptCDFs" : [ { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + } ], + "successfulReduceAttemptCDF" : { + "maximum" : 9952, + "minimum" : 9952, + "rankings" : [ { + "datum" : 9952, + "relativeRanking" : 0.05 + }, { + "datum" : 9952, + "relativeRanking" : 0.1 + }, { + "datum" : 9952, + "relativeRanking" : 0.15 + }, { + "datum" : 9952, + "relativeRanking" : 0.2 + }, { + "datum" : 9952, + "relativeRanking" : 0.25 + }, { + "datum" : 9952, + "relativeRanking" : 0.3 + }, { + "datum" : 9952, + "relativeRanking" : 0.35 + }, { + "datum" : 9952, + "relativeRanking" : 0.4 + }, { + "datum" : 9952, + "relativeRanking" : 0.45 + }, { + "datum" : 9952, + "relativeRanking" : 0.5 + }, { + "datum" : 9952, + "relativeRanking" : 0.55 + }, { + "datum" : 9952, + "relativeRanking" : 0.6 + }, { + "datum" : 9952, + "relativeRanking" : 0.65 + }, { + "datum" : 9952, + "relativeRanking" : 0.7 + }, { + "datum" : 9952, + "relativeRanking" : 0.75 + }, { + "datum" : 9952, + "relativeRanking" : 0.8 + }, { + "datum" : 9952, + "relativeRanking" : 0.85 + }, { + "datum" : 9952, + "relativeRanking" : 0.9 + }, { + "datum" : 9952, + "relativeRanking" : 0.95 + } ], + "numberValues" : 1 + }, + "failedReduceAttemptCDF" : { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, + "mapperTriesToSucceed" : [ 1.0 ], + "failedMapperFraction" : 0.0, + "relativeTime" : 0, + "queue" : "default", + 
"clusterMapMB" : -1, + "clusterReduceMB" : -1, + "jobMapMB" : 1024, + "jobReduceMB" : 1024 +} +{ + "priority" : "NORMAL", + "jobID" : "job_201009241532_0001", + "user" : "johndoe", + "jobName" : "WordCount", + "mapTasks" : [ { + "startTime" : 1285322651360, + "taskID" : "task_201009241532_0001_m_000000", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322651366, + "finishTime" : 1285322658262, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000000_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 704270, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 48266, + "mapInputRecords" : 13427, + "mapOutputBytes" : 1182333, + "mapOutputRecords" : 126063, + "combineInputRecords" : 126063, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 6612, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322660778, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 704270, + "inputRecords" : 13427, + "outputBytes" : 48266, + "outputRecords" : 126063 + }, { + "startTime" : 1285322651361, + "taskID" : "task_201009241532_0001_m_000001", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322651378, + "finishTime" : 1285322657906, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000001_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 577214, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 58143, + "mapInputRecords" : 13015, + "mapOutputBytes" : 985534, + "mapOutputRecords" : 108400, + "combineInputRecords" : 108400, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 8214, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322660781, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 577214, + "inputRecords" : 13015, + "outputBytes" : 58143, + "outputRecords" : 108400 + }, { + "startTime" : 1285322660789, + "taskID" : "task_201009241532_0001_m_000002", + "taskType" : "MAP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322660807, + "finishTime" : 1285322664865, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000002_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : 163907, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : 21510, + "mapInputRecords" : 3736, + "mapOutputBytes" : 275796, + "mapOutputRecords" : 30528, + "combineInputRecords" : 30528, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 3040, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322666805, + "preferredLocations" : [ { + "layers" : [ "default-rack", "foo.example.com" ] + } ], + "taskStatus" : "SUCCESS", + "inputBytes" : 163907, + "inputRecords" : 3736, + "outputBytes" : 21510, + "outputRecords" : 30528 + } ], + "finishTime" : 1285322675837, + "reduceTasks" : [ { + "startTime" : 1285322660790, + "taskID" : "task_201009241532_0001_r_000000", + "taskType" : "REDUCE", + "attempts" : [ { + "location" : null, + 
"hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322660807, + "finishTime" : 1285322670759, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_r_000000_0", + "shuffleFinished" : 1285322667962, + "sortFinished" : 1285322668146, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : 122793, + "fileBytesRead" : 111026, + "fileBytesWritten" : 111026, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : 0, + "reduceInputGroups" : 11713, + "reduceInputRecords" : 17866, + "reduceShuffleBytes" : 127823, + "reduceOutputRecords" : 11713, + "spilledRecords" : 17866, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322672821, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : 127823, + "inputRecords" : 17866, + "outputBytes" : 122793, + "outputRecords" : 11713 + } ], + "submitTime" : 1285322645148, + "launchTime" : 1285322645614, + "totalMaps" : 3, + "totalReduces" : 1, + "otherTasks" : [ { + "startTime" : 1285322648294, + "taskID" : "task_201009241532_0001_m_000004", + "taskType" : "SETUP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322648482, + "finishTime" : 1285322649588, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000004_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : -1, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : -1, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 0, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322651351, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : -1, + "inputRecords" : -1, + "outputBytes" : -1, + "outputRecords" : -1 + }, { + "startTime" : 1285322672829, + "taskID" : "task_201009241532_0001_m_000003", + "taskType" : "CLEANUP", + "attempts" : [ { + "location" : null, + "hostName" : "/default-rack/foo.example.com", + "startTime" : 1285322672838, + "finishTime" : 1285322673971, + "result" : "SUCCESS", + "attemptID" : "attempt_201009241532_0001_m_000003_0", + "shuffleFinished" : -1, + "sortFinished" : -1, + "hdfsBytesRead" : -1, + "hdfsBytesWritten" : -1, + "fileBytesRead" : -1, + "fileBytesWritten" : -1, + "mapInputRecords" : -1, + "mapOutputBytes" : -1, + "mapOutputRecords" : -1, + "combineInputRecords" : -1, + "reduceInputGroups" : -1, + "reduceInputRecords" : -1, + "reduceShuffleBytes" : -1, + "reduceOutputRecords" : -1, + "spilledRecords" : 0, + "mapInputBytes" : -1 + } ], + "finishTime" : 1285322675835, + "preferredLocations" : [ ], + "taskStatus" : "SUCCESS", + "inputBytes" : -1, + "inputRecords" : -1, + "outputBytes" : -1, + "outputRecords" : -1 + } ], + "computonsPerMapInputByte" : -1, + "computonsPerMapOutputByte" : -1, + "computonsPerReduceInputByte" : -1, + "computonsPerReduceOutputByte" : -1, + "heapMegabytes" : 1024, + "outcome" : "SUCCESS", + "jobtype" : "JAVA", + "directDependantJobs" : [ ], + "successfulMapAttemptCDFs" : [ { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 
6896, + "minimum" : 4058, + "rankings" : [ { + "datum" : 4058, + "relativeRanking" : 0.05 + }, { + "datum" : 4058, + "relativeRanking" : 0.1 + }, { + "datum" : 4058, + "relativeRanking" : 0.15 + }, { + "datum" : 4058, + "relativeRanking" : 0.2 + }, { + "datum" : 4058, + "relativeRanking" : 0.25 + }, { + "datum" : 4058, + "relativeRanking" : 0.3 + }, { + "datum" : 4058, + "relativeRanking" : 0.35 + }, { + "datum" : 4058, + "relativeRanking" : 0.4 + }, { + "datum" : 4058, + "relativeRanking" : 0.45 + }, { + "datum" : 4058, + "relativeRanking" : 0.5 + }, { + "datum" : 4058, + "relativeRanking" : 0.55 + }, { + "datum" : 4058, + "relativeRanking" : 0.6 + }, { + "datum" : 4058, + "relativeRanking" : 0.65 + }, { + "datum" : 6528, + "relativeRanking" : 0.7 + }, { + "datum" : 6528, + "relativeRanking" : 0.75 + }, { + "datum" : 6528, + "relativeRanking" : 0.8 + }, { + "datum" : 6528, + "relativeRanking" : 0.85 + }, { + "datum" : 6528, + "relativeRanking" : 0.9 + }, { + "datum" : 6528, + "relativeRanking" : 0.95 + } ], + "numberValues" : 3 + } ], + "failedMapAttemptCDFs" : [ { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + } ], + "successfulReduceAttemptCDF" : { + "maximum" : 9952, + "minimum" : 9952, + "rankings" : [ { + "datum" : 9952, + "relativeRanking" : 0.05 + }, { + "datum" : 9952, + "relativeRanking" : 0.1 + }, { + "datum" : 9952, + "relativeRanking" : 0.15 + }, { + "datum" : 9952, + "relativeRanking" : 0.2 + }, { + "datum" : 9952, + "relativeRanking" : 0.25 + }, { + "datum" : 9952, + "relativeRanking" : 0.3 + }, { + "datum" : 9952, + "relativeRanking" : 0.35 + }, { + "datum" : 9952, + "relativeRanking" : 0.4 + }, { + "datum" : 9952, + "relativeRanking" : 0.45 + }, { + "datum" : 9952, + "relativeRanking" : 0.5 + }, { + "datum" : 9952, + "relativeRanking" : 0.55 + }, { + "datum" : 9952, + "relativeRanking" : 0.6 + }, { + "datum" : 9952, + "relativeRanking" : 0.65 + }, { + "datum" : 9952, + "relativeRanking" : 0.7 + }, { + "datum" : 9952, + "relativeRanking" : 0.75 + }, { + "datum" : 9952, + "relativeRanking" : 0.8 + }, { + "datum" : 9952, + "relativeRanking" : 0.85 + }, { + "datum" : 9952, + "relativeRanking" : 0.9 + }, { + "datum" : 9952, + "relativeRanking" : 0.95 + } ], + "numberValues" : 1 + }, + "failedReduceAttemptCDF" : { + "maximum" : 9223372036854775807, + "minimum" : -9223372036854775808, + "rankings" : [ ], + "numberValues" : 0 + }, + "mapperTriesToSucceed" : [ 1.0 ], + "failedMapperFraction" : 0.0, + "relativeTime" : 0, + "queue" : "default", + "clusterMapMB" : -1, + "clusterReduceMB" : -1, + "jobMapMB" : 1024, + "jobReduceMB" : 1024 +} diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java index 73b6957a084..a8497f4cd8d 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java @@ -92,6 +92,10 @@ public class 
Job20LineHistoryEventEmitter extends HistoryEventEmitter { if (workflowAdjacencies == null) { workflowAdjacencies = ""; } + String workflowTags = line.get("WORKFLOW_TAGS"); + if (workflowTags == null) { + workflowTags = ""; + } if (submitTime != null) { @@ -104,7 +108,8 @@ public class Job20LineHistoryEventEmitter extends HistoryEventEmitter { new HashMap(); return new JobSubmittedEvent(jobID, jobName, user, that.originalSubmitTime, jobConf, jobACLs, jobQueueName, - workflowId, workflowName, workflowNodeName, workflowAdjacencies); + workflowId, workflowName, workflowNodeName, workflowAdjacencies, + workflowTags); } return null; diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index 99249693166..7265c0468d5 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -127,6 +127,20 @@ + + copy-test-bin + process-test-resources + + run + + + + + + + + + diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index ec8d72e22ae..d18a7654e09 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -294,8 +294,7 @@ public class StreamJob implements Tool { for (String file : values) { packageFiles_.add(file); try { - URI pathURI = new URI(file); - Path path = new Path(pathURI); + Path path = new Path(file); FileSystem localFs = FileSystem.getLocal(config_); String finalPath = path.makeQualified(localFs).toString(); if(fileList.length() > 0) { @@ -875,7 +874,7 @@ public class StreamJob implements Tool { IdentifierResolver.TEXT_ID)); jobConf_.setClass("stream.map.output.reader.class", idResolver.getOutputReaderClass(), OutputReader.class); - if (isMapperACommand) { + if (isMapperACommand || jobConf_.get("stream.map.output") != null) { // if mapper is a command, then map output key/value classes come from the // idResolver jobConf_.setMapOutputKeyClass(idResolver.getOutputKeyClass()); @@ -891,7 +890,7 @@ public class StreamJob implements Tool { IdentifierResolver.TEXT_ID)); jobConf_.setClass("stream.reduce.output.reader.class", idResolver.getOutputReaderClass(), OutputReader.class); - if (isReducerACommand) { + if (isReducerACommand || jobConf_.get("stream.reduce.output") != null) { // if reducer is a command, then output key/value classes come from the // idResolver jobConf_.setOutputKeyClass(idResolver.getOutputKeyClass()); diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd new file mode 100644 index 00000000000..4b38e3e3b4b --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd @@ -0,0 +1,18 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. 
You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @echo %%A +@rem lines have been copied from stdin to stdout diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd new file mode 100644 index 00000000000..f398a8d65c3 --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd @@ -0,0 +1,18 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @type %%A +@rem files named on stdin have been copied to stdout diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java index ac577e4c7ec..e864e9d8555 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java @@ -19,6 +19,7 @@ package org.apache.hadoop.streaming; import java.io.DataOutputStream; +import java.io.File; import java.io.IOException; import java.util.Map; @@ -27,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.MiniMRCluster; +import org.apache.hadoop.util.Shell; import org.junit.After; import org.junit.Before; @@ -45,7 +47,8 @@ public class TestFileArgs extends TestStreaming private static final String EXPECTED_OUTPUT = "job.jar\t\nsidefile\t\n"; - private static final String LS_PATH = "/bin/ls"; + private static final String LS_PATH = Shell.WINDOWS ? 
"cmd /c dir /B" : + "/bin/ls"; public TestFileArgs() throws IOException { @@ -58,6 +61,7 @@ public class TestFileArgs extends TestStreaming map = LS_PATH; FileSystem.setDefaultUri(conf, "hdfs://" + namenode); + setTestDir(new File("/tmp/TestFileArgs")); } @Before diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java index c5136e6dc7d..47b70ef6fbd 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java @@ -70,8 +70,8 @@ public class TestMultipleArchiveFiles extends TestStreaming namenode = fileSys.getUri().getAuthority(); mr = new MiniMRCluster(1, namenode, 1); - map = "xargs cat"; - reduce = "cat"; + map = XARGS_CAT; + reduce = CAT; } @Override @@ -84,7 +84,8 @@ public class TestMultipleArchiveFiles extends TestStreaming { fileSys.delete(new Path(INPUT_DIR), true); DataOutputStream dos = fileSys.create(new Path(INPUT_FILE)); - String inputFileString = "symlink1/cacheArchive1\nsymlink2/cacheArchive2"; + String inputFileString = "symlink1" + File.separator + + "cacheArchive1\nsymlink2" + File.separator + "cacheArchive2"; dos.write(inputFileString.getBytes("UTF-8")); dos.close(); diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java index 357bfcfd0b3..ae8f57d231c 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java @@ -49,8 +49,8 @@ public class TestMultipleCachefiles String CACHE_FILE = "/testing-streaming/cache.txt"; String CACHE_FILE_2 = "/testing-streaming/cache2.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink"; String mapString2 = "testlink2"; String cacheString = "This is just the cache string"; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java index da0bdae484c..53009dbbabc 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java @@ -33,7 +33,7 @@ public class TestStreamXmlRecordReader extends TestStreaming { INPUT_FILE = new File("target/input.xml"); input = "\t\nroses.are.red\t\nviolets.are.blue\t\n" + "bunnies.are.pink\t\n\t\n"; - map = "cat"; + map = CAT; reduce = "NONE"; outputExpect = input; } diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java index 98ed1a299ea..4f39120a162 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java @@ -33,7 
+33,7 @@ import static org.junit.Assert.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; - +import org.apache.hadoop.util.Shell; /** * This class tests hadoopStreaming in MapReduce local mode. @@ -43,6 +43,22 @@ public class TestStreaming public static final String STREAMING_JAR = JarFinder.getJar(StreamJob.class); + /** + * cat command used for copying stdin to stdout as mapper or reducer function. + * On Windows, use a cmd script that approximates the functionality of cat. + */ + static final String CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/cat.cmd").getAbsolutePath() : "cat"; + + /** + * Command used for iterating through file names on stdin and copying each + * file's contents to stdout, used as mapper or reducer function. On Windows, + * use a cmd script that approximates the functionality of xargs cat. + */ + static final String XARGS_CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/xargs_cat.cmd").getAbsolutePath() : + "xargs cat"; + // "map" command: grep -E (red|green|blue) // reduce command: uniq protected File TEST_DIR; @@ -66,9 +82,22 @@ public class TestStreaming UtilTest utilTest = new UtilTest(getClass().getName()); utilTest.checkUserDir(); utilTest.redirectIfAntJunit(); - TEST_DIR = new File("target/TestStreaming").getAbsoluteFile(); - OUTPUT_DIR = new File(TEST_DIR, "out"); - INPUT_FILE = new File(TEST_DIR, "input.txt"); + setTestDir(new File("target/TestStreaming").getAbsoluteFile()); + } + + /** + * Sets root of test working directory and resets any other paths that must be + * children of the test working directory. Typical usage is for subclasses + * that use HDFS to override the test directory to the form "/tmp/" + * so that on Windows, tests won't attempt to use paths containing a ':' from + * the drive specifier. The ':' character is considered invalid by HDFS. 
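A minimal sketch of how these helpers are meant to be combined (the subclass below is hypothetical; it assumes the org.apache.hadoop.streaming test package so the package-private CAT and XARGS_CAT constants are visible, and it mirrors what TestFileArgs and TestMultipleCachefiles do elsewhere in this patch):

package org.apache.hadoop.streaming;   // needed for access to CAT / XARGS_CAT

import java.io.File;
import java.io.IOException;

public class TestMyDfsStreaming extends TestStreaming {
  public TestMyDfsStreaming() throws IOException {
    map = XARGS_CAT;   // "xargs cat", or the cmd-script equivalent on Windows
    reduce = CAT;      // "cat", or the cmd-script equivalent on Windows
    // Root the HDFS-backed test dir under /tmp so no drive-letter ':' appears.
    setTestDir(new File("/tmp/TestMyDfsStreaming"));
  }
}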
+ * + * @param testDir File to set + */ + protected void setTestDir(File testDir) { + TEST_DIR = testDir; + OUTPUT_DIR = new File(testDir, "out"); + INPUT_FILE = new File(testDir, "input.txt"); } @Before @@ -89,19 +118,18 @@ public class TestStreaming protected void createInput() throws IOException { - DataOutputStream out = getFileSystem().create( - new Path(INPUT_FILE.getAbsolutePath())); + DataOutputStream out = getFileSystem().create(new Path( + INPUT_FILE.getPath())); out.write(getInputData().getBytes("UTF-8")); out.close(); } protected void setInputOutput() { - inputFile = INPUT_FILE.getAbsolutePath(); - outDir = OUTPUT_DIR.getAbsolutePath(); + inputFile = INPUT_FILE.getPath(); + outDir = OUTPUT_DIR.getPath(); } protected String[] genArgs() { - setInputOutput(); args.add("-input");args.add(inputFile); args.add("-output");args.add(outDir); args.add("-mapper");args.add(map); @@ -129,7 +157,7 @@ public class TestStreaming } protected void checkOutput() throws IOException { - Path outPath = new Path(OUTPUT_DIR.getAbsolutePath(), "part-00000"); + Path outPath = new Path(OUTPUT_DIR.getPath(), "part-00000"); FileSystem fs = getFileSystem(); String output = StreamUtil.slurpHadoop(outPath, fs); fs.delete(outPath, true); @@ -155,6 +183,7 @@ public class TestStreaming * @throws IOException */ protected int runStreamJob() throws IOException { + setInputOutput(); createInput(); boolean mayExit = false; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java index 444355f4fbb..c21cb159f4f 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java @@ -76,7 +76,7 @@ public class TestStreamingKeyValue return new String[] { "-input", INPUT_FILE.getAbsolutePath(), "-output", OUTPUT_DIR.getAbsolutePath(), - "-mapper", "cat", + "-mapper", TestStreaming.CAT, "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true", "-jobconf", "stream.non.zero.exit.is.failure=true", "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"), diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java index db63847f187..35eb752b23a 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java @@ -24,6 +24,7 @@ import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.mapreduce.MRJobConfig; import org.junit.Before; import org.junit.Test; @@ -119,7 +120,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testJavaMapperAndCommandReducer() throws Exception { map = "org.apache.hadoop.mapred.lib.IdentityMapper"; - reduce = "cat"; + reduce = CAT; super.testCommandLine(); } @@ -127,7 +128,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void 
testJavaMapperAndCommandReducerAndZeroReduces() throws Exception { map = "org.apache.hadoop.mapred.lib.IdentityMapper"; - reduce = "cat"; + reduce = CAT; args.add("-numReduceTasks"); args.add("0"); super.testCommandLine(); @@ -136,7 +137,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Java Reducer @Test public void testCommandMapperAndJavaReducer() throws Exception { - map = "cat"; + map = CAT; reduce = MyReducer.class.getName(); super.testCommandLine(); } @@ -144,7 +145,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Java Reducer and -numReduceTasks 0 @Test public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception { - map = "cat"; + map = CAT; reduce = MyReducer.class.getName(); args.add("-numReduceTasks"); args.add("0"); @@ -154,7 +155,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Reducer = "NONE" @Test public void testCommandMapperWithReduceNone() throws Exception { - map = "cat"; + map = CAT; reduce = "NONE"; super.testCommandLine(); } @@ -162,8 +163,8 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Command Reducer @Test public void testCommandMapperAndCommandReducer() throws Exception { - map = "cat"; - reduce = "cat"; + map = CAT; + reduce = CAT; super.testCommandLine(); } @@ -171,12 +172,23 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testCommandMapperAndCommandReducerAndZeroReduces() throws Exception { - map = "cat"; - reduce = "cat"; + map = CAT; + reduce = CAT; args.add("-numReduceTasks"); args.add("0"); super.testCommandLine(); } + + @Test + public void testDefaultToIdentityReducer() throws Exception { + args.add("-mapper");args.add(map); + args.add("-jobconf"); + args.add("mapreduce.task.files.preserve.failedtasks=true"); + args.add("-jobconf"); + args.add("stream.tmpdir="+System.getProperty("test.build.data","/tmp")); + args.add("-inputformat");args.add(TextInputFormat.class.getName()); + super.testCommandLine(); + } @Override @Test diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java index 823433c4c04..11c3b4e9b04 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java @@ -83,7 +83,7 @@ public class TestStreamingTaskLog { * (b) hadoop.tasklog.totalLogFileSize * for the children of java tasks in streaming jobs. */ - @Test (timeout = 30000) + @Test (timeout = 120000) public void testStreamingTaskLogWithHadoopCmd() { try { final int numSlaves = 1; @@ -95,13 +95,14 @@ public class TestStreamingTaskLog { fs.delete(testDir, true); } fs.mkdirs(testDir); - File scriptFile = createScript( - testDir.toString() + "/testTaskLog.sh"); + File scriptFile = createScript(testDir.toString() + + (Shell.WINDOWS ? "/testTaskLog.cmd" : "/testTaskLog.sh")); conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false); mr = new MiniMRCluster(numSlaves, fs.getUri().toString(), 1, null, null, conf); writeInputFile(fs, inputPath); - map = scriptFile.getAbsolutePath(); + map = Shell.WINDOWS ? 
"cmd /c " + scriptFile.getAbsolutePath() : + scriptFile.getAbsolutePath(); runStreamJobAndValidateEnv(); @@ -120,8 +121,12 @@ public class TestStreamingTaskLog { File scriptFile = new File(script); UtilTest.recursiveDelete(scriptFile); FileOutputStream in = new FileOutputStream(scriptFile); - in.write(("cat > /dev/null 2>&1\n" + - "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + if (Shell.WINDOWS) { + in.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%".getBytes()); + } else { + in.write(("cat > /dev/null 2>&1\n" + + "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + } in.close(); Shell.execCommand(Shell.getSetPermissionCommand("+x", false, diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java index dba676a32db..730429d6daf 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java @@ -47,13 +47,13 @@ public class TestSymLink String OUTPUT_DIR = "/testing-streaming/out"; String CACHE_FILE = "/testing-streaming/cache.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink\n"; String cacheString = "This is just the cache string"; StreamJob job; - @Test (timeout = 60000) + @Test (timeout = 120000) public void testSymLink() throws Exception { boolean mayExit = false; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TrApp.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TrApp.java index 30142ba823c..e413261fad1 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TrApp.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TrApp.java @@ -43,7 +43,6 @@ public class TrApp // Note the dots translated to underscore: // property names have been escaped in PipeMapRed.safeEnvVarName() expectDefined("mapreduce_cluster_local_dir"); - expect("mapred_output_format_class", "org.apache.hadoop.mapred.TextOutputFormat"); expect("mapreduce_map_output_key_class", "org.apache.hadoop.io.Text"); expect("mapreduce_map_output_value_class", "org.apache.hadoop.io.Text"); diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 42e5e0373dc..4aa9c364fcf 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -25,6 +25,15 @@ Trunk - Unreleased YARN-491. TestContainerLogsPage fails on Windows. (Chris Nauroth via hitesh) + YARN-557. Fix TestUnmanagedAMLauncher failure on Windows. (Chris Nauroth via + vinodkv) + + YARN-524 TestYarnVersionInfo failing if generated properties doesn't + include an SVN URL. (stevel) + + YARN-487. Modify path manipulation in LocalDirsHandlerService to let + TestDiskFailures pass on Windows. (Chris Nauroth via vinodkv) + BREAKDOWN OF HADOOP-8562 SUBTASKS YARN-158. Yarn creating package-info.java must not depend on sh. @@ -54,7 +63,7 @@ Trunk - Unreleased YARN-359. Fixing commands for container signalling in Windows. (Chris Nauroth via vinodkv) - + Release 2.0.5-beta - UNRELEASED INCOMPATIBLE CHANGES @@ -66,6 +75,13 @@ Release 2.0.5-beta - UNRELEASED YARN-440. Flatten RegisterNodeManagerResponse. (Xuan Gong via sseth) + YARN-536. 
Removed the unused objects ContainerState and ContainerStatus from + Container which also don't belong to the container. (Xuan Gong via vinodkv) + + YARN-486. Changed NM's startContainer API to accept Container record given by + RM as a direct parameter instead of as part of the ContainerLaunchContext + record. (Xuan Gong via vinodkv) + + NEW FEATURES IMPROVEMENTS @@ -103,6 +119,29 @@ Release 2.0.5-beta - UNRELEASED YARN-450. Define value for * in the scheduling protocol (Zhijie Shen via bikas) + YARN-475. Remove an unused constant in the public API - + ApplicationConstants.AM_APP_ATTEMPT_ID_ENV. (Hitesh Shah via vinodkv) + + YARN-309. Changed NodeManager to obtain heart-beat interval from the + ResourceManager. (Xuan Gong via vinodkv) + + YARN-447. Move ApplicationComparator in CapacityScheduler to use comparator + in ApplicationId. (Nemon Lou via vinodkv) + + YARN-381. Improve fair scheduler docs. (Sandy Ryza via tomwhite) + + YARN-458. YARN daemon addresses must be placed in many different configs. + (sandyr via tucu) + + YARN-193. Scheduler.normalizeRequest does not account for allocation + requests that exceed maximumAllocation limits (Zhijie Shen via bikas) + + YARN-479. NM retry behavior for connection to RM should be similar for + lost heartbeats (Jian He via bikas) + + YARN-495. Changed NM reboot behaviour to be a simple resync - kill all + containers and re-register with RM. (Jian He via vinodkv) + OPTIMIZATIONS BUG FIXES @@ -134,9 +173,6 @@ Release 2.0.5-beta - UNRELEASED YARN-485. TestProcfsProcessTree#testProcessTree() doesn't wait long enough for the process to die. (kkambatl via tucu) - YARN-470. Support a way to disable resource monitoring on the NodeManager. - (Siddharth Seth via hitesh) - YARN-71. Fix the NodeManager to clean up local-dirs on restart. (Xuan Gong via sseth) @@ -158,6 +194,47 @@ Release 2.0.5-beta - UNRELEASED YARN-24. Nodemanager fails to start if log aggregation enabled and namenode unavailable. (sandyr via tucu) + YARN-515. Node Manager not getting the master key. (Robert Joseph Evans + via jlowe) + + YARN-382. SchedulerUtils improve way normalizeRequest sets the resource + capabilities. (Zhijie Shen via bikas) + + YARN-467. Modify public distributed cache to localize files such that no + local directory hits unix file count limits and thus prevent job failures. + (Omkar Vinit Joshi via vinodkv) + + YARN-101. Fix NodeManager heartbeat processing to not lose track of completed + containers in case of dropped heartbeats. (Xuan Gong via vinodkv) + + YARN-538. RM address DNS lookup can cause unnecessary slowness on every JHS + page load. (sandyr via tucu) + + YARN-532. Change RMAdmin and Localization client protocol PB implementations + to implement closeable so that they can be stopped when needed via + RPC.stopProxy(). (Siddharth Seth via vinodkv) + + YARN-99. Modify private distributed cache to localize files such that no + local directory hits unix file count limits and thus prevent job failures. + (Omkar Vinit Joshi via vinodkv) + + YARN-112. Fixed a race condition during localization that fails containers. + (Omkar Vinit Joshi via vinodkv) + + YARN-534. Change RM restart recovery to also account for AM max-attempts + configuration after the restart. (Jian He via vinodkv) + + YARN-539. Addressed memory leak of LocalResource objects in NM when a resource + localization fails. (Omkar Vinit Joshi via vinodkv) + + YARN-319. Submitting a job to a fair scheduler queue for which the user + does not have permission causes the client to wait forever.
+ (shenhong via tomwhite) + + YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather + than its host:port during scheduling which caused incorrect locality for + containers. (Roger Hoover via acmurthy) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -173,6 +250,9 @@ Release 2.0.4-alpha - UNRELEASED YARN-429. capacity-scheduler config missing from yarn-test artifact. (sseth via hitesh) + YARN-470. Support a way to disable resource monitoring on the NodeManager. + (Siddharth Seth via hitesh) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES @@ -371,10 +451,6 @@ Release 2.0.3-alpha - 2013-02-06 YARN-302. Fair scheduler assignmultiple should default to false. (sandyr via tucu) - YARN-319. Submitting a job to a fair scheduler queue for which the user - does not have permission causes the client to wait forever. - (shenhong via tomwhite) - YARN-372. Move InlineDispatcher from hadoop-yarn-server-resourcemanager to hadoop-yarn-common (sseth via hitesh) @@ -437,6 +513,18 @@ Release 2.0.2-alpha - 2012-09-07 YARN-138. Ensure default values for minimum/maximum container sizes is sane. (harsh & sseth via acmurthy) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES @@ -469,6 +557,8 @@ Release 0.23.7 - UNRELEASED YARN-200. yarn log does not output all needed information, and is in a binary format (Ravi Prakash via jlowe) + YARN-525. make CS node-locality-delay refreshable (Thomas Graves via jlowe) + OPTIMIZATIONS YARN-357. App submission should not be synchronized (daryn) diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 2d63dad48b8..4ba2d72289e 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -256,4 +256,25 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index 2fafffeb7ea..02b15c4dac8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -42,12 +42,6 @@ public interface ApplicationConstants { * only */ public static final String AM_CONTAINER_ID_ENV = "AM_CONTAINER_ID"; - - /** - * The environment variable for APPLICATION_ATTEMPT_ID. Set in AppMaster - * environment only - */ - public static final String AM_APP_ATTEMPT_ID_ENV = "AM_APP_ATTEMPT_ID"; /** * The environment variable for the NM_HOST. Set in the AppMaster environment @@ -109,7 +103,7 @@ public interface ApplicationConstants { * $USER * Final, non-modifiable. */ - USER("USER"), + USER(Shell.WINDOWS ? 
"USERNAME": "USER"), /** * $LOGNAME diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java index a3f7e4b70d0..78862b95d1a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.api.protocolrecords; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.ContainerManager; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; /** @@ -58,4 +59,12 @@ public interface StartContainerRequest { @Public @Stable public abstract void setContainerLaunchContext(ContainerLaunchContext context); + + @Public + @Stable + public Container getContainer(); + + @Public + @Stable + public void setContainer(Container container); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java index e1c589e5308..d4190e09a04 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java @@ -20,10 +20,13 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProtoOrBuilder; @@ -35,7 +38,8 @@ public class StartContainerRequestPBImpl extends ProtoBaseuser submitting the application. - * @return user submitting the application - */ - @Public - @Stable - public String getUser(); - - /** - * Set the user submitting the application. 
- * @param user user submitting the application - */ - @Public - @Stable - public void setUser(String user); /** * Get the ContainerLaunchContext to describe the @@ -207,4 +191,12 @@ public interface ApplicationSubmissionContext { @Public @Unstable public void setMaxAppAttempts(int maxAppAttempts); + + @Public + @Stable + public Resource getResource(); + + @Public + @Stable + public void setResource(Resource resource); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java index 4ac8e0748c9..9478d341216 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java @@ -124,18 +124,6 @@ public interface Container extends Comparable { @Unstable void setPriority(Priority priority); - /** - * Get the current ContainerState of the container. - * @return current ContainerState of the container - */ - @Public - @Stable - ContainerState getState(); - - @Private - @Unstable - void setState(ContainerState state); - /** * Get the ContainerToken for the container. * @return ContainerToken for the container @@ -147,16 +135,4 @@ public interface Container extends Comparable { @Private @Unstable void setContainerToken(ContainerToken containerToken); - - /** - * Get the ContainerStatus of the container. - * @return ContainerStatus of the container - */ - @Public - @Stable - ContainerStatus getContainerStatus(); - - @Private - @Unstable - void setContainerStatus(ContainerStatus containerStatus); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java index 78f85b2b0ec..36cfdfbadd8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java @@ -51,22 +51,6 @@ import org.apache.hadoop.yarn.api.ContainerManager; @Public @Stable public interface ContainerLaunchContext { - /** - * Get ContainerId of container to be launched. - * @return ContainerId of container to be launched - */ - @Public - @Stable - ContainerId getContainerId(); - - /** - * Set ContainerId of container to be launched. - * @param containerId et ContainerId of container to be launched - */ - @Public - @Stable - void setContainerId(ContainerId containerId); - /** * Get the user to whom the container has been allocated. * @return the user to whom the container has been allocated @@ -83,25 +67,6 @@ public interface ContainerLaunchContext { @Stable void setUser(String user); - /** - * Get the Resource allocated to the container by the - * ResourceManager. - * @return Resource allocated to the container by the - * ResourceManager - */ - @Public - @Stable - Resource getResource(); - - /** - * Set the Resource allocated to the container by the - * ResourceManager. - * @param resource allocated resource - */ - @Public - @Stable - void setResource(Resource resource); - /** * Get security tokens (if security is enabled). 
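To make the reshaped launch path concrete, a rough sketch of how an application master would now start an allocated container (not code from this patch; the class name and the placeholder command are invented, and the ContainerManager proxy and Container record are assumed to come from the usual RM and NM protocols):

import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ContainerManager;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.util.Records;

public class ContainerLauncherSketch {
  /** Starts a container the RM has already allocated to this AM. */
  public void start(ContainerManager cm, Container allocated)
      throws YarnRemoteException {
    ContainerLaunchContext ctx = Records.newRecord(ContainerLaunchContext.class);
    ctx.setUser(System.getenv(ApplicationConstants.Environment.USER.key()));
    ctx.setCommands(java.util.Collections.singletonList("true")); // placeholder command
    StartContainerRequest req = Records.newRecord(StartContainerRequest.class);
    req.setContainerLaunchContext(ctx); // what to run: user, env, commands, local resources
    req.setContainer(allocated);        // container id and resource now ride on the request
    cm.startContainer(req);
  }
}

The launch context is left describing only what to run, while the identity and resource come from the Container record the RM already handed out.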
* @return security tokens (if security is enabled) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java index a6a890cc433..403ce6ef4f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java @@ -23,11 +23,13 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ProtoBase; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; public class ApplicationSubmissionContextPBImpl extends ProtoBase @@ -40,7 +42,8 @@ implements ApplicationSubmissionContext { private ApplicationId applicationId = null; private Priority priority = null; private ContainerLaunchContext amContainer = null; - + private Resource resource = null; + public ApplicationSubmissionContextPBImpl() { builder = ApplicationSubmissionContextProto.newBuilder(); } @@ -68,6 +71,11 @@ implements ApplicationSubmissionContext { if (this.amContainer != null) { builder.setAmContainerSpec(convertToProtoFormat(this.amContainer)); } + if (this.resource != null && + !((ResourcePBImpl) this.resource).getProto().equals( + builder.getResource())) { + builder.setResource(convertToProtoFormat(this.resource)); + } } private void mergeLocalToProto() { @@ -165,25 +173,6 @@ implements ApplicationSubmissionContext { } builder.setQueue((queue)); } - - @Override - public String getUser() { - ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder; - if (!p.hasUser()) { - return null; - } - return (p.getUser()); - } - - @Override - public void setUser(String user) { - maybeInitBuilder(); - if (user == null) { - builder.clearUser(); - return; - } - builder.setUser((user)); - } @Override public ContainerLaunchContext getAMContainerSpec() { @@ -244,6 +233,28 @@ implements ApplicationSubmissionContext { builder.setMaxAppAttempts(maxAppAttempts); } + @Override + public Resource getResource() { + ApplicationSubmissionContextProtoOrBuilder p = viaProto ? 
proto : builder; + if (this.resource != null) { + return this.resource; + } + if (!p.hasResource()) { + return null; + } + this.resource = convertFromProtoFormat(p.getResource()); + return this.resource; + } + + @Override + public void setResource(Resource resource) { + maybeInitBuilder(); + if (resource == null) { + builder.clearResource(); + } + this.resource = resource; + } + private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { return new PriorityPBImpl(p); } @@ -268,4 +279,12 @@ implements ApplicationSubmissionContext { private ContainerLaunchContextProto convertToProtoFormat(ContainerLaunchContext t) { return ((ContainerLaunchContextPBImpl)t).getProto(); } + + private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { + return new ResourcePBImpl(p); + } + + private ResourceProto convertToProtoFormat(Resource t) { + return ((ResourcePBImpl)t).getProto(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java index b8ba4df26d7..6a26508f711 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java @@ -26,17 +26,13 @@ import java.util.List; import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.ProtoBase; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringBytesMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringLocalResourceMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringStringMapProto; @@ -50,8 +46,6 @@ implements ContainerLaunchContext { ContainerLaunchContextProto.Builder builder = null; boolean viaProto = false; - private ContainerId containerId = null; - private Resource resource = null; private Map localResources = null; private ByteBuffer containerTokens = null; private Map serviceData = null; @@ -76,16 +70,6 @@ implements ContainerLaunchContext { } private void mergeLocalToBuilder() { - if (this.containerId != null && - !((ContainerIdPBImpl)containerId).getProto().equals( - builder.getContainerId())) { - builder.setContainerId(convertToProtoFormat(this.containerId)); - } - if (this.resource != null && - !((ResourcePBImpl)this.resource).getProto().equals( - builder.getResource())) { - builder.setResource(convertToProtoFormat(this.resource)); - } if (this.localResources != null) { addLocalResourcesToProto(); } @@ -120,28 +104,6 @@ implements ContainerLaunchContext { } viaProto = false; } - - - @Override - public Resource getResource() { - 
ContainerLaunchContextProtoOrBuilder p = viaProto ? proto : builder; - if (this.resource != null) { - return this.resource; - } - if (!p.hasResource()) { - return null; - } - this.resource = convertFromProtoFormat(p.getResource()); - return this.resource; - } - - @Override - public void setResource(Resource resource) { - maybeInitBuilder(); - if (resource == null) - builder.clearResource(); - this.resource = resource; - } @Override public List getCommands() { @@ -197,26 +159,6 @@ implements ContainerLaunchContext { } builder.setUser((user)); } - @Override - public ContainerId getContainerId() { - ContainerLaunchContextProtoOrBuilder p = viaProto ? proto : builder; - if (this.containerId != null) { - return this.containerId; - } - if (!p.hasContainerId()) { - return null; - } - this.containerId = convertFromProtoFormat(p.getContainerId()); - return this.containerId; - } - - @Override - public void setContainerId(ContainerId containerId) { - maybeInitBuilder(); - if (containerId == null) - builder.clearContainerId(); - this.containerId = containerId; - } @Override public Map getLocalResources() { @@ -299,11 +241,12 @@ implements ContainerLaunchContext { @Override public void setContainerTokens(ByteBuffer containerTokens) { maybeInitBuilder(); - if (containerTokens == null) + if (containerTokens == null) { builder.clearContainerTokens(); + } this.containerTokens = containerTokens; } - + @Override public Map getServiceData() { initServiceData(); @@ -500,22 +443,6 @@ implements ContainerLaunchContext { this.applicationACLS.putAll(appACLs); } - private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { - return new ResourcePBImpl(p); - } - - private ResourceProto convertToProtoFormat(Resource t) { - return ((ResourcePBImpl)t).getProto(); - } - - private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { - return new ContainerIdPBImpl(p); - } - - private ContainerIdProto convertToProtoFormat(ContainerId t) { - return ((ContainerIdPBImpl)t).getProto(); - } - private LocalResourcePBImpl convertFromProtoFormat(LocalResourceProto p) { return new LocalResourcePBImpl(p); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java index 92a710a20f4..dd6941ff79f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java @@ -21,8 +21,6 @@ package org.apache.hadoop.yarn.api.records.impl.pb; import org.apache.hadoop.security.proto.SecurityProtos.TokenProto; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerState; -import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerToken; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -31,12 +29,9 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; 
-import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; -import org.apache.hadoop.yarn.util.ProtoUtils; public class ContainerPBImpl extends ProtoBase implements Container { @@ -49,7 +44,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai private Resource resource = null; private Priority priority = null; private ContainerToken containerToken = null; - private ContainerStatus containerStatus = null; public ContainerPBImpl() { builder = ContainerProto.newBuilder(); @@ -94,11 +88,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai builder.getContainerToken())) { builder.setContainerToken(convertToProtoFormat(this.containerToken)); } - if (this.containerStatus != null - && !((ContainerStatusPBImpl) this.containerStatus).getProto().equals( - builder.getContainerStatus())) { - builder.setContainerStatus(convertToProtoFormat(this.containerStatus)); - } } private void mergeLocalToProto() { @@ -115,26 +104,7 @@ public class ContainerPBImpl extends ProtoBase implements Contai } viaProto = false; } - - - @Override - public ContainerState getState() { - ContainerProtoOrBuilder p = viaProto ? proto : builder; - if (!p.hasState()) { - return null; - } - return convertFromProtoFormat(p.getState()); - } - @Override - public void setState(ContainerState state) { - maybeInitBuilder(); - if (state == null) { - builder.clearState(); - return; - } - builder.setState(convertToProtoFormat(state)); - } @Override public ContainerId getId() { ContainerProtoOrBuilder p = viaProto ? proto : builder; @@ -260,35 +230,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai this.containerToken = containerToken; } - @Override - public ContainerStatus getContainerStatus() { - ContainerProtoOrBuilder p = viaProto ? 
proto : builder; - if (this.containerStatus != null) { - return this.containerStatus; - } - if (!p.hasContainerStatus()) { - return null; - } - this.containerStatus = convertFromProtoFormat(p.getContainerStatus()); - return this.containerStatus; - } - - @Override - public void setContainerStatus(ContainerStatus containerStatus) { - maybeInitBuilder(); - if (containerStatus == null) - builder.clearContainerStatus(); - this.containerStatus = containerStatus; - } - - private ContainerStateProto convertToProtoFormat(ContainerState e) { - return ProtoUtils.convertToProtoFormat(e); - } - - private ContainerState convertFromProtoFormat(ContainerStateProto e) { - return ProtoUtils.convertFromProtoFormat(e); - } - private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { return new ContainerIdPBImpl(p); } @@ -329,14 +270,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai return ((ContainerTokenPBImpl)t).getProto(); } - private ContainerStatusPBImpl convertFromProtoFormat(ContainerStatusProto p) { - return new ContainerStatusPBImpl(p); - } - - private ContainerStatusProto convertToProtoFormat(ContainerStatus t) { - return ((ContainerStatusPBImpl)t).getProto(); - } - public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Container: ["); @@ -345,9 +278,7 @@ public class ContainerPBImpl extends ProtoBase implements Contai sb.append("NodeHttpAddress: ").append(getNodeHttpAddress()).append(", "); sb.append("Resource: ").append(getResource()).append(", "); sb.append("Priority: ").append(getPriority()).append(", "); - sb.append("State: ").append(getState()).append(", "); sb.append("Token: ").append(getContainerToken()).append(", "); - sb.append("Status: ").append(getContainerStatus()); sb.append("]"); return sb.toString(); } @@ -357,16 +288,7 @@ public class ContainerPBImpl extends ProtoBase implements Contai public int compareTo(Container other) { if (this.getId().compareTo(other.getId()) == 0) { if (this.getNodeId().compareTo(other.getNodeId()) == 0) { - if (this.getResource().compareTo(other.getResource()) == 0) { - if (this.getState().compareTo(other.getState()) == 0) { - //ContainerToken - return this.getState().compareTo(other.getState()); - } else { - return this.getState().compareTo(other.getState()); - } - } else { - return this.getResource().compareTo(other.getResource()); - } + return this.getResource().compareTo(other.getResource()); } else { return this.getNodeId().compareTo(other.getNodeId()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 4d0ca0f4d79..aec162c1f0f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -67,9 +67,7 @@ message ContainerProto { optional string node_http_address = 3; optional ResourceProto resource = 4; optional PriorityProto priority = 5; - optional ContainerStateProto state = 6; - optional hadoop.common.TokenProto container_token = 7; - optional ContainerStatusProto container_status = 8; + optional hadoop.common.TokenProto container_token = 6; } enum YarnApplicationStateProto { @@ -213,13 +211,13 @@ message ResourceRequestProto { message ApplicationSubmissionContextProto { optional ApplicationIdProto application_id = 1; optional string application_name = 2 [default = "N/A"]; - optional string user = 3; - optional string queue = 4 [default = 
"default"]; - optional PriorityProto priority = 5; - optional ContainerLaunchContextProto am_container_spec = 6; - optional bool cancel_tokens_when_complete = 7 [default = true]; - optional bool unmanaged_am = 8 [default = false]; - optional int32 maxAppAttempts = 9 [default = 0]; + optional string queue = 3 [default = "default"]; + optional PriorityProto priority = 4; + optional ContainerLaunchContextProto am_container_spec = 5; + optional bool cancel_tokens_when_complete = 6 [default = true]; + optional bool unmanaged_am = 7 [default = false]; + optional int32 maxAppAttempts = 8 [default = 0]; + optional ResourceProto resource = 9; } enum ApplicationAccessTypeProto { @@ -266,15 +264,13 @@ message QueueUserACLInfoProto { //////////////////////////////////////////////////////////////////////// message ContainerLaunchContextProto { - optional ContainerIdProto container_id = 1; - optional string user = 2; - optional ResourceProto resource = 3; - repeated StringLocalResourceMapProto localResources = 4; - optional bytes container_tokens = 5; - repeated StringBytesMapProto service_data = 6; - repeated StringStringMapProto environment = 7; - repeated string command = 8; - repeated ApplicationACLMapProto application_ACLs = 9; + optional string user = 1; + repeated StringLocalResourceMapProto localResources = 2; + optional bytes container_tokens = 3; + repeated StringBytesMapProto service_data = 4; + repeated StringStringMapProto environment = 5; + repeated string command = 6; + repeated ApplicationACLMapProto application_ACLs = 7; } message ContainerStatusProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 50d1cd320ed..ad3b5f18072 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -151,6 +151,7 @@ message GetQueueUserAclsInfoResponseProto { message StartContainerRequestProto { optional ContainerLaunchContextProto container_launch_context = 1; + optional ContainerProto container = 2; } message StartContainerResponseProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 1e65a9a73bf..114b5e5a699 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -608,7 +608,6 @@ public class ApplicationMaster { + ", containerNode=" + allocatedContainer.getNodeId().getHost() + ":" + allocatedContainer.getNodeId().getPort() + ", containerNodeURI=" + allocatedContainer.getNodeHttpAddress() - + ", containerState" + allocatedContainer.getState() + ", containerResourceMemory" + allocatedContainer.getResource().getMemory()); // + ", containerToken" @@ -680,11 +679,8 @@ public class ApplicationMaster { ContainerLaunchContext ctx = Records 
.newRecord(ContainerLaunchContext.class); - ctx.setContainerId(container.getId()); - ctx.setResource(container.getResource()); - String jobUserName = System.getenv(ApplicationConstants.Environment.USER - .name()); + .key()); ctx.setUser(jobUserName); LOG.info("Setting user in ContainerLaunchContext to: " + jobUserName); @@ -753,6 +749,7 @@ public class ApplicationMaster { StartContainerRequest startReq = Records .newRecord(StartContainerRequest.class); startReq.setContainerLaunchContext(ctx); + startReq.setContainer(container); try { cm.startContainer(startReq); } catch (YarnRemoteException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index e311957e618..0461d46863e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -548,7 +548,7 @@ public class Client extends YarnClientImpl { // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); - amContainer.setResource(capability); + appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario @@ -573,6 +573,7 @@ public class Client extends YarnClientImpl { // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); + super.submitApplication(appContext); // TODO diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java index 62ac67fc170..605dc3b23dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java @@ -57,10 +57,11 @@ import org.apache.hadoop.yarn.util.Records; * unmanagedAM is an AM that is not launched and managed by the RM. The client * creates a new application on the RM and negotiates a new attempt id. Then it * waits for the RM app state to reach be YarnApplicationState.ACCEPTED after - * which it spawns the AM in another process and passes it the attempt id via - * env variable ApplicationConstants.AM_APP_ATTEMPT_ID_ENV. The AM can be in any - * language. The AM can register with the RM using the attempt id and proceed as - * normal. 
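The submission side follows the same pattern; a hedged sketch (the helper method is made up, names mirror the distributed-shell Client, and it assumes the usual org.apache.hadoop.yarn.api.records and org.apache.hadoop.yarn.util imports) of attaching the AM's resource ask to the ApplicationSubmissionContext rather than to its ContainerLaunchContext:

void describeAppMaster(ApplicationSubmissionContext appContext,
    ContainerLaunchContext amContainer, int amMemory) {
  Resource capability = Records.newRecord(Resource.class);
  capability.setMemory(amMemory);        // AM memory in MB; only memory is supported here
  appContext.setResource(capability);    // was: amContainer.setResource(capability)
  appContext.setAMContainerSpec(amContainer);
}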
The client redirects app stdout and stderr to its own stdout and + * which it spawns the AM in another process and passes it the container id via + * env variable ApplicationConstants.AM_CONTAINER_ID_ENV. The AM can be in any + * language. The AM can register with the RM using the attempt id obtained + * from the container id and proceed as normal. + * The client redirects app stdout and stderr to its own stdout and * stderr and waits for the AM process to exit. Then it waits for the RM to * report app completion. */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java index 43fec24afca..6ab474d43b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java @@ -30,6 +30,7 @@ import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.Shell; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.MiniYARNCluster; import org.junit.AfterClass; @@ -50,7 +51,7 @@ public class TestUnmanagedAMLauncher { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128); if (yarnCluster == null) { yarnCluster = new MiniYARNCluster( - TestUnmanagedAMLauncher.class.getName(), 1, 1, 1); + TestUnmanagedAMLauncher.class.getSimpleName(), 1, 1, 1); yarnCluster.init(conf); yarnCluster.start(); URL url = Thread.currentThread().getContextClassLoader() @@ -93,7 +94,7 @@ public class TestUnmanagedAMLauncher { return envClassPath; } - @Test(timeout=10000) + @Test(timeout=30000) public void testDSShell() throws Exception { String classpath = getTestRuntimeClasspath(); String javaHome = System.getenv("JAVA_HOME"); @@ -112,7 +113,8 @@ public class TestUnmanagedAMLauncher { javaHome + "/bin/java -Xmx512m " + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster " - + "--container_memory 128 --num_containers 1 --priority 0 --shell_command ls" }; + + "--container_memory 128 --num_containers 1 --priority 0 " + + "--shell_command " + (Shell.WINDOWS ? 
"dir" : "ls") }; LOG.info("Initializing Launcher"); UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/RMAdminProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/RMAdminProtocolPBClientImpl.java index 051df592570..f7b39f218b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/RMAdminProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/RMAdminProtocolPBClientImpl.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.api.impl.pb.client; +import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; @@ -65,7 +66,7 @@ import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.Refre import com.google.protobuf.ServiceException; -public class RMAdminProtocolPBClientImpl implements RMAdminProtocol { +public class RMAdminProtocolPBClientImpl implements RMAdminProtocol, Closeable { private RMAdminProtocolPB proxy; @@ -77,6 +78,13 @@ public class RMAdminProtocolPBClientImpl implements RMAdminProtocol { RMAdminProtocolPB.class, clientVersion, addr, conf); } + @Override + public void close() { + if (this.proxy != null) { + RPC.stopProxy(this.proxy); + } + } + @Override public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request) throws YarnRemoteException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 6b8353a33d5..f143a6b76ab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -122,9 +122,9 @@ public class YarnConfiguration extends Configuration { public static final String RM_SCHEDULER_MAXIMUM_ALLOCATION_MB = YARN_PREFIX + "scheduler.maximum-allocation-mb"; public static final int DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB = 8192; - public static final String RM_SCHEDULER_MAXIMUM_ALLOCATION_CORES = + public static final String RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES = YARN_PREFIX + "scheduler.maximum-allocation-vcores"; - public static final int DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_CORES = 32; + public static final int DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES = 4; /** Number of threads to handle scheduler interface.*/ public static final String RM_SCHEDULER_CLIENT_THREAD_COUNT = @@ -219,6 +219,11 @@ public class YarnConfiguration extends Configuration { public static final String DEFAULT_RM_SCHEDULER = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler"; + /** RM set next Heartbeat interval for NM */ + public static final String RM_NM_HEARTBEAT_INTERVAL_MS = + RM_PREFIX + "nodemanagers.heartbeat-interval-ms"; + public static final long DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS = 1000; + //Delegation token related keys public static final String DELEGATION_KEY_UPDATE_INTERVAL_KEY = RM_PREFIX + "delegation.key.update-interval"; @@ -329,19 +334,21 @@ public class YarnConfiguration extends Configuration { NM_PREFIX + "delete.thread-count"; 
public static final int DEFAULT_NM_DELETE_THREAD_COUNT = 4; - // TODO: Should this instead be dictated by RM? - /** Heartbeat interval to RM*/ - public static final String NM_TO_RM_HEARTBEAT_INTERVAL_MS = - NM_PREFIX + "heartbeat.interval-ms"; - public static final int DEFAULT_NM_TO_RM_HEARTBEAT_INTERVAL_MS = 1000; - /** Keytab for NM.*/ public static final String NM_KEYTAB = NM_PREFIX + "keytab"; /**List of directories to store localized files in.*/ public static final String NM_LOCAL_DIRS = NM_PREFIX + "local-dirs"; public static final String DEFAULT_NM_LOCAL_DIRS = "/tmp/nm-local-dir"; - + + /** + * Number of files in each localized directories + * Avoid tuning this too low. + */ + public static final String NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY = + NM_PREFIX + "local-cache.max-files-per-directory"; + public static final int DEFAULT_NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY = 8192; + /** Address where the localizer IPC is.*/ public static final String NM_LOCALIZER_ADDRESS = NM_PREFIX + "localizer.address"; @@ -710,21 +717,16 @@ public class YarnConfiguration extends Configuration { } public static String getRMWebAppHostAndPort(Configuration conf) { - int port = conf.getSocketAddr( + InetSocketAddress address = conf.getSocketAddr( YarnConfiguration.RM_WEBAPP_ADDRESS, YarnConfiguration.DEFAULT_RM_WEBAPP_ADDRESS, - YarnConfiguration.DEFAULT_RM_WEBAPP_PORT).getPort(); - // Use apps manager address to figure out the host for webapp - String host = conf.getSocketAddr( - YarnConfiguration.RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_PORT).getHostName(); - InetSocketAddress address = NetUtils.createSocketAddrForHost(host, port); + YarnConfiguration.DEFAULT_RM_WEBAPP_PORT); + address = NetUtils.getConnectAddress(address); StringBuffer sb = new StringBuffer(); InetAddress resolved = address.getAddress(); if (resolved == null || resolved.isAnyLocalAddress() || resolved.isLoopbackAddress()) { - String lh = host; + String lh = address.getHostName(); try { lh = InetAddress.getLocalHost().getCanonicalHostName(); } catch (UnknownHostException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java index 502f1dddc13..c7502c1fb51 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java @@ -246,11 +246,6 @@ public class BuilderUtils { container.setNodeHttpAddress(nodeHttpAddress); container.setResource(resource); container.setPriority(priority); - container.setState(ContainerState.NEW); - ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class); - containerStatus.setContainerId(containerId); - containerStatus.setState(ContainerState.NEW); - container.setContainerStatus(containerStatus); container.setContainerToken(containerToken); return container; } @@ -289,16 +284,13 @@ public class BuilderUtils { } public static ContainerLaunchContext newContainerLaunchContext( - ContainerId containerID, String user, Resource assignedCapability, - Map localResources, + String user, Map localResources, Map environment, List commands, - Map serviceData, ByteBuffer containerTokens, + Map serviceData, ByteBuffer containerTokens, Map acls) { ContainerLaunchContext container = recordFactory 
.newRecordInstance(ContainerLaunchContext.class); - container.setContainerId(containerID); container.setUser(user); - container.setResource(assignedCapability); container.setLocalResources(localResources); container.setEnvironment(environment); container.setCommands(commands); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java index 5253f49e87d..4a997ce5288 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/FSDownload.java @@ -23,7 +23,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.URISyntaxException; import java.security.PrivilegedExceptionAction; -import java.util.Random; import java.util.concurrent.Callable; import java.util.regex.Pattern; @@ -36,13 +35,12 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.RunJar; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.util.ConverterUtils; /** * Download a single URL to the local disk. @@ -51,8 +49,7 @@ import org.apache.hadoop.yarn.util.ConverterUtils; public class FSDownload implements Callable { private static final Log LOG = LogFactory.getLog(FSDownload.class); - - private Random rand; + private FileContext files; private final UserGroupInformation userUgi; private Configuration conf; @@ -71,13 +68,12 @@ public class FSDownload implements Callable { public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf, - Path destDirPath, LocalResource resource, Random rand) { + Path destDirPath, LocalResource resource) { this.conf = conf; this.destDirPath = destDirPath; this.files = files; this.userUgi = ugi; this.resource = resource; - this.rand = rand; } LocalResource getResource() { @@ -270,11 +266,6 @@ public class FSDownload implements Callable { } catch (URISyntaxException e) { throw new IOException("Invalid resource", e); } - Path tmp; - do { - tmp = new Path(destDirPath, String.valueOf(rand.nextLong())); - } while (files.util().exists(tmp)); - destDirPath = tmp; createDir(destDirPath, cachePerms); final Path dst_work = new Path(destDirPath + "_tmp"); createDir(dst_work, cachePerms); @@ -305,8 +296,6 @@ public class FSDownload implements Callable { files.delete(dst_work, true); } catch (FileNotFoundException ignore) { } - // clear ref to internal var - rand = null; conf = null; resource = null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 8d391e2631d..f873ff9d052 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -58,10 +58,16 @@ + + The hostname of the RM. 
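Aside: with the Random-based temporary directory removed from FSDownload above, uniqueness of the destination is now the caller's responsibility; every download must be handed a path no other download is using. A small sketch of the counter-based scheme the updated tests below rely on (the helper class name is hypothetical):

import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.fs.Path;

public final class UniqueDestinationSketch {
  // Seeding with the current time keeps paths from colliding across runs;
  // incrementAndGet() keeps them unique within a run.
  private static final AtomicLong COUNTER =
      new AtomicLong(System.currentTimeMillis());

  public static Path uniqueChild(Path baseDir) {
    return new Path(baseDir, Long.toString(COUNTER.incrementAndGet()));
  }
}

FSDownload then localizes directly into the directory it is given, so the final localized path sits under the caller-chosen destination rather than under a random child of it.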
+ yarn.resourcemanager.hostname + 0.0.0.0 + + The address of the applications manager interface in the RM. yarn.resourcemanager.address - 0.0.0.0:8032 + ${yarn.resourcemanager.hostname}:8032 @@ -84,7 +90,7 @@ The address of the scheduler interface. yarn.resourcemanager.scheduler.address - 0.0.0.0:8030 + ${yarn.resourcemanager.hostname}:8030 @@ -96,12 +102,12 @@ The address of the RM web application. yarn.resourcemanager.webapp.address - 0.0.0.0:8088 + ${yarn.resourcemanager.hostname}:8088 yarn.resourcemanager.resource-tracker.address - 0.0.0.0:8031 + ${yarn.resourcemanager.hostname}:8031 @@ -119,7 +125,7 @@ The address of the RM admin interface. yarn.resourcemanager.admin.address - 0.0.0.0:8033 + ${yarn.resourcemanager.hostname}:8033 @@ -278,11 +284,23 @@ 86400 + + The heart-beat interval in milliseconds for every NodeManager in the cluster. + yarn.resourcemanager.nodemanagers.heartbeat-interval-ms + 1000 + + + + The hostname of the NM. + yarn.nodemanager.hostname + 0.0.0.0 + + The address of the container manager in the NM. yarn.nodemanager.address - 0.0.0.0:0 + ${yarn.nodemanager.hostname}:0 @@ -336,12 +354,6 @@ 0 - - Heartbeat interval to RM - yarn.nodemanager.heartbeat.interval-ms - 1000 - - Keytab for NM. yarn.nodemanager.keytab @@ -359,10 +371,29 @@ ${hadoop.tmp.dir}/nm-local-dir + + It limits the maximum number of files which will be localized + in a single local directory. If the limit is reached then sub-directories + will be created and new files will be localized in them. If it is set to + a value less than or equal to 36 [which are sub-directories (0-9 and then + a-z)] then NodeManager will fail to start. For example; [for public + cache] if this is configured with a value of 40 ( 4 files + + 36 sub-directories) and the local-dir is "/tmp/local-dir1" then it will + allow 4 files to be created directly inside "/tmp/local-dir1/filecache". + For files that are localized further it will create a sub-directory "0" + inside "/tmp/local-dir1/filecache" and will localize files inside it + until it becomes full. If a file is removed from a sub-directory that + is marked full, then that sub-directory will be used back again to + localize files. + + yarn.nodemanager.local-cache.max-files-per-directory + 8192 + + Address where the localizer IPC is. yarn.nodemanager.localizer.address - 0.0.0.0:8040 + ${yarn.nodemanager.hostname}:8040 @@ -493,7 +524,7 @@ NM Webapp address. 
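Aside: the ${yarn.resourcemanager.hostname}-style defaults above lean on Configuration's property expansion, so overriding the single hostname key updates every derived address that references it. A small sketch of that behaviour, setting the relevant properties in code purely for illustration:

import org.apache.hadoop.conf.Configuration;

public class HostnameExpansionSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Stand-ins for the yarn-default.xml entries above.
    conf.set("yarn.resourcemanager.hostname", "0.0.0.0");
    conf.set("yarn.resourcemanager.address",
        "${yarn.resourcemanager.hostname}:8032");
    conf.set("yarn.resourcemanager.webapp.address",
        "${yarn.resourcemanager.hostname}:8088");

    // Point the RM somewhere else with one property; the derived addresses
    // pick it up through ${...} expansion inside Configuration.get().
    conf.set("yarn.resourcemanager.hostname", "rm.example.com");
    System.out.println(conf.get("yarn.resourcemanager.address"));        // rm.example.com:8032
    System.out.println(conf.get("yarn.resourcemanager.webapp.address")); // rm.example.com:8088
  }
}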
yarn.nodemanager.webapp.address - 0.0.0.0:8042 + ${yarn.nodemanager.hostname}:8042 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java index b18588d9cbb..295a38cee80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -50,6 +51,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvider; import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; /* @@ -101,13 +103,14 @@ public class TestContainerLaunchRPC { applicationAttemptId.setAttemptId(0); containerId.setApplicationAttemptId(applicationAttemptId); containerId.setId(100); - containerLaunchContext.setContainerId(containerId); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); + Container container = + BuilderUtils.newContainer(containerId, null, null, recordFactory + .newRecordInstance(Resource.class), null, null); StartContainerRequest scRequest = recordFactory .newRecordInstance(StartContainerRequest.class); scRequest.setContainerLaunchContext(containerLaunchContext); + scRequest.setContainer(container); try { proxy.startContainer(scRequest); } catch (Exception e) { @@ -141,7 +144,6 @@ public class TestContainerLaunchRPC { @Override public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext container = request.getContainerLaunchContext(); StartContainerResponse response = recordFactory .newRecordInstance(StartContainerResponse.class); status = recordFactory.newRecordInstance(ContainerStatus.class); @@ -153,7 +155,7 @@ public class TestContainerLaunchRPC { throw new UndeclaredThrowableException(e); } status.setState(ContainerState.RUNNING); - status.setContainerId(container.getContainerId()); + status.setContainerId(request.getContainer().getId()); status.setExitStatus(0); return response; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java index 6975db229e5..7d941e92a23 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import 
org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -52,6 +53,7 @@ import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvid import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; import org.junit.Test; @@ -124,20 +126,21 @@ public class TestRPC { applicationAttemptId.setAttemptId(0); containerId.setApplicationAttemptId(applicationAttemptId); containerId.setId(100); - containerLaunchContext.setContainerId(containerId); - containerLaunchContext.setResource( - recordFactory.newRecordInstance(Resource.class)); + Container mockContainer = + BuilderUtils.newContainer(containerId, null, null, recordFactory + .newRecordInstance(Resource.class), null, null); // containerLaunchContext.env = new HashMap(); // containerLaunchContext.command = new ArrayList(); StartContainerRequest scRequest = recordFactory.newRecordInstance(StartContainerRequest.class); scRequest.setContainerLaunchContext(containerLaunchContext); + scRequest.setContainer(mockContainer); proxy.startContainer(scRequest); GetContainerStatusRequest gcsRequest = recordFactory.newRecordInstance(GetContainerStatusRequest.class); - gcsRequest.setContainerId(containerLaunchContext.getContainerId()); + gcsRequest.setContainerId(mockContainer.getId()); GetContainerStatusResponse response = proxy.getContainerStatus(gcsRequest); ContainerStatus status = response.getStatus(); @@ -145,7 +148,7 @@ public class TestRPC { boolean exception = false; try { StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); - stopRequest.setContainerId(containerLaunchContext.getContainerId()); + stopRequest.setContainerId(mockContainer.getId()); proxy.stopContainer(stopRequest); } catch (YarnRemoteException e) { exception = true; @@ -179,12 +182,11 @@ public class TestRPC { @Override public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext container = request.getContainerLaunchContext(); StartContainerResponse response = recordFactory.newRecordInstance(StartContainerResponse.class); status = recordFactory.newRecordInstance(ContainerStatus.class); status.setState(ContainerState.RUNNING); - status.setContainerId(container.getContainerId()); + status.setContainerId(request.getContainer().getId()); status.setExitStatus(0); return response; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java index 912c7248ca5..b02cc517b70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestFSDownload.java @@ -35,6 +35,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import 
java.util.concurrent.atomic.AtomicLong; import java.util.jar.JarOutputStream; import java.util.jar.Manifest; @@ -66,6 +67,8 @@ import org.junit.Test; public class TestFSDownload { private static final Log LOG = LogFactory.getLog(TestFSDownload.class); + private static AtomicLong uniqueNumberGenerator = + new AtomicLong(System.currentTimeMillis()); @AfterClass public static void deleteTestDir() throws IOException { @@ -267,9 +270,11 @@ public class TestFSDownload { rsrcVis.put(rsrc, vis); Path destPath = dirs.getLocalPathForWrite( basedir.toString(), size, conf); + destPath = new Path (destPath, + Long.toString(uniqueNumberGenerator.incrementAndGet())); FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, - destPath, rsrc, new Random(sharedSeed)); + destPath, rsrc); pending.put(rsrc, exec.submit(fsd)); try { @@ -320,9 +325,11 @@ public class TestFSDownload { rsrcVis.put(rsrc, vis); Path destPath = dirs.getLocalPathForWrite( basedir.toString(), sizes[i], conf); + destPath = new Path (destPath, + Long.toString(uniqueNumberGenerator.incrementAndGet())); FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, - destPath, rsrc, new Random(sharedSeed)); + destPath, rsrc); pending.put(rsrc, exec.submit(fsd)); } @@ -380,9 +387,10 @@ public class TestFSDownload { Path p = new Path(basedir, "" + 1); LocalResource rsrc = createTarFile(files, p, size, rand, vis); Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf); + destPath = new Path (destPath, + Long.toString(uniqueNumberGenerator.incrementAndGet())); FSDownload fsd = new FSDownload(files, - UserGroupInformation.getCurrentUser(), conf, destPath, rsrc, - new Random(sharedSeed)); + UserGroupInformation.getCurrentUser(), conf, destPath, rsrc); pending.put(rsrc, exec.submit(fsd)); try { @@ -437,9 +445,10 @@ public class TestFSDownload { LocalResource rsrcjar = createJarFile(files, p, size, rand, vis); rsrcjar.setType(LocalResourceType.PATTERN); Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf); + destPathjar = new Path (destPathjar, + Long.toString(uniqueNumberGenerator.incrementAndGet())); FSDownload fsdjar = new FSDownload(files, - UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrcjar, - new Random(sharedSeed)); + UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrcjar); pending.put(rsrcjar, exec.submit(fsdjar)); try { @@ -493,9 +502,10 @@ public class TestFSDownload { Path p = new Path(basedir, "" + 1); LocalResource rsrczip = createZipFile(files, p, size, rand, vis); Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf); + destPathjar = new Path (destPathjar, + Long.toString(uniqueNumberGenerator.incrementAndGet())); FSDownload fsdzip = new FSDownload(files, - UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrczip, - new Random(sharedSeed)); + UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrczip); pending.put(rsrczip, exec.submit(fsdzip)); try { @@ -586,9 +596,11 @@ public class TestFSDownload { rsrcVis.put(rsrc, vis); Path destPath = dirs.getLocalPathForWrite( basedir.toString(), conf); + destPath = new Path (destPath, + Long.toString(uniqueNumberGenerator.incrementAndGet())); FSDownload fsd = new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, - destPath, rsrc, new Random(sharedSeed)); + destPath, rsrc); pending.put(rsrc, exec.submit(fsd)); } @@ -614,4 +626,38 @@ public class TestFSDownload { } -} + + @Test(timeout = 1000) + public void 
testUniqueDestinationPath() throws Exception { + Configuration conf = new Configuration(); + FileContext files = FileContext.getLocalFSFileContext(conf); + final Path basedir = files.makeQualified(new Path("target", + TestFSDownload.class.getSimpleName())); + files.mkdir(basedir, null, true); + conf.setStrings(TestFSDownload.class.getName(), basedir.toString()); + + ExecutorService singleThreadedExec = Executors.newSingleThreadExecutor(); + + LocalDirAllocator dirs = + new LocalDirAllocator(TestFSDownload.class.getName()); + Path destPath = dirs.getLocalPathForWrite(basedir.toString(), conf); + destPath = + new Path(destPath, Long.toString(uniqueNumberGenerator + .incrementAndGet())); + try { + Path p = new Path(basedir, "dir" + 0 + ".jar"); + LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE; + LocalResource rsrc = createJar(files, p, vis); + FSDownload fsd = + new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, + destPath, rsrc); + Future rPath = singleThreadedExec.submit(fsd); + // Now FSDownload will not create a random directory to localize the + // resource. Therefore the final localizedPath for the resource should be + // destination directory (passed as an argument) + file name. + Assert.assertEquals(destPath, rPath.get().getParent()); + } finally { + singleThreadedExec.shutdown(); + } + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestYarnVersionInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestYarnVersionInfo.java index 73d7ff6d342..61795cae351 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestYarnVersionInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestYarnVersionInfo.java @@ -40,11 +40,11 @@ public class TestYarnVersionInfo extends TestCase { // make sure they aren't Unknown assertTrue("getVersion returned Unknown", !YarnVersionInfo.getVersion().equals("Unknown")); assertTrue("getUser returned Unknown", !YarnVersionInfo.getUser().equals("Unknown")); - assertTrue("getUrl returned Unknown", !YarnVersionInfo.getUrl().equals("Unknown")); assertTrue("getSrcChecksum returned Unknown", !YarnVersionInfo.getSrcChecksum().equals("Unknown")); // these could be Unknown if the VersionInfo generated from code not in svn or git // so just check that they return something + assertNotNull("getUrl returned null", YarnVersionInfo.getUrl()); assertNotNull("getRevision returned null", YarnVersionInfo.getRevision()); assertNotNull("getBranch returned null", YarnVersionInfo.getBranch()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java index 6aff34ae284..b2fd70f68f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java @@ -42,4 +42,7 @@ public interface NodeHeartbeatResponse { void addAllContainersToCleanup(List 
containers); void addAllApplicationsToCleanup(List applications); + + long getNextHeartBeatInterval(); + void setNextHeartBeatInterval(long nextHeartBeatInterval); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java index bc51b241f3c..080a79c7020 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java @@ -271,6 +271,18 @@ public class NodeHeartbeatResponsePBImpl extends ProtoBase containersToCleanUp, + List applicationsToCleanUp, + MasterKey masterKey, long nextHeartbeatInterval) { + NodeHeartbeatResponse response = recordFactory + .newRecordInstance(NodeHeartbeatResponse.class); + response.setResponseId(responseId); + response.setNodeAction(action); + response.setMasterKey(masterKey); + response.setNextHeartBeatInterval(nextHeartbeatInterval); + if(containersToCleanUp != null) { + response.addAllContainersToCleanup(containersToCleanUp); + } + if(applicationsToCleanUp != null) { + response.addAllApplicationsToCleanup(applicationsToCleanUp); + } + return response; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index 89ec81c3ab9..7fa1fb74030 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -25,7 +25,7 @@ import "yarn_protos.proto"; enum NodeActionProto { NORMAL = 0; - REBOOT = 1; + RESYNC = 1; SHUTDOWN = 2; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto index d713b94332c..931dccd08c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto @@ -47,4 +47,5 @@ message NodeHeartbeatResponseProto { optional NodeActionProto nodeAction = 3; repeated ContainerIdProto containers_to_cleanup = 4; repeated ApplicationIdProto applications_to_cleanup = 5; + optional int64 nextHeartBeatInterval = 6; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestRegisterNodeManagerResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestRegisterNodeManagerResponse.java new file 
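Aside: nextHeartBeatInterval is an optional proto field, so a response that does not carry it yields the proto default of 0; the NodeManager guards for that further down by falling back to the 1000 ms default. A one-method sketch of that guard, assuming only the constants defined earlier in this patch:

import org.apache.hadoop.yarn.conf.YarnConfiguration;

public final class HeartbeatIntervalSketch {
  // Mirrors the check in the status updater: a missing or non-positive
  // interval from the RM falls back to DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS.
  public static long nextInterval(long fromResponse) {
    return fromResponse <= 0
        ? YarnConfiguration.DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS
        : fromResponse;
  }
}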
mode 100644 index 00000000000..8f89d7cc689 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/api/protocolrecords/TestRegisterNodeManagerResponse.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.nio.ByteBuffer; + +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RegisterNodeManagerResponsePBImpl; +import org.apache.hadoop.yarn.server.api.records.MasterKey; +import org.apache.hadoop.yarn.server.api.records.NodeAction; +import org.junit.Test; + +import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.RegisterNodeManagerResponseProto; + +public class TestRegisterNodeManagerResponse { + private static final RecordFactory recordFactory = + RecordFactoryProvider.getRecordFactory(null); + + @Test + public void testRoundTrip() throws Exception { + RegisterNodeManagerResponse resp = recordFactory + .newRecordInstance(RegisterNodeManagerResponse.class); + MasterKey mk = recordFactory.newRecordInstance(MasterKey.class); + mk.setKeyId(54321); + byte b [] = {0,1,2,3,4,5}; + mk.setBytes(ByteBuffer.wrap(b)); + resp.setMasterKey(mk); + resp.setNodeAction(NodeAction.NORMAL); + + assertEquals(NodeAction.NORMAL, resp.getNodeAction()); + assertNotNull(resp.getMasterKey()); + assertEquals(54321, resp.getMasterKey().getKeyId()); + assertArrayEquals(b, resp.getMasterKey().getBytes().array()); + + RegisterNodeManagerResponse respCopy = serDe(resp); + + assertEquals(NodeAction.NORMAL, respCopy.getNodeAction()); + assertNotNull(respCopy.getMasterKey()); + assertEquals(54321, respCopy.getMasterKey().getKeyId()); + assertArrayEquals(b, respCopy.getMasterKey().getBytes().array()); + } + + public static RegisterNodeManagerResponse serDe(RegisterNodeManagerResponse orig) throws Exception { + RegisterNodeManagerResponsePBImpl asPB = (RegisterNodeManagerResponsePBImpl)orig; + RegisterNodeManagerResponseProto proto = asPB.getProto(); + ByteArrayOutputStream out = new ByteArrayOutputStream(); + proto.writeTo(out); + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + RegisterNodeManagerResponseProto.Builder cp = RegisterNodeManagerResponseProto.newBuilder(); + cp.mergeFrom(in); + return new RegisterNodeManagerResponsePBImpl(cp.build()); + } + +} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index f7e29e5ac93..e0a35829a4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -220,7 +220,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { String containerIdStr = ConverterUtils.toString(containerId); resourcesHandler.preExecute(containerId, - container.getLaunchContext().getResource()); + container.getResource()); String resourcesOptions = resourcesHandler.getResourcesOption( containerId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index 517b365d060..582db06f5f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -307,7 +307,7 @@ public class LocalDirsHandlerService extends AbstractService { URI uriPath = (new Path(paths[i])).toUri(); if (uriPath.getScheme() == null || uriPath.getScheme().equals(FILE_SCHEME)) { - validPaths.add(uriPath.getPath()); + validPaths.add(new Path(uriPath.getPath()).toString()); } else { LOG.warn(paths[i] + " is not a valid path. 
Path should be with " + FILE_SCHEME + " scheme or without scheme"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 7a53eb9033a..a5d16c568c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -81,6 +81,7 @@ public class NodeManager extends CompositeService private Context context; private AsyncDispatcher dispatcher; private ContainerManagerImpl containerManager; + private NodeStatusUpdater nodeStatusUpdater; private static CompositeServiceShutdownHook nodeManagerShutdownHook; private long waitForContainersOnShutdownMillis; @@ -119,6 +120,10 @@ public class NodeManager extends CompositeService return new DeletionService(exec); } + protected NMContext createNMContext(NMContainerTokenSecretManager containerTokenSecretManager) { + return new NMContext(containerTokenSecretManager); + } + protected void doSecureLogin() throws IOException { SecurityUtil.login(getConfig(), YarnConfiguration.NM_KEYTAB, YarnConfiguration.NM_PRINCIPAL); @@ -137,7 +142,7 @@ public class NodeManager extends CompositeService containerTokenSecretManager = new NMContainerTokenSecretManager(conf); } - this.context = new NMContext(containerTokenSecretManager); + this.context = createNMContext(containerTokenSecretManager); this.aclsManager = new ApplicationACLsManager(conf); @@ -159,7 +164,7 @@ public class NodeManager extends CompositeService addService(nodeHealthChecker); dirsHandler = nodeHealthChecker.getDiskHandler(); - NodeStatusUpdater nodeStatusUpdater = + nodeStatusUpdater = createNodeStatusUpdater(context, dispatcher, nodeHealthChecker); NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor(); @@ -210,35 +215,67 @@ public class NodeManager extends CompositeService if (isStopping.getAndSet(true)) { return; } - - cleanupContainers(); + + cleanupContainers(NodeManagerEventType.SHUTDOWN); super.stop(); DefaultMetricsSystem.shutdown(); } - + + protected void cleanupContainersOnResync() { + //we do not want to block dispatcher thread here + new Thread() { + @Override + public void run() { + cleanupContainers(NodeManagerEventType.RESYNC); + ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); + } + }.start(); + } + @SuppressWarnings("unchecked") - protected void cleanupContainers() { + protected void cleanupContainers(NodeManagerEventType eventType) { Map containers = context.getContainers(); if (containers.isEmpty()) { return; } - LOG.info("Containers still running on shutdown: " + containers.keySet()); + LOG.info("Containers still running on " + eventType + " : " + + containers.keySet()); - List containerIds = new ArrayList(containers.keySet()); + List containerIds = + new ArrayList(containers.keySet()); dispatcher.getEventHandler().handle( new CMgrCompletedContainersEvent(containerIds, CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN)); LOG.info("Waiting for containers to be killed"); - long waitStartTime = System.currentTimeMillis(); - while (!containers.isEmpty() && - System.currentTimeMillis() - waitStartTime < 
waitForContainersOnShutdownMillis) { - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on container kill", ex); + switch (eventType) { + case SHUTDOWN: + long waitStartTime = System.currentTimeMillis(); + while (!containers.isEmpty() + && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on container kill on shutdown", + ex); + } } + break; + case RESYNC: + while (!containers.isEmpty()) { + try { + Thread.sleep(1000); + //to remove done containers from the map + nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on container kill on resync", + ex); + } + } + break; + default: + LOG.warn("Invalid eventType: " + eventType); } // All containers killed @@ -338,9 +375,8 @@ public class NodeManager extends CompositeService case SHUTDOWN: stop(); break; - case REBOOT: - stop(); - reboot(); + case RESYNC: + cleanupContainersOnResync(); break; default: LOG.warn("Invalid shutdown event " + event.getType() + ". Ignoring."); @@ -357,6 +393,11 @@ public class NodeManager extends CompositeService return containerManager; } + //For testing + Dispatcher getNMDispatcher(){ + return dispatcher; + } + @VisibleForTesting Context getNMContext() { return this.context; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java index d18cec6c0fb..f4d1caad789 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java @@ -18,5 +18,5 @@ package org.apache.hadoop.yarn.server.nodemanager; public enum NodeManagerEventType { - SHUTDOWN, REBOOT + SHUTDOWN, RESYNC } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java index f1e6ac3bf4c..41949e7baab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java @@ -18,9 +18,11 @@ package org.apache.hadoop.yarn.server.nodemanager; +import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.service.Service; public interface NodeStatusUpdater extends Service { void sendOutofBandHeartBeat(); + NodeStatus getNodeStatusAndUpdateContainersInContext(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index b59145a9bda..e9583c2a2e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -60,6 +60,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.service.AbstractService; +import com.google.common.annotations.VisibleForTesting; + public class NodeStatusUpdaterImpl extends AbstractService implements NodeStatusUpdater { @@ -71,7 +73,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements private final Dispatcher dispatcher; private NodeId nodeId; - private long heartBeatInterval; + private long nextHeartBeatInterval; private ResourceTracker resourceTracker; private InetSocketAddress rmAddress; private Resource totalResource; @@ -87,6 +89,12 @@ public class NodeStatusUpdaterImpl extends AbstractService implements private final NodeHealthCheckerService healthChecker; private final NodeManagerMetrics metrics; + private long rmConnectWaitMS; + private long rmConnectionRetryIntervalMS; + private boolean waitForEver; + + private Runnable statusUpdaterRunnable; + private Thread statusUpdater; public NodeStatusUpdaterImpl(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { @@ -103,9 +111,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_ADDRESS, YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_PORT); - this.heartBeatInterval = - conf.getLong(YarnConfiguration.NM_TO_RM_HEARTBEAT_INTERVAL_MS, - YarnConfiguration.DEFAULT_NM_TO_RM_HEARTBEAT_INTERVAL_MS); + int memoryMb = conf.getInt( YarnConfiguration.NM_PMEM_MB, YarnConfiguration.DEFAULT_NM_PMEM_MB); @@ -134,8 +140,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS); LOG.info("Initialized nodemanager for " + nodeId + ":" + - " physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb + - " physical-cores=" + cpuCores + " virtual-cores=" + virtualCores); + " physical-memory=" + memoryMb + " virtual-memory=" + virtualMemoryMb + + " physical-cores=" + cpuCores + " virtual-cores=" + virtualCores); super.init(conf); } @@ -168,6 +174,22 @@ public class NodeStatusUpdaterImpl extends AbstractService implements this.isStopped = true; super.stop(); } + + protected void rebootNodeStatusUpdater() { + // Interrupt the updater. 
+ this.isStopped = true; + + try { + statusUpdater.join(); + registerWithRM(); + statusUpdater = new Thread(statusUpdaterRunnable, "Node Status Updater"); + this.isStopped = false; + statusUpdater.start(); + LOG.info("NodeStatusUpdater thread is reRegistered and restarted"); + } catch (Exception e) { + throw new AvroRuntimeException(e); + } + } private boolean isSecurityEnabled() { return UserGroupInformation.isSecurityEnabled(); @@ -187,14 +209,15 @@ public class NodeStatusUpdaterImpl extends AbstractService implements conf); } - private void registerWithRM() throws YarnRemoteException { + @VisibleForTesting + protected void registerWithRM() throws YarnRemoteException { Configuration conf = getConfig(); - long rmConnectWaitMS = + rmConnectWaitMS = conf.getInt( YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS, YarnConfiguration.DEFAULT_RESOURCEMANAGER_CONNECT_WAIT_SECS) * 1000; - long rmConnectionRetryIntervalMS = + rmConnectionRetryIntervalMS = conf.getLong( YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS, YarnConfiguration @@ -207,7 +230,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements " should not be negative."); } - boolean waitForEver = (rmConnectWaitMS == -1000); + waitForEver = (rmConnectWaitMS == -1000); if(! waitForEver) { if(rmConnectWaitMS < 0) { @@ -311,7 +334,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements return appList; } - private NodeStatus getNodeStatus() { + public NodeStatus getNodeStatusAndUpdateContainersInContext() { NodeStatus nodeStatus = recordFactory.newRecordInstance(NodeStatus.class); nodeStatus.setNodeId(this.nodeId); @@ -386,7 +409,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements protected void startStatusUpdater() { - new Thread("Node Status Updater") { + statusUpdaterRunnable = new Runnable() { @Override @SuppressWarnings("unchecked") public void run() { @@ -394,10 +417,10 @@ public class NodeStatusUpdaterImpl extends AbstractService implements while (!isStopped) { // Send heartbeat try { - synchronized (heartbeatMonitor) { - heartbeatMonitor.wait(heartBeatInterval); - } - NodeStatus nodeStatus = getNodeStatus(); + NodeHeartbeatResponse response = null; + int rmRetryCount = 0; + long waitStartTime = System.currentTimeMillis(); + NodeStatus nodeStatus = getNodeStatusAndUpdateContainersInContext(); nodeStatus.setResponseId(lastHeartBeatID); NodeHeartbeatRequest request = recordFactory @@ -407,9 +430,33 @@ public class NodeStatusUpdaterImpl extends AbstractService implements request.setLastKnownMasterKey(NodeStatusUpdaterImpl.this.context .getContainerTokenSecretManager().getCurrentKey()); } - NodeHeartbeatResponse response = - resourceTracker.nodeHeartbeat(request); - + while (!isStopped) { + try { + rmRetryCount++; + response = resourceTracker.nodeHeartbeat(request); + break; + } catch (Throwable e) { + LOG.warn("Trying to heartbeat to ResourceManager, " + + "current no. of failed attempts is " + rmRetryCount); + if(System.currentTimeMillis() - waitStartTime < rmConnectWaitMS + || waitForEver) { + try { + LOG.info("Sleeping for " + rmConnectionRetryIntervalMS/1000 + + " seconds before next heartbeat to RM"); + Thread.sleep(rmConnectionRetryIntervalMS); + } catch(InterruptedException ex) { + //done nothing + } + } else { + String errorMessage = "Failed to heartbeat to RM, " + + "no. 
of failed attempts is "+rmRetryCount; + LOG.error(errorMessage,e); + throw new YarnException(errorMessage,e); + } + } + } + //get next heartbeat interval from response + nextHeartBeatInterval = response.getNextHeartBeatInterval(); // See if the master-key has rolled over if (isSecurityEnabled()) { MasterKey updatedMasterKey = response.getMasterKey(); @@ -423,16 +470,16 @@ public class NodeStatusUpdaterImpl extends AbstractService implements if (response.getNodeAction() == NodeAction.SHUTDOWN) { LOG .info("Recieved SHUTDOWN signal from Resourcemanager as part of heartbeat," + - " hence shutting down."); + " hence shutting down."); dispatcher.getEventHandler().handle( new NodeManagerEvent(NodeManagerEventType.SHUTDOWN)); break; } - if (response.getNodeAction() == NodeAction.REBOOT) { + if (response.getNodeAction() == NodeAction.RESYNC) { LOG.info("Node is out of sync with ResourceManager," + " hence rebooting."); dispatcher.getEventHandler().handle( - new NodeManagerEvent(NodeManagerEventType.REBOOT)); + new NodeManagerEvent(NodeManagerEventType.RESYNC)); break; } @@ -452,13 +499,32 @@ public class NodeStatusUpdaterImpl extends AbstractService implements dispatcher.getEventHandler().handle( new CMgrCompletedAppsEvent(appsToCleanup)); } + } catch (YarnException e) { + //catch and throw the exception if tried MAX wait time to connect RM + dispatcher.getEventHandler().handle( + new NodeManagerEvent(NodeManagerEventType.SHUTDOWN)); + throw e; } catch (Throwable e) { // TODO Better error handling. Thread can die with the rest of the // NM still running. LOG.error("Caught exception in status-updater", e); + } finally { + synchronized (heartbeatMonitor) { + nextHeartBeatInterval = nextHeartBeatInterval <= 0 ? + YarnConfiguration.DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS : + nextHeartBeatInterval; + try { + heartbeatMonitor.wait(nextHeartBeatInterval); + } catch (InterruptedException e) { + // Do Nothing + } + } } } } - }.start(); + }; + statusUpdater = + new Thread(statusUpdaterRunnable, "Node Status Updater"); + statusUpdater.start(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/ResourceLocalizationSpec.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/ResourceLocalizationSpec.java new file mode 100644 index 00000000000..63c3fd3fb8b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/ResourceLocalizationSpec.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.nodemanager.api; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.URL; + +import com.google.common.annotations.VisibleForTesting; + +@Private +@VisibleForTesting +public interface ResourceLocalizationSpec { + + public void setResource(LocalResource rsrc); + + public LocalResource getResource(); + + public void setDestinationDirectory(URL destinationDirectory); + + public URL getDestinationDirectory(); +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/ResourceLocalizationSpecPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/ResourceLocalizationSpecPBImpl.java new file mode 100644 index 00000000000..643d3a692eb --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/ResourceLocalizationSpecPBImpl.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.api.impl.pb; + +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.ProtoBase; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.URLPBImpl; +import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.ResourceLocalizationSpecProto; +import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.ResourceLocalizationSpecProtoOrBuilder; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; + +public class ResourceLocalizationSpecPBImpl extends + ProtoBase implements + ResourceLocalizationSpec { + + private ResourceLocalizationSpecProto proto = ResourceLocalizationSpecProto + .getDefaultInstance(); + private ResourceLocalizationSpecProto.Builder builder = null; + private boolean viaProto; + private LocalResource resource = null; + private URL destinationDirectory = null; + + public ResourceLocalizationSpecPBImpl() { + builder = ResourceLocalizationSpecProto.newBuilder(); + } + + public ResourceLocalizationSpecPBImpl(ResourceLocalizationSpecProto proto) { + this.proto = proto; + viaProto = true; + } + + @Override + public LocalResource getResource() { + ResourceLocalizationSpecProtoOrBuilder p = viaProto ? 
proto : builder; + if (resource != null) { + return resource; + } + if (!p.hasResource()) { + return null; + } + resource = new LocalResourcePBImpl(p.getResource()); + return resource; + } + + @Override + public void setResource(LocalResource rsrc) { + maybeInitBuilder(); + resource = rsrc; + } + + @Override + public URL getDestinationDirectory() { + ResourceLocalizationSpecProtoOrBuilder p = viaProto ? proto : builder; + if (destinationDirectory != null) { + return destinationDirectory; + } + if (!p.hasDestinationDirectory()) { + return null; + } + destinationDirectory = new URLPBImpl(p.getDestinationDirectory()); + return destinationDirectory; + } + + @Override + public void setDestinationDirectory(URL destinationDirectory) { + maybeInitBuilder(); + this.destinationDirectory = destinationDirectory; + } + + @Override + public ResourceLocalizationSpecProto getProto() { + mergeLocalToBuilder(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private synchronized void maybeInitBuilder() { + if (builder == null || viaProto) { + builder = ResourceLocalizationSpecProto.newBuilder(proto); + } + viaProto = false; + } + + private void mergeLocalToBuilder() { + ResourceLocalizationSpecProtoOrBuilder l = viaProto ? proto : builder; + if (this.resource != null + && !(l.getResource() + .equals(((LocalResourcePBImpl) resource).getProto()))) { + maybeInitBuilder(); + builder.setResource(((LocalResourcePBImpl) resource).getProto()); + } + if (this.destinationDirectory != null + && !(l.getDestinationDirectory() + .equals(((URLPBImpl) destinationDirectory).getProto()))) { + maybeInitBuilder(); + builder.setDestinationDirectory(((URLPBImpl) destinationDirectory) + .getProto()); + } + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/client/LocalizationProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/client/LocalizationProtocolPBClientImpl.java index 80b3f79869a..257417d0db0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/client/LocalizationProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/impl/pb/client/LocalizationProtocolPBClientImpl.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.yarn.server.nodemanager.api.impl.pb.client; +import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; @@ -35,7 +36,8 @@ import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.impl.pb.Loc import com.google.protobuf.ServiceException; -public class LocalizationProtocolPBClientImpl implements LocalizationProtocol { +public class LocalizationProtocolPBClientImpl implements LocalizationProtocol, + Closeable { private LocalizationProtocolPB proxy; @@ -44,7 +46,14 @@ public class LocalizationProtocolPBClientImpl implements LocalizationProtocol { proxy = (LocalizationProtocolPB)RPC.getProxy( LocalizationProtocolPB.class, clientVersion, addr, conf); } - + + @Override + public void close() { + if (this.proxy != null) { + RPC.stopProxy(this.proxy); + } + } + @Override public LocalizerHeartbeatResponse heartbeat(LocalizerStatus status) throws 
YarnRemoteException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/LocalizerHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/LocalizerHeartbeatResponse.java index b2f46c5ae38..9d2681a4474 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/LocalizerHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/LocalizerHeartbeatResponse.java @@ -18,18 +18,13 @@ package org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords; import java.util.List; - -import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.server.nodemanager.api.*; public interface LocalizerHeartbeatResponse { - public LocalizerAction getLocalizerAction(); - public List getAllResources(); - public LocalResource getLocalResource(int i); + public LocalizerAction getLocalizerAction(); public void setLocalizerAction(LocalizerAction action); - public void addAllResources(List resources); - public void addResource(LocalResource resource); - public void removeResource(int index); - public void clearResources(); -} + public List getResourceSpecs(); + public void setResourceSpecs(List rsrcs); +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/LocalizerHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/LocalizerHeartbeatResponsePBImpl.java index 0b791c0c14f..d46ba56e22b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/LocalizerHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/LocalizerHeartbeatResponsePBImpl.java @@ -21,13 +21,14 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.hadoop.yarn.api.records.LocalResource; + import org.apache.hadoop.yarn.api.records.ProtoBase; -import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.LocalizerActionProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.LocalizerHeartbeatResponseProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.LocalizerHeartbeatResponseProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.ResourceLocalizationSpecProto; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; +import org.apache.hadoop.yarn.server.nodemanager.api.impl.pb.ResourceLocalizationSpecPBImpl; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction; import 
org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse; @@ -40,13 +41,14 @@ public class LocalizerHeartbeatResponsePBImpl LocalizerHeartbeatResponseProto.Builder builder = null; boolean viaProto = false; - private List resources; + private List resourceSpecs; public LocalizerHeartbeatResponsePBImpl() { builder = LocalizerHeartbeatResponseProto.newBuilder(); } - public LocalizerHeartbeatResponsePBImpl(LocalizerHeartbeatResponseProto proto) { + public LocalizerHeartbeatResponsePBImpl( + LocalizerHeartbeatResponseProto proto) { this.proto = proto; viaProto = true; } @@ -59,7 +61,7 @@ public class LocalizerHeartbeatResponsePBImpl } private void mergeLocalToBuilder() { - if (resources != null) { + if (resourceSpecs != null) { addResourcesToProto(); } } @@ -79,6 +81,7 @@ public class LocalizerHeartbeatResponsePBImpl viaProto = false; } + @Override public LocalizerAction getLocalizerAction() { LocalizerHeartbeatResponseProtoOrBuilder p = viaProto ? proto : builder; if (!p.hasAction()) { @@ -87,14 +90,10 @@ public class LocalizerHeartbeatResponsePBImpl return convertFromProtoFormat(p.getAction()); } - public List getAllResources() { + @Override + public List getResourceSpecs() { initResources(); - return this.resources; - } - - public LocalResource getLocalResource(int i) { - initResources(); - return this.resources.get(i); + return this.resourceSpecs; } public void setLocalizerAction(LocalizerAction action) { @@ -106,31 +105,39 @@ public class LocalizerHeartbeatResponsePBImpl builder.setAction(convertToProtoFormat(action)); } + public void setResourceSpecs(List rsrcs) { + maybeInitBuilder(); + if (rsrcs == null) { + builder.clearResources(); + return; + } + this.resourceSpecs = rsrcs; + } + private void initResources() { - if (this.resources != null) { + if (this.resourceSpecs != null) { return; } LocalizerHeartbeatResponseProtoOrBuilder p = viaProto ? 
proto : builder; - List list = p.getResourcesList(); - this.resources = new ArrayList(); - - for (LocalResourceProto c : list) { - this.resources.add(convertFromProtoFormat(c)); + List list = p.getResourcesList(); + this.resourceSpecs = new ArrayList(); + for (ResourceLocalizationSpecProto c : list) { + this.resourceSpecs.add(convertFromProtoFormat(c)); } } private void addResourcesToProto() { maybeInitBuilder(); builder.clearResources(); - if (this.resources == null) + if (this.resourceSpecs == null) return; - Iterable iterable = - new Iterable() { + Iterable iterable = + new Iterable() { @Override - public Iterator iterator() { - return new Iterator() { + public Iterator iterator() { + return new Iterator() { - Iterator iter = resources.iterator(); + Iterator iter = resourceSpecs.iterator(); @Override public boolean hasNext() { @@ -138,8 +145,10 @@ public class LocalizerHeartbeatResponsePBImpl } @Override - public LocalResourceProto next() { - return convertToProtoFormat(iter.next()); + public ResourceLocalizationSpecProto next() { + ResourceLocalizationSpec resource = iter.next(); + + return ((ResourceLocalizationSpecPBImpl)resource).getProto(); } @Override @@ -154,34 +163,10 @@ public class LocalizerHeartbeatResponsePBImpl builder.addAllResources(iterable); } - public void addAllResources(List resources) { - if (resources == null) - return; - initResources(); - this.resources.addAll(resources); - } - public void addResource(LocalResource resource) { - initResources(); - this.resources.add(resource); - } - - public void removeResource(int index) { - initResources(); - this.resources.remove(index); - } - - public void clearResources() { - initResources(); - this.resources.clear(); - } - - private LocalResource convertFromProtoFormat(LocalResourceProto p) { - return new LocalResourcePBImpl(p); - } - - private LocalResourceProto convertToProtoFormat(LocalResource s) { - return ((LocalResourcePBImpl)s).getProto(); + private ResourceLocalizationSpec convertFromProtoFormat( + ResourceLocalizationSpecProto p) { + return new ResourceLocalizationSpecPBImpl(p); } private LocalizerActionProto convertToProtoFormat(LocalizerAction a) { @@ -191,5 +176,4 @@ public class LocalizerHeartbeatResponsePBImpl private LocalizerAction convertFromProtoFormat(LocalizerActionProto a) { return LocalizerAction.valueOf(a.name()); } - -} +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index b4a0034217c..8fc8a3ed6fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -308,7 +308,9 @@ public class ContainerManagerImpl extends CompositeService implements * @throws YarnRemoteException */ private void authorizeRequest(String containerIDStr, - ContainerLaunchContext launchContext, UserGroupInformation remoteUgi) + ContainerLaunchContext launchContext, + org.apache.hadoop.yarn.api.records.Container container, + UserGroupInformation remoteUgi) 
throws YarnRemoteException { if (!UserGroupInformation.isSecurityEnabled()) { @@ -369,10 +371,10 @@ public class ContainerManagerImpl extends CompositeService implements } Resource resource = tokenId.getResource(); - if (!resource.equals(launchContext.getResource())) { + if (!resource.equals(container.getResource())) { unauthorized = true; messageBuilder.append("\nExpected resource " + resource - + " but found " + launchContext.getResource()); + + " but found " + container.getResource()); } } } @@ -392,12 +394,13 @@ public class ContainerManagerImpl extends CompositeService implements public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { ContainerLaunchContext launchContext = request.getContainerLaunchContext(); - - ContainerId containerID = launchContext.getContainerId(); + org.apache.hadoop.yarn.api.records.Container lauchContainer = + request.getContainer(); + ContainerId containerID = lauchContainer.getId(); String containerIDStr = containerID.toString(); UserGroupInformation remoteUgi = getRemoteUgi(containerIDStr); - authorizeRequest(containerIDStr, launchContext, remoteUgi); + authorizeRequest(containerIDStr, launchContext, lauchContainer, remoteUgi); LOG.info("Start request for " + containerIDStr + " by user " + launchContext.getUser()); @@ -424,7 +427,7 @@ public class ContainerManagerImpl extends CompositeService implements // //////////// End of parsing credentials Container container = new ContainerImpl(getConfig(), this.dispatcher, - launchContext, credentials, metrics); + launchContext, lauchContainer, credentials, metrics); ApplicationId applicationID = containerID.getApplicationAttemptId().getApplicationId(); if (context.getContainers().putIfAbsent(containerID, container) != null) { @@ -469,7 +472,7 @@ public class ContainerManagerImpl extends CompositeService implements // TODO launchedContainer misplaced -> doesn't necessarily mean a container // launch. A finished Application will not launch containers. metrics.launchedContainer(); - metrics.allocateContainer(launchContext.getResource()); + metrics.allocateContainer(lauchContainer.getResource()); return response; } @@ -487,7 +490,7 @@ public class ContainerManagerImpl extends CompositeService implements // TODO: Only the container's owner can kill containers today. UserGroupInformation remoteUgi = getRemoteUgi(containerIDStr); - authorizeRequest(containerIDStr, null, remoteUgi); + authorizeRequest(containerIDStr, null, null, remoteUgi); StopContainerResponse response = recordFactory.newRecordInstance(StopContainerResponse.class); @@ -529,7 +532,7 @@ public class ContainerManagerImpl extends CompositeService implements // TODO: Only the container's owner can get containers' status today. 
UserGroupInformation remoteUgi = getRemoteUgi(containerIDStr); - authorizeRequest(containerIDStr, null, remoteUgi); + authorizeRequest(containerIDStr, null, null, remoteUgi); LOG.info("Getting container-status for " + containerIDStr); Container container = this.context.getContainers().get(containerID); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index af0f92ee6fc..a43e1b74d2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.EventHandler; public interface Container extends EventHandler { @@ -44,4 +45,6 @@ public interface Container extends EventHandler { ContainerStatus cloneAndGetContainerStatus(); String toString(); + + Resource getResource(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index b4752ff8f5d..ad2428ae178 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; @@ -76,6 +77,7 @@ public class ContainerImpl implements Container { private final Credentials credentials; private final NodeManagerMetrics metrics; private final ContainerLaunchContext launchContext; + private final org.apache.hadoop.yarn.api.records.Container container; private int exitCode = YarnConfiguration.INVALID_CONTAINER_EXIT_STATUS; private final StringBuilder diagnostics; @@ -96,12 +98,13 @@ public class ContainerImpl implements Container { new ArrayList(); public ContainerImpl(Configuration conf, - Dispatcher dispatcher, - ContainerLaunchContext launchContext, Credentials creds, - NodeManagerMetrics metrics) { + Dispatcher 
dispatcher, ContainerLaunchContext launchContext, + org.apache.hadoop.yarn.api.records.Container container, + Credentials creds, NodeManagerMetrics metrics) { this.daemonConf = conf; this.dispatcher = dispatcher; this.launchContext = launchContext; + this.container = container; this.diagnostics = new StringBuilder(); this.credentials = creds; this.metrics = metrics; @@ -312,7 +315,7 @@ public class ContainerImpl implements Container { public ContainerId getContainerID() { this.readLock.lock(); try { - return this.launchContext.getContainerId(); + return this.container.getId(); } finally { this.readLock.unlock(); } @@ -373,50 +376,61 @@ public class ContainerImpl implements Container { public ContainerStatus cloneAndGetContainerStatus() { this.readLock.lock(); try { - return BuilderUtils.newContainerStatus(this.getContainerID(), + return BuilderUtils.newContainerStatus(this.container.getId(), getCurrentState(), diagnostics.toString(), exitCode); } finally { this.readLock.unlock(); } } + @Override + public Resource getResource() { + this.readLock.lock(); + try { + return this.container.getResource(); + } finally { + this.readLock.unlock(); + } + } + @SuppressWarnings({"fallthrough", "unchecked"}) private void finished() { + ContainerId containerID = this.container.getId(); + String user = this.launchContext.getUser(); switch (getContainerState()) { case EXITED_WITH_SUCCESS: metrics.endRunningContainer(); metrics.completedContainer(); - NMAuditLogger.logSuccess(getUser(), + NMAuditLogger.logSuccess(user, AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", - getContainerID().getApplicationAttemptId().getApplicationId(), - getContainerID()); + containerID.getApplicationAttemptId() + .getApplicationId(), containerID); break; case EXITED_WITH_FAILURE: metrics.endRunningContainer(); // fall through case LOCALIZATION_FAILED: metrics.failedContainer(); - NMAuditLogger.logFailure(getUser(), + NMAuditLogger.logFailure(user, AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", "Container failed with state: " + getContainerState(), - getContainerID().getApplicationAttemptId().getApplicationId(), - getContainerID()); + containerID.getApplicationAttemptId() + .getApplicationId(), containerID); break; case CONTAINER_CLEANEDUP_AFTER_KILL: metrics.endRunningContainer(); // fall through case NEW: metrics.killedContainer(); - NMAuditLogger.logSuccess(getUser(), + NMAuditLogger.logSuccess(user, AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", - getContainerID().getApplicationAttemptId().getApplicationId(), - getContainerID()); + containerID.getApplicationAttemptId().getApplicationId(), + containerID); } - metrics.releaseContainer(getLaunchContext().getResource()); + metrics.releaseContainer(this.container.getResource()); // Inform the application - ContainerId containerID = getContainerID(); @SuppressWarnings("rawtypes") EventHandler eventHandler = dispatcher.getEventHandler(); eventHandler.handle(new ApplicationContainerFinishedEvent(containerID)); @@ -475,7 +489,7 @@ public class ContainerImpl implements Container { @Override public ContainerState transition(ContainerImpl container, ContainerEvent event) { - final ContainerLaunchContext ctxt = container.getLaunchContext(); + final ContainerLaunchContext ctxt = container.launchContext; container.metrics.initingContainer(); // Inform the AuxServices about the opaque serviceData @@ -486,9 +500,9 @@ public class ContainerImpl implements Container { for (Map.Entry service : csd.entrySet()) { container.dispatcher.getEventHandler().handle( new 
AuxServicesEvent(AuxServicesEventType.APPLICATION_INIT, - ctxt.getUser(), - ctxt.getContainerId().getApplicationAttemptId().getApplicationId(), - service.getKey().toString(), service.getValue())); + ctxt.getUser(), container.container.getId() + .getApplicationAttemptId().getApplicationId(), + service.getKey().toString(), service.getValue())); } } @@ -571,7 +585,7 @@ public class ContainerImpl implements Container { container.pendingResources.remove(rsrcEvent.getResource()); if (null == syms) { LOG.warn("Localized unknown resource " + rsrcEvent.getResource() + - " for container " + container.getContainerID()); + " for container " + container.container.getId()); assert false; // fail container? return ContainerState.LOCALIZING; @@ -599,14 +613,14 @@ public class ContainerImpl implements Container { // Inform the ContainersMonitor to start monitoring the container's // resource usage. long pmemBytes = - container.getLaunchContext().getResource().getMemory() * 1024 * 1024L; + container.container.getResource().getMemory() * 1024 * 1024L; float pmemRatio = container.daemonConf.getFloat( YarnConfiguration.NM_VMEM_PMEM_RATIO, YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); long vmemBytes = (long) (pmemRatio * pmemBytes); container.dispatcher.getEventHandler().handle( - new ContainerStartMonitoringEvent(container.getContainerID(), + new ContainerStartMonitoringEvent(container.container.getId(), vmemBytes, pmemBytes)); container.metrics.runningContainer(); } @@ -740,7 +754,7 @@ public class ContainerImpl implements Container { container.pendingResources.remove(rsrcEvent.getResource()); if (null == syms) { LOG.warn("Localized unknown resource " + rsrcEvent.getResource() + - " for container " + container.getContainerID()); + " for container " + container.container.getId()); assert false; // fail container? return; @@ -845,10 +859,9 @@ public class ContainerImpl implements Container { public String toString() { this.readLock.lock(); try { - return ConverterUtils.toString(launchContext.getContainerId()); + return ConverterUtils.toString(container.getId()); } finally { this.readLock.unlock(); } } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 71809b2d7de..71345e0a39d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -249,9 +249,8 @@ public class ContainerLaunch implements Callable { } catch (Throwable e) { LOG.warn("Failed to launch container.", e); dispatcher.getEventHandler().handle(new ContainerExitEvent( - launchContext.getContainerId(), - ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, - e.getMessage())); + containerID, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, + e.getMessage())); return ret; } finally { completed.set(true); @@ -267,7 +266,7 @@ public class ContainerLaunch implements Callable { // If the process was killed, Send container_cleanedup_after_kill and // just break out of this method. 
dispatcher.getEventHandler().handle( - new ContainerExitEvent(launchContext.getContainerId(), + new ContainerExitEvent(containerID, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret, "Container exited with a non-zero exit code " + ret)); return ret; @@ -276,15 +275,15 @@ public class ContainerLaunch implements Callable { if (ret != 0) { LOG.warn("Container exited with a non-zero exit code " + ret); this.dispatcher.getEventHandler().handle(new ContainerExitEvent( - launchContext.getContainerId(), - ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, - "Container exited with a non-zero exit code " + ret)); + containerID, + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, + "Container exited with a non-zero exit code " + ret)); return ret; } LOG.info("Container " + containerIdStr + " succeeded "); dispatcher.getEventHandler().handle( - new ContainerEvent(launchContext.getContainerId(), + new ContainerEvent(containerID, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS)); return 0; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java index 74d0227c918..0e5e398c45d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ContainerLocalizer.java @@ -51,6 +51,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; @@ -59,6 +60,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus; @@ -89,8 +91,6 @@ public class ContainerLocalizer { private final String localizerId; private final FileContext lfs; private final Configuration conf; - private final LocalDirAllocator appDirs; - private final LocalDirAllocator userDirs; private final RecordFactory recordFactory; private final Map> pendingResources; private final String appCacheDirContextName; @@ -112,8 +112,6 @@ public class ContainerLocalizer { this.recordFactory = recordFactory; this.conf = new Configuration(); this.appCacheDirContextName = String.format(APPCACHE_CTXT_FMT, appId); - this.appDirs = new LocalDirAllocator(appCacheDirContextName); - this.userDirs = new 
LocalDirAllocator(String.format(USERCACHE_CTXT_FMT, user)); this.pendingResources = new HashMap>(); } @@ -197,10 +195,10 @@ public class ContainerLocalizer { return new ExecutorCompletionService(exec); } - Callable download(LocalDirAllocator lda, LocalResource rsrc, + Callable download(Path path, LocalResource rsrc, UserGroupInformation ugi) throws IOException { - Path destPath = lda.getLocalPathForWrite(".", getEstimatedSize(rsrc), conf); - return new FSDownload(lfs, ugi, conf, destPath, rsrc, new Random()); + DiskChecker.checkDir(new File(path.toUri().getRawPath())); + return new FSDownload(lfs, ugi, conf, path, rsrc); } static long getEstimatedSize(LocalResource rsrc) { @@ -238,25 +236,12 @@ public class ContainerLocalizer { LocalizerHeartbeatResponse response = nodemanager.heartbeat(status); switch (response.getLocalizerAction()) { case LIVE: - List newResources = response.getAllResources(); - for (LocalResource r : newResources) { - if (!pendingResources.containsKey(r)) { - final LocalDirAllocator lda; - switch (r.getVisibility()) { - default: - LOG.warn("Unknown visibility: " + r.getVisibility() - + ", Using userDirs"); - //Falling back to userDirs for unknown visibility. - case PUBLIC: - case PRIVATE: - lda = userDirs; - break; - case APPLICATION: - lda = appDirs; - break; - } - // TODO: Synchronization?? - pendingResources.put(r, cs.submit(download(lda, r, ugi))); + List newRsrcs = response.getResourceSpecs(); + for (ResourceLocalizationSpec newRsrc : newRsrcs) { + if (!pendingResources.containsKey(newRsrc.getResource())) { + pendingResources.put(newRsrc.getResource(), cs.submit(download( + new Path(newRsrc.getDestinationDirectory().getFile()), + newRsrc.getResource(), ugi))); } } break; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java new file mode 100644 index 00000000000..8a3b6bf2088 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; + +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Queue; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.conf.YarnConfiguration; + +/** + * {@link LocalCacheDirectoryManager} is used for managing hierarchical + * directories for local cache. It will allow to restrict the number of files in + * a directory to + * {@link YarnConfiguration#NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY} which + * includes 36 sub-directories (named from 0 to 9 and a to z). Root directory is + * represented by an empty string. It internally maintains a vacant directory + * queue. As soon as the file count for the directory reaches its limit; new + * files will not be created in it until at least one file is deleted from it. + * New sub directories are not created unless a + * {@link LocalCacheDirectoryManager#getRelativePathForLocalization()} request + * is made and nonFullDirectories are empty. + * + * Note : this structure only returns relative localization path but doesn't + * create one on disk. + */ +public class LocalCacheDirectoryManager { + + private final int perDirectoryFileLimit; + // total 36 = a to z plus 0 to 9 + public static final int DIRECTORIES_PER_LEVEL = 36; + + private Queue nonFullDirectories; + private HashMap knownDirectories; + private int totalSubDirectories; + + public LocalCacheDirectoryManager(Configuration conf) { + totalSubDirectories = 0; + Directory rootDir = new Directory(totalSubDirectories); + nonFullDirectories = new LinkedList(); + knownDirectories = new HashMap(); + knownDirectories.put("", rootDir); + nonFullDirectories.add(rootDir); + this.perDirectoryFileLimit = + conf.getInt(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, + YarnConfiguration.DEFAULT_NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY) - 36; + } + + /** + * This method will return relative path from the first available vacant + * directory. + * + * @return {@link String} relative path for localization + */ + public synchronized String getRelativePathForLocalization() { + if (nonFullDirectories.isEmpty()) { + totalSubDirectories++; + Directory newDir = new Directory(totalSubDirectories); + nonFullDirectories.add(newDir); + knownDirectories.put(newDir.getRelativePath(), newDir); + } + Directory subDir = nonFullDirectories.peek(); + if (subDir.incrementAndGetCount() >= perDirectoryFileLimit) { + nonFullDirectories.remove(); + } + return subDir.getRelativePath(); + } + + /** + * This method will reduce the file count for the directory represented by + * path. The root directory of this Local cache directory manager is + * represented by an empty string. + */ + public synchronized void decrementFileCountForPath(String relPath) { + relPath = relPath == null ? "" : relPath.trim(); + Directory subDir = knownDirectories.get(relPath); + int oldCount = subDir.getCount(); + if (subDir.decrementAndGetCount() < perDirectoryFileLimit + && oldCount >= perDirectoryFileLimit) { + nonFullDirectories.add(subDir); + } + } + + /* + * It limits the number of files and sub directories in the directory to the + * limit LocalCacheDirectoryManager#perDirectoryFileLimit. 
+ */ + static class Directory { + + private final String relativePath; + private int fileCount; + + public Directory(int directoryNo) { + fileCount = 0; + if (directoryNo == 0) { + relativePath = ""; + } else { + String tPath = Integer.toString(directoryNo - 1, DIRECTORIES_PER_LEVEL); + StringBuffer sb = new StringBuffer(); + if (tPath.length() == 1) { + sb.append(tPath.charAt(0)); + } else { + // this is done to make sure we also reuse 0th sub directory + sb.append(Integer.toString( + Integer.parseInt(tPath.substring(0, 1), DIRECTORIES_PER_LEVEL) - 1, + DIRECTORIES_PER_LEVEL)); + } + for (int i = 1; i < tPath.length(); i++) { + sb.append(Path.SEPARATOR).append(tPath.charAt(i)); + } + relativePath = sb.toString(); + } + } + + public int incrementAndGetCount() { + return ++fileCount; + } + + public int decrementAndGetCount() { + return --fileCount; + } + + public String getRelativePath() { + return relativePath; + } + + public int getCount() { + return fileCount; + } + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java index b24d8afb8ab..98ec471abf0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; @@ -35,6 +36,9 @@ interface LocalResourcesTracker boolean remove(LocalizedResource req, DeletionService delService); + Path getPathForLocalization(LocalResourceRequest req, Path localDirPath); + String getUser(); + long nextUniqueNumber(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java index 01ec38397b6..786b58ca5d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java @@ -21,17 +21,20 @@ import java.io.File; import java.util.Iterator; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; import 
org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; + /** * A collection of {@link LocalizedResource}s all of same @@ -49,30 +52,72 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { private final String user; private final Dispatcher dispatcher; private final ConcurrentMap localrsrc; + private Configuration conf; + /* + * This flag controls whether this resource tracker uses hierarchical + * directories or not. For PRIVATE and PUBLIC resource trackers it + * will be set whereas for APPLICATION resource tracker it would + * be false. + */ + private final boolean useLocalCacheDirectoryManager; + private ConcurrentHashMap directoryManagers; + /* + * It is used to keep track of resource into hierarchical directory + * while it is getting downloaded. It is useful for reference counting + * in case resource localization fails. + */ + private ConcurrentHashMap + inProgressLocalResourcesMap; + /* + * starting with 10 to accommodate 0-9 directories created as a part of + * LocalCacheDirectoryManager. So there will be one unique number generator + * per APPLICATION, USER and PUBLIC cache. + */ + private AtomicLong uniqueNumberGenerator = new AtomicLong(9); - public LocalResourcesTrackerImpl(String user, Dispatcher dispatcher) { + public LocalResourcesTrackerImpl(String user, Dispatcher dispatcher, + boolean useLocalCacheDirectoryManager, Configuration conf) { this(user, dispatcher, - new ConcurrentHashMap()); + new ConcurrentHashMap(), + useLocalCacheDirectoryManager, conf); } LocalResourcesTrackerImpl(String user, Dispatcher dispatcher, - ConcurrentMap localrsrc) { + ConcurrentMap localrsrc, + boolean useLocalCacheDirectoryManager, Configuration conf) { this.user = user; this.dispatcher = dispatcher; this.localrsrc = localrsrc; + this.useLocalCacheDirectoryManager = useLocalCacheDirectoryManager; + if ( this.useLocalCacheDirectoryManager) { + directoryManagers = new ConcurrentHashMap(); + inProgressLocalResourcesMap = + new ConcurrentHashMap(); + } + this.conf = conf; } + /* + * Synchronizing this method for avoiding races due to multiple ResourceEvent's + * coming to LocalResourcesTracker from Public/Private localizer and + * Resource Localization Service. 
+ */ @Override - public void handle(ResourceEvent event) { + public synchronized void handle(ResourceEvent event) { LocalResourceRequest req = event.getLocalResourceRequest(); LocalizedResource rsrc = localrsrc.get(req); switch (event.getType()) { - case REQUEST: case LOCALIZED: + if (useLocalCacheDirectoryManager) { + inProgressLocalResourcesMap.remove(req); + } + break; + case REQUEST: if (rsrc != null && (!isResourcePresent(rsrc))) { LOG.info("Resource " + rsrc.getLocalPath() + " is missing, localizing it again"); localrsrc.remove(req); + decrementFileCountForLocalCacheDirectory(req, rsrc); rsrc = null; } if (null == rsrc) { @@ -82,15 +127,74 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { break; case RELEASE: if (null == rsrc) { - LOG.info("Release unknown rsrc null (discard)"); + // The container sent a release event on a resource which + // 1) Failed + // 2) Removed for some reason (ex. disk is no longer accessible) + ResourceReleaseEvent relEvent = (ResourceReleaseEvent) event; + LOG.info("Container " + relEvent.getContainer() + + " sent RELEASE event on a resource request " + req + + " not present in cache."); return; } break; + case LOCALIZATION_FAILED: + decrementFileCountForLocalCacheDirectory(req, null); + /* + * If resource localization fails then Localized resource will be + * removed from local cache. + */ + localrsrc.remove(req); + break; } rsrc.handle(event); } - /** + /* + * Update the file-count statistics for a local cache-directory. + * This will retrieve the localized path for the resource from + * 1) inProgressRsrcMap if the resource was under localization and it + * failed. + * 2) LocalizedResource if the resource is already localized. + * From this path it will identify the local directory under which the + * resource was localized. Then rest of the path will be used to decrement + * file count for the HierarchicalSubDirectory pointing to this relative + * path. + */ + private void decrementFileCountForLocalCacheDirectory(LocalResourceRequest req, + LocalizedResource rsrc) { + if ( useLocalCacheDirectoryManager) { + Path rsrcPath = null; + if (inProgressLocalResourcesMap.containsKey(req)) { + // This happens when localization of a resource fails. + rsrcPath = inProgressLocalResourcesMap.remove(req); + } else if (rsrc != null && rsrc.getLocalPath() != null) { + rsrcPath = rsrc.getLocalPath().getParent().getParent(); + } + if (rsrcPath != null) { + Path parentPath = new Path(rsrcPath.toUri().getRawPath()); + while (!directoryManagers.containsKey(parentPath)) { + parentPath = parentPath.getParent(); + if ( parentPath == null) { + return; + } + } + if ( parentPath != null) { + String parentDir = parentPath.toUri().getRawPath().toString(); + LocalCacheDirectoryManager dir = directoryManagers.get(parentPath); + String rsrcDir = rsrcPath.toUri().getRawPath(); + if (rsrcDir.equals(parentDir)) { + dir.decrementFileCountForPath(""); + } else { + dir.decrementFileCountForPath( + rsrcDir.substring( + parentDir.length() + 1)); + } + } + } + } + } + +/** * This module checks if the resource which was localized is already present * or not * @@ -100,7 +204,8 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { public boolean isResourcePresent(LocalizedResource rsrc) { boolean ret = true; if (rsrc.getState() == ResourceState.LOCALIZED) { - File file = new File(rsrc.getLocalPath().toUri().getRawPath().toString()); + File file = new File(rsrc.getLocalPath().toUri().getRawPath(). 
+ toString()); if (!file.exists()) { ret = false; } @@ -133,11 +238,11 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { if (ResourceState.LOCALIZED.equals(rsrc.getState())) { delService.delete(getUser(), getPathToDelete(rsrc.getLocalPath())); } + decrementFileCountForLocalCacheDirectory(rem.getRequest(), rsrc); return true; } } - /** * Returns the path up to the random directory component. */ @@ -163,4 +268,43 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { public Iterator iterator() { return localrsrc.values().iterator(); } -} + + /** + * @return {@link Path} absolute path for localization which includes local + * directory path and the relative hierarchical path (if use local + * cache directory manager is enabled) + * + * @param {@link LocalResourceRequest} Resource localization request to + * localize the resource. + * @param {@link Path} local directory path + */ + @Override + public Path + getPathForLocalization(LocalResourceRequest req, Path localDirPath) { + if (useLocalCacheDirectoryManager && localDirPath != null) { + + if (!directoryManagers.containsKey(localDirPath)) { + directoryManagers.putIfAbsent(localDirPath, + new LocalCacheDirectoryManager(conf)); + } + LocalCacheDirectoryManager dir = directoryManagers.get(localDirPath); + + Path rPath = localDirPath; + String hierarchicalPath = dir.getRelativePathForLocalization(); + // For most of the scenarios we will get root path only which + // is an empty string + if (!hierarchicalPath.isEmpty()) { + rPath = new Path(localDirPath, hierarchicalPath); + } + inProgressLocalResourcesMap.put(req, rPath); + return rPath; + } else { + return localDirPath; + } + } + + @Override + public long nextUniqueNumber() { + return uniqueNumberGenerator.incrementAndGet(); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java index 00709fd91c2..f0cd87b573a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java @@ -32,10 +32,12 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceFailedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent; @@ -89,6 +91,8 @@ public class LocalizedResource implements EventHandler { .addTransition(ResourceState.DOWNLOADING, EnumSet.of(ResourceState.DOWNLOADING, ResourceState.INIT), ResourceEventType.RELEASE, new ReleasePendingTransition()) + .addTransition(ResourceState.DOWNLOADING, ResourceState.FAILED, + ResourceEventType.LOCALIZATION_FAILED, new FetchFailedTransition()) // From LOCALIZED (ref >= 0, on disk) .addTransition(ResourceState.LOCALIZED, ResourceState.LOCALIZED, @@ -126,12 +130,14 @@ public class LocalizedResource implements EventHandler { } private void release(ContainerId container) { - if (!ref.remove(container)) { - LOG.info("Attempt to release claim on " + this + - " from unregistered container " + container); - assert false; // TODO: FIX + if (ref.remove(container)) { + // updating the timestamp only in case of success. + timestamp.set(currentTime()); + } else { + LOG.info("Container " + container + + " doesn't exist in the container list of the Resource " + this + + " to which it sent RELEASE event"); } - timestamp.set(currentTime()); } private long currentTime() { @@ -250,6 +256,25 @@ public class LocalizedResource implements EventHandler { } } + /** + * Resource localization failed, notify waiting containers. + */ + @SuppressWarnings("unchecked") + private static class FetchFailedTransition extends ResourceTransition { + @Override + public void transition(LocalizedResource rsrc, ResourceEvent event) { + ResourceFailedLocalizationEvent failedEvent = + (ResourceFailedLocalizationEvent) event; + Queue containers = rsrc.ref; + Throwable failureCause = failedEvent.getCause(); + for (ContainerId container : containers) { + rsrc.dispatcher.getEventHandler().handle( + new ContainerResourceFailedEvent(container, failedEvent + .getLocalResourceRequest(), failureCause)); + } + } + } + /** * Resource already localized, notify immediately. 
*/ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index 29971c5b65b..7b9873a1f45 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -34,7 +34,6 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.concurrent.CancellationException; import java.util.concurrent.CompletionService; import java.util.concurrent.ConcurrentHashMap; @@ -64,6 +63,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -79,6 +79,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse; @@ -99,11 +100,13 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.security.LocalizerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.authorize.NMPolicyProvider; +import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerBuilderUtils; import org.apache.hadoop.yarn.service.AbstractService; import org.apache.hadoop.yarn.service.CompositeService; import org.apache.hadoop.yarn.util.ConverterUtils; @@ -130,7 +133,7 @@ public class 
ResourceLocalizationService extends CompositeService private RecordFactory recordFactory; private final ScheduledExecutorService cacheCleanup; - private final LocalResourcesTracker publicRsrc; + private LocalResourcesTracker publicRsrc; private LocalDirsHandlerService dirsHandler; @@ -158,7 +161,6 @@ public class ResourceLocalizationService extends CompositeService this.delService = delService; this.dirsHandler = dirsHandler; - this.publicRsrc = new LocalResourcesTrackerImpl(null, dispatcher); this.cacheCleanup = new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder() .setNameFormat("ResourceLocalizationService Cache Cleanup") @@ -173,8 +175,26 @@ public class ResourceLocalizationService extends CompositeService } } + private void validateConf(Configuration conf) { + int perDirFileLimit = + conf.getInt(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, + YarnConfiguration.DEFAULT_NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY); + if (perDirFileLimit <= 36) { + LOG.error(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY + + " parameter is configured with very low value."); + throw new YarnException( + YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY + + " parameter is configured with a value less than 37."); + } else { + LOG.info("per directory file limit = " + perDirFileLimit); + } + } + @Override public void init(Configuration conf) { + this.validateConf(conf); + this.publicRsrc = + new LocalResourcesTrackerImpl(null, dispatcher, true, conf); this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); try { @@ -212,6 +232,7 @@ public class ResourceLocalizationService extends CompositeService YarnConfiguration.NM_LOCALIZER_ADDRESS, YarnConfiguration.DEFAULT_NM_LOCALIZER_ADDRESS, YarnConfiguration.DEFAULT_NM_LOCALIZER_PORT); + localizerTracker = createLocalizerTracker(conf); addService(localizerTracker); dispatcher.register(LocalizerEventType.class, localizerTracker); @@ -306,15 +327,17 @@ public class ResourceLocalizationService extends CompositeService private void handleInitApplicationResources(Application app) { // 0) Create application tracking structs String userName = app.getUser(); - privateRsrc.putIfAbsent(userName, - new LocalResourcesTrackerImpl(userName, dispatcher)); - if (null != appRsrc.putIfAbsent(ConverterUtils.toString(app.getAppId()), - new LocalResourcesTrackerImpl(app.getUser(), dispatcher))) { + privateRsrc.putIfAbsent(userName, new LocalResourcesTrackerImpl(userName, + dispatcher, true, super.getConfig())); + if (null != appRsrc.putIfAbsent( + ConverterUtils.toString(app.getAppId()), + new LocalResourcesTrackerImpl(app.getUser(), dispatcher, false, super + .getConfig()))) { LOG.warn("Initializing application " + app + " already present"); assert false; // TODO: FIXME assert doesn't help // ^ The condition is benign. Tests should fail and it - // should appear in logs, but it's an internal error - // that should have no effect on applications + // should appear in logs, but it's an internal error + // that should have no effect on applications } // 1) Signal container init // @@ -455,6 +478,21 @@ public class ResourceLocalizationService extends CompositeService } } + private String getUserFileCachePath(String user) { + String path = + "." + Path.SEPARATOR + ContainerLocalizer.USERCACHE + Path.SEPARATOR + + user + Path.SEPARATOR + ContainerLocalizer.FILECACHE; + return path; + } + + private String getUserAppCachePath(String user, String appId) { + String path = + "." 
+ Path.SEPARATOR + ContainerLocalizer.USERCACHE + Path.SEPARATOR + + user + Path.SEPARATOR + ContainerLocalizer.APPCACHE + + Path.SEPARATOR + appId; + return path; + } + /** * Sub-component handling the spawning of {@link ContainerLocalizer}s */ @@ -620,8 +658,18 @@ public class ResourceLocalizationService extends CompositeService Path publicDirDestPath = dirsHandler.getLocalPathForWrite( "." + Path.SEPARATOR + ContainerLocalizer.FILECACHE, ContainerLocalizer.getEstimatedSize(resource), true); + Path hierarchicalPath = + publicRsrc.getPathForLocalization(key, publicDirDestPath); + if (!hierarchicalPath.equals(publicDirDestPath)) { + publicDirDestPath = hierarchicalPath; + DiskChecker.checkDir( + new File(publicDirDestPath.toUri().getPath())); + } + publicDirDestPath = + new Path(publicDirDestPath, Long.toString(publicRsrc + .nextUniqueNumber())); pending.put(queue.submit(new FSDownload( - lfs, null, conf, publicDirDestPath, resource, new Random())), + lfs, null, conf, publicDirDestPath, resource)), request); attempts.put(key, new LinkedList()); } catch (IOException e) { @@ -635,7 +683,6 @@ public class ResourceLocalizationService extends CompositeService } @Override - @SuppressWarnings("unchecked") // dispatcher not typed public void run() { try { // TODO shutdown, better error handling esp. DU @@ -651,22 +698,19 @@ public class ResourceLocalizationService extends CompositeService return; } LocalResourceRequest key = assoc.getResource().getRequest(); - assoc.getResource().handle( - new ResourceLocalizedEvent(key, - local, FileUtil.getDU(new File(local.toUri())))); + publicRsrc.handle(new ResourceLocalizedEvent(key, local, FileUtil + .getDU(new File(local.toUri())))); synchronized (attempts) { attempts.remove(key); } } catch (ExecutionException e) { LOG.info("Failed to download rsrc " + assoc.getResource(), e.getCause()); - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent( - assoc.getContext().getContainerId(), - assoc.getResource().getRequest(), e.getCause())); - List reqs; + LocalResourceRequest req = assoc.getResource().getRequest(); + publicRsrc.handle(new ResourceFailedLocalizationEvent(req, e + .getCause())); synchronized (attempts) { - LocalResourceRequest req = assoc.getResource().getRequest(); + List reqs; reqs = attempts.get(req); if (null == reqs) { LOG.error("Missing pending list for " + req); @@ -674,13 +718,6 @@ public class ResourceLocalizationService extends CompositeService } attempts.remove(req); } - // let the other containers know about the localization failure - for (LocalizerResourceRequestEvent reqEvent : reqs) { - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent( - reqEvent.getContext().getContainerId(), - reqEvent.getResource().getRequest(), e.getCause())); - } } catch (CancellationException e) { // ignore; shutting down } @@ -760,20 +797,34 @@ public class ResourceLocalizationService extends CompositeService return null; } - // TODO this sucks. Fix it later - @SuppressWarnings("unchecked") // dispatcher not typed LocalizerHeartbeatResponse update( List remoteResourceStatuses) { LocalizerHeartbeatResponse response = recordFactory.newRecordInstance(LocalizerHeartbeatResponse.class); + String user = context.getUser(); + ApplicationId applicationId = + context.getContainerId().getApplicationAttemptId().getApplicationId(); // The localizer has just spawned. Start giving it resources for // remote-fetching. 
if (remoteResourceStatuses.isEmpty()) { LocalResource next = findNextResource(); if (next != null) { response.setLocalizerAction(LocalizerAction.LIVE); - response.addResource(next); + try { + ArrayList rsrcs = + new ArrayList(); + ResourceLocalizationSpec rsrc = + NodeManagerBuilderUtils.newResourceLocalizationSpec(next, + getPathForLocalization(next)); + rsrcs.add(rsrc); + response.setResourceSpecs(rsrcs); + } catch (IOException e) { + LOG.error("local path for PRIVATE localization could not be found." + + "Disks might have failed.", e); + } catch (URISyntaxException e) { + // TODO fail? Already translated several times... + } } else if (pending.isEmpty()) { // TODO: Synchronization response.setLocalizerAction(LocalizerAction.DIE); @@ -782,6 +833,12 @@ public class ResourceLocalizationService extends CompositeService } return response; } + ArrayList rsrcs = + new ArrayList(); + /* + * TODO : It doesn't support multiple downloads per ContainerLocalizer + * at the same time. We need to think whether we should support this. + */ for (LocalResourceStatus stat : remoteResourceStatuses) { LocalResource rsrc = stat.getResource(); @@ -801,10 +858,10 @@ public class ResourceLocalizationService extends CompositeService case FETCH_SUCCESS: // notify resource try { - assoc.getResource().handle( - new ResourceLocalizedEvent(req, - ConverterUtils.getPathFromYarnURL(stat.getLocalPath()), - stat.getLocalSize())); + getLocalResourcesTracker(req.getVisibility(), user, applicationId) + .handle( + new ResourceLocalizedEvent(req, ConverterUtils + .getPathFromYarnURL(stat.getLocalPath()), stat.getLocalSize())); } catch (URISyntaxException e) { } if (pending.isEmpty()) { // TODO: Synchronization @@ -814,7 +871,17 @@ public class ResourceLocalizationService extends CompositeService response.setLocalizerAction(LocalizerAction.LIVE); LocalResource next = findNextResource(); if (next != null) { - response.addResource(next); + try { + ResourceLocalizationSpec resource = + NodeManagerBuilderUtils.newResourceLocalizationSpec(next, + getPathForLocalization(next)); + rsrcs.add(resource); + } catch (IOException e) { + LOG.error("local path for PRIVATE localization could not be " + + "found. Disks might have failed.", e); + } catch (URISyntaxException e) { + //TODO fail? Already translated several times... + } } break; case FETCH_PENDING: @@ -824,24 +891,45 @@ public class ResourceLocalizationService extends CompositeService LOG.info("DEBUG: FAILED " + req, stat.getException()); assoc.getResource().unlock(); response.setLocalizerAction(LocalizerAction.DIE); - // TODO: Why is this event going directly to the container. Why not - // the resource itself? What happens to the resource? Is it removed? 
- dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent(context.getContainerId(), - req, stat.getException())); + getLocalResourcesTracker(req.getVisibility(), user, applicationId) + .handle( + new ResourceFailedLocalizationEvent(req, stat.getException())); break; default: LOG.info("Unknown status: " + stat.getStatus()); response.setLocalizerAction(LocalizerAction.DIE); - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent(context.getContainerId(), - req, stat.getException())); + getLocalResourcesTracker(req.getVisibility(), user, applicationId) + .handle( + new ResourceFailedLocalizationEvent(req, stat.getException())); break; } } + response.setResourceSpecs(rsrcs); return response; } + private Path getPathForLocalization(LocalResource rsrc) throws IOException, + URISyntaxException { + String user = context.getUser(); + ApplicationId appId = + context.getContainerId().getApplicationAttemptId().getApplicationId(); + LocalResourceVisibility vis = rsrc.getVisibility(); + LocalResourcesTracker tracker = + getLocalResourcesTracker(vis, user, appId); + String cacheDirectory = null; + if (vis == LocalResourceVisibility.PRIVATE) {// PRIVATE Only + cacheDirectory = getUserFileCachePath(user); + } else {// APPLICATION ONLY + cacheDirectory = getUserAppCachePath(user, appId.toString()); + } + Path dirPath = + dirsHandler.getLocalPathForWrite(cacheDirectory, + ContainerLocalizer.getEstimatedSize(rsrc), false); + dirPath = tracker.getPathForLocalization(new LocalResourceRequest(rsrc), + dirPath); + return new Path (dirPath, Long.toString(tracker.nextUniqueNumber())); + } + @Override @SuppressWarnings("unchecked") // dispatcher not typed public void run() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java index 751f60e0af1..75c8ad7663c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java @@ -20,5 +20,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; enum ResourceState { INIT, DOWNLOADING, - LOCALIZED + LOCALIZED, + FAILED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java index d68a1b6d391..e657c0acf3c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java @@ -29,5 +29,7 @@ public enum 
ResourceEventType { /** See {@link ResourceLocalizedEvent} */ LOCALIZED, /** See {@link ResourceReleaseEvent} */ - RELEASE + RELEASE, + /** See {@link ResourceFailedLocalizationEvent} */ + LOCALIZATION_FAILED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java new file mode 100644 index 00000000000..79b28bac908 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest; + +/** + * This event is sent by the localizer in case resource localization fails for + * the requested resource. + */ +public class ResourceFailedLocalizationEvent extends ResourceEvent { + + private Throwable cause; + + public ResourceFailedLocalizationEvent(LocalResourceRequest rsrc, + Throwable cause) { + super(rsrc, ResourceEventType.LOCALIZATION_FAILED); + this.cause = cause; + } + + public Throwable getCause() { + return cause; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerBuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerBuilderUtils.java new file mode 100644 index 00000000000..21cf1f27c24 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/util/NodeManagerBuilderUtils.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.util; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.apache.hadoop.yarn.util.Records; + +public class NodeManagerBuilderUtils { + + public static ResourceLocalizationSpec newResourceLocalizationSpec( + LocalResource rsrc, Path path) { + URL local = ConverterUtils.getYarnUrlFromPath(path); + ResourceLocalizationSpec resourceLocalizationSpec = + Records.newRecord(ResourceLocalizationSpec.class); + resourceLocalizationSpec.setDestinationDirectory(local); + resourceLocalizationSpec.setResource(rsrc); + return resourceLocalizationSpec; + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java index 41c649eea2c..3f31279edd9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java @@ -72,7 +72,7 @@ public class ContainerInfo { } this.user = container.getUser(); - Resource res = container.getLaunchContext().getResource(); + Resource res = container.getResource(); if (res != null) { this.totalMemoryNeededMB = res.getMemory(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto index 500ebf835e1..b1d6ddc5925 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/proto/yarn_server_nodemanager_service_protos.proto @@ -47,7 +47,12 @@ enum LocalizerActionProto { DIE = 2; } +message ResourceLocalizationSpecProto { + optional LocalResourceProto resource = 1; + optional URLProto destination_directory = 2; +} + message LocalizerHeartbeatResponseProto { optional LocalizerActionProto action = 1; - repeated LocalResourceProto resources = 2; + repeated ResourceLocalizationSpecProto resources = 2; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java index 0e92d893491..d71334e1a17 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java @@ -31,6 +31,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; /** * This class allows a node manager to run without without communicating with a @@ -73,9 +74,9 @@ public class MockNodeStatusUpdater extends NodeStatusUpdaterImpl { LOG.info("Got heartbeat number " + heartBeatID); nodeStatus.setResponseId(heartBeatID++); - NodeHeartbeatResponse nhResponse = recordFactory - .newRecordInstance(NodeHeartbeatResponse.class); - nhResponse.setResponseId(heartBeatID); + NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils + .newNodeHeartbeatResponse(heartBeatID, null, null, + null, null, 1000L); return nhResponse; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index b1283b5b6cb..292d00fadf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -43,6 +44,8 @@ import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.junit.Test; +import static org.mockito.Mockito.*; + public class TestEventFlow { @@ -117,12 +120,15 @@ public class TestEventFlow { applicationAttemptId.setApplicationId(applicationId); applicationAttemptId.setAttemptId(0); cID.setApplicationAttemptId(applicationAttemptId); - launchContext.setContainerId(cID); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cID); + when(mockContainer.getResource()).thenReturn(recordFactory + .newRecordInstance(Resource.class)); launchContext.setUser("testing"); - 
launchContext.setResource(recordFactory.newRecordInstance(Resource.class)); StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class); request.setContainerLaunchContext(launchContext); + request.setContainer(mockContainer); containerManager.startContainer(request); BaseContainerManagerTest.waitForContainerState(containerManager, cID, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index 10a85c74804..1436193d3ae 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -99,7 +99,9 @@ public class TestNodeManagerReboot { Records.newRecord(ContainerLaunchContext.class); // Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -122,12 +124,13 @@ public class TestNodeManagerReboot { containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(Records - .newRecord(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + Resource resource = Records.newRecord(Resource.class); + resource.setMemory(1024); + when(mockContainer.getResource()).thenReturn(resource); StartContainerRequest startRequest = Records.newRecord(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); GetContainerStatusRequest request = @@ -160,7 +163,10 @@ public class TestNodeManagerReboot { "container is launched", numOfLocalDirs(nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0); - nm.handle(new NodeManagerEvent(NodeManagerEventType.REBOOT)); + // restart the NodeManager + nm.stop(); + nm = new MyNodeManager(); + nm.start(); numTries = 0; while ((numOfLocalDirs(nmLocalDir.getAbsolutePath(), ContainerLocalizer @@ -250,26 +256,6 @@ public class TestNodeManagerReboot { return delService; } - // mimic part of reboot process - @Override - public void handle(NodeManagerEvent event) { - switch (event.getType()) { - case SHUTDOWN: - this.stop(); - break; - case REBOOT: - this.stop(); - this.createNewMyNodeManager().start(); - break; - default: - LOG.warn("Invalid shutdown event " + event.getType() + ". 
Ignoring."); - } - } - - private MyNodeManager createNewMyNodeManager() { - return new MyNodeManager(); - } - private YarnConfiguration createNMConfig() { YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index f42261765fb..1efe80db075 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -18,6 +18,9 @@ package org.apache.hadoop.yarn.server.nodemanager; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -28,6 +31,9 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CyclicBarrier; import junit.framework.Assert; @@ -38,6 +44,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -49,9 +56,12 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; import org.junit.Before; @@ -71,6 +81,7 @@ public class TestNodeManagerShutdown { .getRecordFactory(null); static final String user = "nobody"; private FileContext localFS; + private CyclicBarrier syncBarrier = new CyclicBarrier(2); @Before public void setup() throws UnsupportedFileSystemException { @@ -91,52 +102,7 @@ public class TestNodeManagerShutdown { NodeManager nm = getNodeManager(); nm.init(createNMConfig()); nm.start(); - - ContainerManagerImpl containerManager = nm.getContainerManager(); - File scriptFile = createUnhaltingScriptFile(); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - - // Construct the Container-id - ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); - - 
containerLaunchContext.setUser(user); - - URL localResourceUri = - ConverterUtils.getYarnUrlFromPath(localFS - .makeQualified(new Path(scriptFile.getAbsolutePath()))); - LocalResource localResource = - recordFactory.newRecordInstance(LocalResource.class); - localResource.setResource(localResourceUri); - localResource.setSize(-1); - localResource.setVisibility(LocalResourceVisibility.APPLICATION); - localResource.setType(LocalResourceType.FILE); - localResource.setTimestamp(scriptFile.lastModified()); - String destinationFile = "dest_file"; - Map localResources = - new HashMap(); - localResources.put(destinationFile, localResource); - containerLaunchContext.setLocalResources(localResources); - containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); - containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); - StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); - startRequest.setContainerLaunchContext(containerLaunchContext); - containerManager.startContainer(startRequest); - - GetContainerStatusRequest request = - recordFactory.newRecordInstance(GetContainerStatusRequest.class); - request.setContainerId(cId); - ContainerStatus containerStatus = - containerManager.getContainerStatus(request).getStatus(); - Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState()); + startContainers(nm); final int MAX_TRIES=20; int numTries = 0; @@ -170,6 +136,74 @@ public class TestNodeManagerShutdown { reader.close(); } + @SuppressWarnings("unchecked") + @Test + public void testKillContainersOnResync() throws IOException, InterruptedException { + NodeManager nm = new TestNodeManager(); + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + nm.start(); + startContainers(nm); + + assert ((TestNodeManager) nm).getNMRegistrationCount() == 1; + nm.getNMDispatcher().getEventHandler(). 
+ handle( new NodeManagerEvent(NodeManagerEventType.RESYNC)); + try { + syncBarrier.await(); + } catch (BrokenBarrierException e) { + } + assert ((TestNodeManager) nm).getNMRegistrationCount() == 2; + } + + private void startContainers(NodeManager nm) throws IOException { + ContainerManagerImpl containerManager = nm.getContainerManager(); + File scriptFile = createUnhaltingScriptFile(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + Container mockContainer = mock(Container.class); + // Construct the Container-id + ContainerId cId = createContainerId(); + when(mockContainer.getId()).thenReturn(cId); + + containerLaunchContext.setUser(user); + + URL localResourceUri = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource localResource = + recordFactory.newRecordInstance(LocalResource.class); + localResource.setResource(localResourceUri); + localResource.setSize(-1); + localResource.setVisibility(LocalResourceVisibility.APPLICATION); + localResource.setType(LocalResourceType.FILE); + localResource.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, localResource); + containerLaunchContext.setLocalResources(localResources); + containerLaunchContext.setUser(containerLaunchContext.getUser()); + List commands = new ArrayList(); + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + containerLaunchContext.setCommands(commands); + Resource resource = BuilderUtils.newResource(1024, 1); + when(mockContainer.getResource()).thenReturn(resource); + StartContainerRequest startRequest = + recordFactory.newRecordInstance(StartContainerRequest.class); + startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); + containerManager.startContainer(startRequest); + + GetContainerStatusRequest request = + recordFactory.newRecordInstance(GetContainerStatusRequest.class); + request.setContainerId(cId); + ContainerStatus containerStatus = + containerManager.getContainerStatus(request).getStatus(); + Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState()); + } + private ContainerId createContainerId() { ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(0); @@ -226,4 +260,48 @@ public class TestNodeManagerShutdown { } }; } + + class TestNodeManager extends NodeManager { + + private int registrationCount = 0; + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new TestNodeStatusUpdaterImpl(context, dispatcher, + healthChecker, metrics); + } + + public int getNMRegistrationCount() { + return registrationCount; + } + + class TestNodeStatusUpdaterImpl extends MockNodeStatusUpdater { + + public TestNodeStatusUpdaterImpl(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + } + + @Override + protected void registerWithRM() throws YarnRemoteException { + super.registerWithRM(); + registrationCount++; + } + + @Override + protected void rebootNodeStatusUpdater() { + ConcurrentMap containers = + getNMContext().getContainers(); + // ensure that containers are empty before restart nodeStatusUpdater + 
Assert.assertTrue(containers.isEmpty()); + super.rebootNodeStatusUpdater(); + try { + syncBarrier.await(); + } catch (InterruptedException e) { + } catch (BrokenBarrierException e) { + } + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index a12de1da65f..29d6a4c3a84 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; import java.io.IOException; import java.net.InetSocketAddress; @@ -29,6 +30,8 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -43,14 +46,17 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; @@ -58,12 +64,15 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequ import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; import org.apache.hadoop.yarn.server.api.records.NodeAction; import org.apache.hadoop.yarn.server.api.records.NodeStatus; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import 
org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; import org.apache.hadoop.yarn.service.Service; import org.apache.hadoop.yarn.service.Service.STATE; import org.apache.hadoop.yarn.util.BuilderUtils; @@ -90,7 +99,8 @@ public class TestNodeStatusUpdater { private final List registeredNodes = new ArrayList(); private final Configuration conf = createNMConfig(); private NodeManager nm; - protected NodeManager rebootedNodeManager; + private boolean containerStatusBackupSuccessfully = true; + private List completedContainerStatusList = new ArrayList(); @After public void tearDown() { @@ -159,9 +169,15 @@ public class TestNodeStatusUpdater { throws YarnRemoteException { NodeStatus nodeStatus = request.getNodeStatus(); LOG.info("Got heartbeat number " + heartBeatID); + NodeManagerMetrics mockMetrics = mock(NodeManagerMetrics.class); + Dispatcher mockDispatcher = mock(Dispatcher.class); + EventHandler mockEventHandler = mock(EventHandler.class); + when(mockDispatcher.getEventHandler()).thenReturn(mockEventHandler); nodeStatus.setResponseId(heartBeatID++); Map> appToContainers = getAppToContainerStatusMap(nodeStatus.getContainersStatuses()); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); if (heartBeatID == 1) { Assert.assertEquals(0, nodeStatus.getContainersStatuses().size()); @@ -172,10 +188,12 @@ public class TestNodeStatusUpdater { firstContainerID.setId(heartBeatID); ContainerLaunchContext launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(firstContainerID); - launchContext.setResource(recordFactory.newRecordInstance(Resource.class)); - launchContext.getResource().setMemory(2); - Container container = new ContainerImpl(conf , null, launchContext, null, null); + when(mockContainer.getId()).thenReturn(firstContainerID); + Resource resource = BuilderUtils.newResource(2, 1); + when(mockContainer.getResource()).thenReturn(resource); + Container container = + new ContainerImpl(conf, mockDispatcher, launchContext, + mockContainer, null, mockMetrics); this.context.getContainers().put(firstContainerID, container); } else if (heartBeatID == 2) { // Checks on the RM end @@ -196,10 +214,12 @@ public class TestNodeStatusUpdater { secondContainerID.setId(heartBeatID); ContainerLaunchContext launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(secondContainerID); - launchContext.setResource(recordFactory.newRecordInstance(Resource.class)); - launchContext.getResource().setMemory(3); - Container container = new ContainerImpl(conf, null, launchContext, null, null); + when(mockContainer.getId()).thenReturn(secondContainerID); + Resource resource = BuilderUtils.newResource(3, 1); + when(mockContainer.getResource()).thenReturn(resource); + Container container = + new ContainerImpl(conf, mockDispatcher, launchContext, + mockContainer, null, mockMetrics); this.context.getContainers().put(secondContainerID, container); } else if (heartBeatID == 3) { // Checks on the RM end @@ -214,21 +234,21 @@ public class TestNodeStatusUpdater { Assert.assertEquals(2, activeContainers.size()); } - NodeHeartbeatResponse nhResponse = recordFactory - .newRecordInstance(NodeHeartbeatResponse.class); - nhResponse.setResponseId(heartBeatID); + NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils. 
+ newNodeHeartbeatResponse(heartBeatID, null, null, null, null, 1000L); return nhResponse; } } private class MyNodeStatusUpdater extends NodeStatusUpdaterImpl { - public ResourceTracker resourceTracker = new MyResourceTracker(this.context); + public ResourceTracker resourceTracker; private Context context; public MyNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { super(context, dispatcher, healthChecker, metrics); this.context = context; + resourceTracker = new MyResourceTracker(this.context); } @Override @@ -237,6 +257,22 @@ public class TestNodeStatusUpdater { } } + private class MyNodeStatusUpdater2 extends NodeStatusUpdaterImpl { + public ResourceTracker resourceTracker; + + public MyNodeStatusUpdater2(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + resourceTracker = new MyResourceTracker4(context); + } + + @Override + protected ResourceTracker getRMClient() { + return resourceTracker; + } + + } + private class MyNodeStatusUpdater3 extends NodeStatusUpdaterImpl { public ResourceTracker resourceTracker; private Context context; @@ -289,6 +325,21 @@ public class TestNodeStatusUpdater { } } + private class MyNodeStatusUpdater5 extends NodeStatusUpdaterImpl { + private ResourceTracker resourceTracker; + + public MyNodeStatusUpdater5(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + resourceTracker = new MyResourceTracker5(); + } + + @Override + protected ResourceTracker getRMClient() { + return resourceTracker; + } + } + private class MyNodeManager extends NodeManager { private MyNodeStatusUpdater3 nodeStatusUpdater; @@ -305,6 +356,32 @@ public class TestNodeStatusUpdater { } } + private class MyNodeManager2 extends NodeManager { + public boolean isStopped = false; + private NodeStatusUpdater nodeStatusUpdater; + private CyclicBarrier syncBarrier; + public MyNodeManager2 (CyclicBarrier syncBarrier) { + this.syncBarrier = syncBarrier; + } + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + nodeStatusUpdater = + new MyNodeStatusUpdater5(context, dispatcher, healthChecker, + metrics); + return nodeStatusUpdater; + } + + @Override + public void stop() { + super.stop(); + isStopped = true; + try { + syncBarrier.await(); + } catch (Exception e) { + } + } + } // private class MyResourceTracker2 implements ResourceTracker { public NodeAction heartBeatNodeAction = NodeAction.NORMAL; @@ -325,10 +402,9 @@ public class TestNodeStatusUpdater { NodeStatus nodeStatus = request.getNodeStatus(); nodeStatus.setResponseId(heartBeatID++); - NodeHeartbeatResponse nhResponse = recordFactory - .newRecordInstance(NodeHeartbeatResponse.class); - nhResponse.setResponseId(heartBeatID); - nhResponse.setNodeAction(heartBeatNodeAction); + NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils. 
+ newNodeHeartbeatResponse(heartBeatID, heartBeatNodeAction, null, + null, null, 1000L); return nhResponse; } } @@ -361,10 +437,9 @@ public class TestNodeStatusUpdater { LOG.info("Got heartBeatId: [" + heartBeatID +"]"); NodeStatus nodeStatus = request.getNodeStatus(); nodeStatus.setResponseId(heartBeatID++); - NodeHeartbeatResponse nhResponse = - recordFactory.newRecordInstance(NodeHeartbeatResponse.class); - nhResponse.setResponseId(heartBeatID); - nhResponse.setNodeAction(heartBeatNodeAction); + NodeHeartbeatResponse nhResponse = YarnServerBuilderUtils. + newNodeHeartbeatResponse(heartBeatID, heartBeatNodeAction, null, + null, null, 1000L); if (nodeStatus.getKeepAliveApplications() != null && nodeStatus.getKeepAliveApplications().size() > 0) { @@ -386,6 +461,124 @@ public class TestNodeStatusUpdater { } } + private class MyResourceTracker4 implements ResourceTracker { + + public NodeAction registerNodeAction = NodeAction.NORMAL; + public NodeAction heartBeatNodeAction = NodeAction.NORMAL; + private Context context; + + public MyResourceTracker4(Context context) { + this.context = context; + } + + @Override + public RegisterNodeManagerResponse registerNodeManager( + RegisterNodeManagerRequest request) throws YarnRemoteException { + RegisterNodeManagerResponse response = recordFactory + .newRecordInstance(RegisterNodeManagerResponse.class); + response.setNodeAction(registerNodeAction); + return response; + } + + @Override + public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) + throws YarnRemoteException { + try { + if (heartBeatID == 0) { + Assert.assertEquals(request.getNodeStatus().getContainersStatuses() + .size(), 0); + Assert.assertEquals(context.getContainers().size(), 0); + } else if (heartBeatID == 1) { + Assert.assertEquals(request.getNodeStatus().getContainersStatuses() + .size(), 5); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(0).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(0) + .getContainerId().getId() == 1); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(1).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(1) + .getContainerId().getId() == 2); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(2).getState() == ContainerState.COMPLETE + && request.getNodeStatus().getContainersStatuses().get(2) + .getContainerId().getId() == 3); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(3).getState() == ContainerState.COMPLETE + && request.getNodeStatus().getContainersStatuses().get(3) + .getContainerId().getId() == 4); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(4).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(4) + .getContainerId().getId() == 5); + throw new YarnException("Lost the heartbeat response"); + } else if (heartBeatID == 2) { + Assert.assertEquals(request.getNodeStatus().getContainersStatuses() + .size(), 7); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(0).getState() == ContainerState.COMPLETE + && request.getNodeStatus().getContainersStatuses().get(0) + .getContainerId().getId() == 3); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(1).getState() == ContainerState.COMPLETE + && request.getNodeStatus().getContainersStatuses().get(1) + .getContainerId().getId() == 4); + 
Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(2).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(2) + .getContainerId().getId() == 1); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(3).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(3) + .getContainerId().getId() == 2); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(4).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(4) + .getContainerId().getId() == 5); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(5).getState() == ContainerState.RUNNING + && request.getNodeStatus().getContainersStatuses().get(5) + .getContainerId().getId() == 6); + Assert.assertTrue(request.getNodeStatus().getContainersStatuses() + .get(6).getState() == ContainerState.COMPLETE + && request.getNodeStatus().getContainersStatuses().get(6) + .getContainerId().getId() == 7); + } + } catch (AssertionError error) { + LOG.info(error); + containerStatusBackupSuccessfully = false; + } finally { + heartBeatID++; + } + NodeStatus nodeStatus = request.getNodeStatus(); + nodeStatus.setResponseId(heartBeatID); + NodeHeartbeatResponse nhResponse = + YarnServerBuilderUtils.newNodeHeartbeatResponse(heartBeatID, + heartBeatNodeAction, null, null, null, 1000L); + return nhResponse; + } + } + + private class MyResourceTracker5 implements ResourceTracker { + public NodeAction registerNodeAction = NodeAction.NORMAL; + @Override + public RegisterNodeManagerResponse registerNodeManager( + RegisterNodeManagerRequest request) throws YarnRemoteException { + + RegisterNodeManagerResponse response = recordFactory + .newRecordInstance(RegisterNodeManagerResponse.class); + response.setNodeAction(registerNodeAction ); + return response; + } + + @Override + public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) + throws YarnRemoteException { + heartBeatID++; + throw RPCUtil.getRemoteException("NodeHeartbeat exception"); + } + } + @Before public void clearError() { nmStartError = null; @@ -473,8 +666,8 @@ public class TestNodeStatusUpdater { } @Override - protected void cleanupContainers() { - super.cleanupContainers(); + protected void cleanupContainers(NodeManagerEventType eventType) { + super.cleanupContainers(NodeManagerEventType.SHUTDOWN); numCleanups.incrementAndGet(); } }; @@ -527,50 +720,6 @@ public class TestNodeStatusUpdater { Assert.assertEquals(STATE.STOPPED, nm.getServiceState()); } - @Test - public void testNodeReboot() throws Exception { - nm = getNodeManager(NodeAction.REBOOT); - YarnConfiguration conf = createNMConfig(); - nm.init(conf); - Assert.assertEquals(STATE.INITED, nm.getServiceState()); - nm.start(); - - int waitCount = 0; - while (heartBeatID < 1 && waitCount++ != 20) { - Thread.sleep(500); - } - Assert.assertFalse(heartBeatID < 1); - - // NM takes a while to reach the STOPPED state. 
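A note on the pattern used by the mock trackers above: heartbeat replies are now built through YarnServerBuilderUtils.newNodeHeartbeatResponse rather than by populating a raw NodeHeartbeatResponse record. The following is a minimal sketch, not part of the patch, of such a tracker; it assumes the test class's recordFactory and heartBeatID fields, and the interpretation of the null arguments (optional cleanup lists and master key) is an assumption based on the calls shown in this patch.

  // Sketch only: a mock ResourceTracker that answers heartbeats via the builder.
  private class SketchResourceTracker implements ResourceTracker {
    @Override
    public RegisterNodeManagerResponse registerNodeManager(
        RegisterNodeManagerRequest request) throws YarnRemoteException {
      RegisterNodeManagerResponse response =
          recordFactory.newRecordInstance(RegisterNodeManagerResponse.class);
      response.setNodeAction(NodeAction.NORMAL);
      return response;
    }

    @Override
    public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
        throws YarnRemoteException {
      NodeStatus nodeStatus = request.getNodeStatus();
      nodeStatus.setResponseId(heartBeatID++);
      // responseId, node action, then nulls and the 1000L next-heartbeat
      // interval, mirroring the newNodeHeartbeatResponse calls in this patch.
      return YarnServerBuilderUtils.newNodeHeartbeatResponse(heartBeatID,
          NodeAction.NORMAL, null, null, null, 1000L);
    }
  }
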
- waitCount = 0; - while (nm.getServiceState() != STATE.STOPPED && waitCount++ != 20) { - LOG.info("Waiting for NM to stop.."); - Thread.sleep(1000); - } - Assert.assertEquals(STATE.STOPPED, nm.getServiceState()); - - waitCount = 0; - while (null == rebootedNodeManager && waitCount++ != 20) { - LOG.info("Waiting for NM to reinitialize.."); - Thread.sleep(1000); - } - - waitCount = 0; - while (rebootedNodeManager.getServiceState() != STATE.STARTED && waitCount++ != 20) { - LOG.info("Waiting for NM to start.."); - Thread.sleep(1000); - } - Assert.assertEquals(STATE.STARTED, rebootedNodeManager.getServiceState()); - - rebootedNodeManager.stop(); - waitCount = 0; - while (rebootedNodeManager.getServiceState() != STATE.STOPPED && waitCount++ != 20) { - LOG.info("Waiting for NM to stop.."); - Thread.sleep(1000); - } - Assert.assertEquals(STATE.STOPPED, rebootedNodeManager.getServiceState()); - } - @Test public void testNMShutdownForRegistrationFailure() { @@ -727,6 +876,151 @@ public class TestNodeStatusUpdater { } } + /** + * Test completed containerStatus get back up when heart beat lost + */ + @Test(timeout = 20000) + public void testCompletedContainerStatusBackup() throws Exception { + nm = new NodeManager() { + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + MyNodeStatusUpdater2 myNodeStatusUpdater = + new MyNodeStatusUpdater2(context, dispatcher, healthChecker, + metrics); + return myNodeStatusUpdater; + } + + @Override + protected NMContext createNMContext( + NMContainerTokenSecretManager containerTokenSecretManager) { + return new MyNMContext(containerTokenSecretManager); + } + + }; + + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + nm.start(); + + int waitCount = 0; + while (heartBeatID <= 3 && waitCount++ != 20) { + Thread.sleep(500); + } + if(!containerStatusBackupSuccessfully) { + Assert.fail("ContainerStatus Backup failed"); + } + nm.stop(); + } + + @Test(timeout = 20000) + public void testNodeStatusUpdaterRetryAndNMShutdown() + throws InterruptedException { + final long connectionWaitSecs = 1; + final long connectionRetryIntervalSecs = 1; + YarnConfiguration conf = createNMConfig(); + conf.setLong(YarnConfiguration.RESOURCEMANAGER_CONNECT_WAIT_SECS, + connectionWaitSecs); + conf.setLong(YarnConfiguration + .RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_SECS, + connectionRetryIntervalSecs); + CyclicBarrier syncBarrier = new CyclicBarrier(2); + nm = new MyNodeManager2(syncBarrier); + nm.init(conf); + nm.start(); + try { + syncBarrier.await(); + } catch (Exception e) { + } + Assert.assertTrue(((MyNodeManager2) nm).isStopped); + Assert.assertTrue("calculate heartBeatCount based on" + + " connectionWaitSecs and RetryIntervalSecs", heartBeatID == 2); + } + + private class MyNMContext extends NMContext { + ConcurrentMap containers = + new ConcurrentSkipListMap(); + + public MyNMContext(NMContainerTokenSecretManager + containerTokenSecretManager) { + super(containerTokenSecretManager); + } + + @Override + public ConcurrentMap getContainers() { + if (heartBeatID == 0) { + return containers; + } else if (heartBeatID == 1) { + ContainerStatus containerStatus1 = + createContainerStatus(1, ContainerState.RUNNING); + Container container1 = getMockContainer(containerStatus1); + containers.put(containerStatus1.getContainerId(), container1); + + ContainerStatus containerStatus2 = + createContainerStatus(2, ContainerState.RUNNING); + Container container2 = getMockContainer(containerStatus2); 
+ containers.put(containerStatus2.getContainerId(), container2); + + ContainerStatus containerStatus3 = + createContainerStatus(3, ContainerState.COMPLETE); + Container container3 = getMockContainer(containerStatus3); + containers.put(containerStatus3.getContainerId(), container3); + completedContainerStatusList.add(containerStatus3); + + ContainerStatus containerStatus4 = + createContainerStatus(4, ContainerState.COMPLETE); + Container container4 = getMockContainer(containerStatus4); + containers.put(containerStatus4.getContainerId(), container4); + completedContainerStatusList.add(containerStatus4); + + ContainerStatus containerStatus5 = + createContainerStatus(5, ContainerState.RUNNING); + Container container5 = getMockContainer(containerStatus5); + containers.put(containerStatus5.getContainerId(), container5); + + return containers; + } else if (heartBeatID == 2) { + ContainerStatus containerStatus6 = + createContainerStatus(6, ContainerState.RUNNING); + Container container6 = getMockContainer(containerStatus6); + containers.put(containerStatus6.getContainerId(), container6); + + ContainerStatus containerStatus7 = + createContainerStatus(7, ContainerState.COMPLETE); + Container container7 = getMockContainer(containerStatus7); + containers.put(containerStatus7.getContainerId(), container7); + completedContainerStatusList.add(containerStatus7); + + return containers; + } else { + containers.clear(); + + return containers; + } + } + + private ContainerStatus createContainerStatus(int id, + ContainerState containerState) { + ApplicationId applicationId = + BuilderUtils.newApplicationId(System.currentTimeMillis(), id); + ApplicationAttemptId applicationAttemptId = + BuilderUtils.newApplicationAttemptId(applicationId, id); + ContainerId contaierId = + BuilderUtils.newContainerId(applicationAttemptId, id); + ContainerStatus containerStatus = + BuilderUtils.newContainerStatus(contaierId, containerState, + "test_containerStatus: id=" + id + ", containerState: " + + containerState, 0); + return containerStatus; + } + + private Container getMockContainer(ContainerStatus containerStatus) { + Container container = mock(Container.class); + when(container.cloneAndGetContainerStatus()).thenReturn(containerStatus); + return container; + } + } + private void verifyNodeStartFailure(String errMessage) { YarnConfiguration conf = createNMConfig(); nm.init(conf); @@ -773,12 +1067,6 @@ public class TestNodeStatusUpdater { myNodeStatusUpdater.resourceTracker = myResourceTracker2; return myNodeStatusUpdater; } - - @Override - NodeManager createNewNodeManager() { - rebootedNodeManager = getNodeManager(NodeAction.NORMAL); - return rebootedNodeManager; - } }; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/TestPBRecordImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/TestPBRecordImpl.java index 8996b1ebfd3..71c7f9f62d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/TestPBRecordImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/api/protocolrecords/impl/pb/TestPBRecordImpl.java @@ -17,6 +17,13 @@ */ package 
org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.impl.pb; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.net.URISyntaxException; +import java.util.ArrayList; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DataInputBuffer; @@ -31,15 +38,14 @@ import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.LocalResourceStatusProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.LocalizerHeartbeatResponseProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerServiceProtos.LocalizerStatusProto; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType; import org.apache.hadoop.yarn.util.ConverterUtils; - import org.junit.Test; -import static org.junit.Assert.*; public class TestPBRecordImpl { @@ -54,9 +60,8 @@ public class TestPBRecordImpl { static LocalResource createResource() { LocalResource ret = recordFactory.newRecordInstance(LocalResource.class); assertTrue(ret instanceof LocalResourcePBImpl); - ret.setResource( - ConverterUtils.getYarnUrlFromPath( - new Path("hdfs://y.ak:8020/foo/bar"))); + ret.setResource(ConverterUtils.getYarnUrlFromPath(new Path( + "hdfs://y.ak:8020/foo/bar"))); ret.setSize(4344L); ret.setTimestamp(3141592653589793L); ret.setVisibility(LocalResourceVisibility.PUBLIC); @@ -90,16 +95,27 @@ public class TestPBRecordImpl { return ret; } - static LocalizerHeartbeatResponse createLocalizerHeartbeatResponse() { + static LocalizerHeartbeatResponse createLocalizerHeartbeatResponse() + throws URISyntaxException { LocalizerHeartbeatResponse ret = recordFactory.newRecordInstance(LocalizerHeartbeatResponse.class); assertTrue(ret instanceof LocalizerHeartbeatResponsePBImpl); ret.setLocalizerAction(LocalizerAction.LIVE); - ret.addResource(createResource()); + LocalResource rsrc = createResource(); + ArrayList rsrcs = + new ArrayList(); + ResourceLocalizationSpec resource = + recordFactory.newRecordInstance(ResourceLocalizationSpec.class); + resource.setResource(rsrc); + resource.setDestinationDirectory(ConverterUtils + .getYarnUrlFromPath(new Path("/tmp" + System.currentTimeMillis()))); + rsrcs.add(resource); + ret.setResourceSpecs(rsrcs); + System.out.println(resource); return ret; } - @Test + @Test(timeout=10000) public void testLocalResourceStatusSerDe() throws Exception { LocalResourceStatus rsrcS = createLocalResourceStatus(); assertTrue(rsrcS instanceof LocalResourceStatusPBImpl); @@ -119,7 +135,7 @@ public class TestPBRecordImpl { assertEquals(createResource(), rsrcD.getResource()); } - @Test + @Test(timeout=10000) public void testLocalizerStatusSerDe() throws Exception { LocalizerStatus rsrcS = createLocalizerStatus(); assertTrue(rsrcS instanceof LocalizerStatusPBImpl); @@ -141,7 +157,7 @@ public class TestPBRecordImpl { assertEquals(createLocalResourceStatus(), rsrcD.getResourceStatus(0)); } - @Test + @Test(timeout=10000) public void 
testLocalizerHeartbeatResponseSerDe() throws Exception { LocalizerHeartbeatResponse rsrcS = createLocalizerHeartbeatResponse(); assertTrue(rsrcS instanceof LocalizerHeartbeatResponsePBImpl); @@ -158,8 +174,8 @@ public class TestPBRecordImpl { new LocalizerHeartbeatResponsePBImpl(rsrcPbD); assertEquals(rsrcS, rsrcD); - assertEquals(createResource(), rsrcS.getLocalResource(0)); - assertEquals(createResource(), rsrcD.getLocalResource(0)); + assertEquals(createResource(), rsrcS.getResourceSpecs().get(0).getResource()); + assertEquals(createResource(), rsrcD.getResourceSpecs().get(0).getResource()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 5b01cc08140..d405a7c1779 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -47,7 +48,6 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; @@ -58,8 +58,10 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.Test; +import static org.mockito.Mockito.*; public class TestContainerManager extends BaseContainerManagerTest { @@ -121,7 +123,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); - container.setContainerId(cId); container.setUser(user); @@ -143,14 +144,16 @@ public class TestContainerManager extends BaseContainerManagerTest { localResources.put(destinationFile, rsrc_alpha); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(container.getUser()); - containerLaunchContext.setContainerId(container.getContainerId()); 
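The localizer heartbeat wire format exercised above now carries ResourceLocalizationSpec entries, pairing each LocalResource with the destination directory chosen by the NodeManager, instead of a bare LocalResource list. A short sketch, not part of the patch, of assembling such a response with the new builder; the helper name is hypothetical and it assumes the test's recordFactory plus a prepared LocalResource and Path.

  // Sketch: build a LIVE localizer heartbeat reply carrying one spec,
  // mirroring createLocalizerHeartbeatResponse() in TestPBRecordImpl.
  static LocalizerHeartbeatResponse newLiveResponse(LocalResource rsrc, Path dest) {
    LocalizerHeartbeatResponse response =
        recordFactory.newRecordInstance(LocalizerHeartbeatResponse.class);
    response.setLocalizerAction(LocalizerAction.LIVE);
    // The spec tells the localizer both what to fetch and where to put it,
    // so the localizer no longer picks its own target path.
    ResourceLocalizationSpec spec =
        NodeManagerBuilderUtils.newResourceLocalizationSpec(rsrc, dest);
    ArrayList<ResourceLocalizationSpec> specs =
        new ArrayList<ResourceLocalizationSpec>();
    specs.add(spec);
    response.setResourceSpecs(specs);
    return response;
  }
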
- containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(512, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); - + startRequest.setContainer(mockContainer); + containerManager.startContainer(startRequest); BaseContainerManagerTest.waitForContainerState(containerManager, cId, @@ -208,7 +211,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -232,11 +234,13 @@ public class TestContainerManager extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; @@ -310,7 +314,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -334,12 +337,14 @@ public class TestContainerManager extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); BaseContainerManagerTest.waitForContainerState(containerManager, cId, @@ -402,7 +407,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); ApplicationId appId = cId.getApplicationAttemptId().getApplicationId(); - container.setContainerId(cId); container.setUser(user); @@ -425,14 +429,16 @@ public class TestContainerManager extends BaseContainerManagerTest { localResources.put(destinationFile, rsrc_alpha); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(container.getUser()); - containerLaunchContext.setContainerId(container.getContainerId()); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); + Container 
mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100, 1)); // containerLaunchContext.command = new ArrayList(); StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class); request.setContainerLaunchContext(containerLaunchContext); + request.setContainer(mockContainer); containerManager.startContainer(request); BaseContainerManagerTest.waitForContainerState(containerManager, cId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 95872440b6a..230ce46c472 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -525,8 +525,9 @@ public class TestContainer { return serviceData; } - private Container newContainer(Dispatcher disp, ContainerLaunchContext ctx) { - return new ContainerImpl(conf, disp, ctx, null, metrics); + private Container newContainer(Dispatcher disp, ContainerLaunchContext ctx, + org.apache.hadoop.yarn.api.records.Container container) { + return new ContainerImpl(conf, disp, ctx, container, null, metrics); } @SuppressWarnings("unchecked") @@ -570,12 +571,14 @@ public class TestContainer { this.user = user; ctxt = mock(ContainerLaunchContext.class); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); cId = BuilderUtils.newContainerId(appId, 1, timestamp, id); when(ctxt.getUser()).thenReturn(this.user); - when(ctxt.getContainerId()).thenReturn(cId); + when(mockContainer.getId()).thenReturn(cId); Resource resource = BuilderUtils.newResource(1024, 1); - when(ctxt.getResource()).thenReturn(resource); + when(mockContainer.getResource()).thenReturn(resource); if (withLocalRes) { Random r = new Random(); @@ -599,7 +602,7 @@ public class TestContainer { } when(ctxt.getServiceData()).thenReturn(serviceData); - c = newContainer(dispatcher, ctxt); + c = newContainer(dispatcher, ctxt, mockContainer); dispatcher.start(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 822835dc3d0..702707209d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; 
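For orientation, the TestContainerManager and TestContainer hunks above, together with the TestContainerLaunch hunk that continues below, all make the same mechanical change: the container id and resource that the tests used to set on ContainerLaunchContext are now read from a mocked org.apache.hadoop.yarn.api.records.Container that is attached to the StartContainerRequest. A minimal sketch of that pattern, not taken from the patch (the helper name and the 512 MB / 1 vcore values are illustrative):

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.factories.RecordFactory;
    import org.apache.hadoop.yarn.util.BuilderUtils;

    // Hypothetical helper that captures the pattern used by the updated tests.
    final class StartRequestHelper {
      static StartContainerRequest newStartRequest(RecordFactory recordFactory,
          ContainerLaunchContext launchContext, ContainerId cId) {
        // The Container record, not the launch context, now carries id and resource.
        Container mockContainer = mock(Container.class);
        when(mockContainer.getId()).thenReturn(cId);
        when(mockContainer.getResource()).thenReturn(
            BuilderUtils.newResource(512, 1));
        StartContainerRequest request =
            recordFactory.newRecordInstance(StartContainerRequest.class);
        request.setContainerLaunchContext(launchContext);
        // New in this patch: the request carries the Container record explicitly.
        request.setContainer(mockContainer);
        return request;
      }
    }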
import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -57,11 +58,14 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.junit.Before; import org.junit.Test; +import static org.mockito.Mockito.*; + import junit.framework.Assert; public class TestContainerLaunch extends BaseContainerManagerTest { @@ -184,6 +188,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(0); @@ -195,7 +200,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -222,11 +227,11 @@ public class TestContainerLaunch extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; @@ -301,7 +306,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(1); @@ -313,7 +318,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -339,11 +344,11 @@ public class TestContainerLaunch extends BaseContainerManagerTest { List commands = new ArrayList(); commands.add(scriptFile.getAbsolutePath()); 
containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/MockLocalizerHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/MockLocalizerHeartbeatResponse.java index ca4739400de..1fcf5bf4cfd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/MockLocalizerHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/MockLocalizerHeartbeatResponse.java @@ -20,7 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse; @@ -28,28 +28,30 @@ public class MockLocalizerHeartbeatResponse implements LocalizerHeartbeatResponse { LocalizerAction action; - List rsrc; + List resourceSpecs; MockLocalizerHeartbeatResponse() { - rsrc = new ArrayList(); + resourceSpecs = new ArrayList(); } MockLocalizerHeartbeatResponse( - LocalizerAction action, List rsrc) { + LocalizerAction action, List resources) { this.action = action; - this.rsrc = rsrc; + this.resourceSpecs = resources; } public LocalizerAction getLocalizerAction() { return action; } - public List getAllResources() { return rsrc; } - public LocalResource getLocalResource(int i) { return rsrc.get(i); } public void setLocalizerAction(LocalizerAction action) { this.action = action; } - public void addAllResources(List resources) { - rsrc.addAll(resources); - } - public void addResource(LocalResource resource) { rsrc.add(resource); } - public void removeResource(int index) { rsrc.remove(index); } - public void clearResources() { rsrc.clear(); } + + @Override + public List getResourceSpecs() { + return resourceSpecs; +} + + @Override + public void setResourceSpecs(List resourceSpecs) { + this.resourceSpecs = resourceSpecs; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java index 829eba4e1c0..38d513603f6 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestContainerLocalizer.java @@ -50,7 +50,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.AbstractFileSystem; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.DataInputBuffer; @@ -66,9 +65,11 @@ import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.server.nodemanager.api.LocalizationProtocol; +import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus; +import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.Test; import org.mockito.ArgumentMatcher; import org.mockito.invocation.InvocationOnMock; @@ -95,12 +96,33 @@ public class TestContainerLocalizer { public void testContainerLocalizerMain() throws Exception { ContainerLocalizer localizer = setupContainerLocalizerForTest(); + // verify created cache + List privCacheList = new ArrayList(); + List appCacheList = new ArrayList(); + for (Path p : localDirs) { + Path base = new Path(new Path(p, ContainerLocalizer.USERCACHE), appUser); + Path privcache = new Path(base, ContainerLocalizer.FILECACHE); + privCacheList.add(privcache); + Path appDir = + new Path(base, new Path(ContainerLocalizer.APPCACHE, appId)); + Path appcache = new Path(appDir, ContainerLocalizer.FILECACHE); + appCacheList.add(appcache); + } + // mock heartbeat responses from NM - LocalResource rsrcA = getMockRsrc(random, LocalResourceVisibility.PRIVATE); - LocalResource rsrcB = getMockRsrc(random, LocalResourceVisibility.PRIVATE); - LocalResource rsrcC = getMockRsrc(random, - LocalResourceVisibility.APPLICATION); - LocalResource rsrcD = getMockRsrc(random, LocalResourceVisibility.PRIVATE); + ResourceLocalizationSpec rsrcA = + getMockRsrc(random, LocalResourceVisibility.PRIVATE, + privCacheList.get(0)); + ResourceLocalizationSpec rsrcB = + getMockRsrc(random, LocalResourceVisibility.PRIVATE, + privCacheList.get(0)); + ResourceLocalizationSpec rsrcC = + getMockRsrc(random, LocalResourceVisibility.APPLICATION, + appCacheList.get(0)); + ResourceLocalizationSpec rsrcD = + getMockRsrc(random, LocalResourceVisibility.PRIVATE, + privCacheList.get(0)); + when(nmProxy.heartbeat(isA(LocalizerStatus.class))) .thenReturn(new MockLocalizerHeartbeatResponse(LocalizerAction.LIVE, Collections.singletonList(rsrcA))) @@ -111,27 +133,33 @@ public class TestContainerLocalizer { .thenReturn(new MockLocalizerHeartbeatResponse(LocalizerAction.LIVE, Collections.singletonList(rsrcD))) .thenReturn(new MockLocalizerHeartbeatResponse(LocalizerAction.LIVE, - Collections.emptyList())) + Collections.emptyList())) .thenReturn(new MockLocalizerHeartbeatResponse(LocalizerAction.DIE, null)); - 
doReturn(new FakeDownload(rsrcA.getResource().getFile(), true)).when( - localizer).download(isA(LocalDirAllocator.class), eq(rsrcA), + LocalResource tRsrcA = rsrcA.getResource(); + LocalResource tRsrcB = rsrcB.getResource(); + LocalResource tRsrcC = rsrcC.getResource(); + LocalResource tRsrcD = rsrcD.getResource(); + doReturn( + new FakeDownload(rsrcA.getResource().getResource().getFile(), true)) + .when(localizer).download(isA(Path.class), eq(tRsrcA), isA(UserGroupInformation.class)); - doReturn(new FakeDownload(rsrcB.getResource().getFile(), true)).when( - localizer).download(isA(LocalDirAllocator.class), eq(rsrcB), + doReturn( + new FakeDownload(rsrcB.getResource().getResource().getFile(), true)) + .when(localizer).download(isA(Path.class), eq(tRsrcB), isA(UserGroupInformation.class)); - doReturn(new FakeDownload(rsrcC.getResource().getFile(), true)).when( - localizer).download(isA(LocalDirAllocator.class), eq(rsrcC), + doReturn( + new FakeDownload(rsrcC.getResource().getResource().getFile(), true)) + .when(localizer).download(isA(Path.class), eq(tRsrcC), isA(UserGroupInformation.class)); - doReturn(new FakeDownload(rsrcD.getResource().getFile(), true)).when( - localizer).download(isA(LocalDirAllocator.class), eq(rsrcD), + doReturn( + new FakeDownload(rsrcD.getResource().getResource().getFile(), true)) + .when(localizer).download(isA(Path.class), eq(tRsrcD), isA(UserGroupInformation.class)); // run localization assertEquals(0, localizer.runLocalization(nmAddr)); - - // verify created cache for (Path p : localDirs) { Path base = new Path(new Path(p, ContainerLocalizer.USERCACHE), appUser); Path privcache = new Path(base, ContainerLocalizer.FILECACHE); @@ -143,15 +171,14 @@ public class TestContainerLocalizer { Path appcache = new Path(appDir, ContainerLocalizer.FILECACHE); verify(spylfs).mkdir(eq(appcache), isA(FsPermission.class), eq(false)); } - // verify tokens read at expected location verify(spylfs).open(tokenPath); // verify downloaded resources reported to NM - verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcA))); - verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcB))); - verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcC))); - verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcD))); + verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcA.getResource()))); + verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcB.getResource()))); + verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcC.getResource()))); + verify(nmProxy).heartbeat(argThat(new HBMatches(rsrcD.getResource()))); // verify all HB use localizerID provided verify(nmProxy, never()).heartbeat(argThat( @@ -306,10 +333,12 @@ public class TestContainerLocalizer { return mockRF; } - static LocalResource getMockRsrc(Random r, - LocalResourceVisibility vis) { - LocalResource rsrc = mock(LocalResource.class); + static ResourceLocalizationSpec getMockRsrc(Random r, + LocalResourceVisibility vis, Path p) { + ResourceLocalizationSpec resourceLocalizationSpec = + mock(ResourceLocalizationSpec.class); + LocalResource rsrc = mock(LocalResource.class); String name = Long.toHexString(r.nextLong()); URL uri = mock(org.apache.hadoop.yarn.api.records.URL.class); when(uri.getScheme()).thenReturn("file"); @@ -322,7 +351,10 @@ public class TestContainerLocalizer { when(rsrc.getType()).thenReturn(LocalResourceType.FILE); when(rsrc.getVisibility()).thenReturn(vis); - return rsrc; + when(resourceLocalizationSpec.getResource()).thenReturn(rsrc); + when(resourceLocalizationSpec.getDestinationDirectory()). 
+ thenReturn(ConverterUtils.getYarnUrlFromPath(p)); + return resourceLocalizationSpec; } @SuppressWarnings({ "rawtypes", "unchecked" }) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java new file mode 100644 index 00000000000..057d7cce6fc --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; + +import junit.framework.Assert; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.Test; + +public class TestLocalCacheDirectoryManager { + + @Test(timeout = 10000) + public void testHierarchicalSubDirectoryCreation() { + // setting per directory file limit to 1. + YarnConfiguration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "37"); + + LocalCacheDirectoryManager hDir = new LocalCacheDirectoryManager(conf); + // Test root directory path = "" + Assert.assertTrue(hDir.getRelativePathForLocalization().isEmpty()); + + // Testing path generation from "0" to "0/0/z/z" + for (int i = 1; i <= 37 * 36 * 36; i++) { + StringBuffer sb = new StringBuffer(); + String num = Integer.toString(i - 1, 36); + if (num.length() == 1) { + sb.append(num.charAt(0)); + } else { + sb.append(Integer.toString( + Integer.parseInt(num.substring(0, 1), 36) - 1, 36)); + } + for (int j = 1; j < num.length(); j++) { + sb.append(Path.SEPARATOR).append(num.charAt(j)); + } + Assert.assertEquals(sb.toString(), hDir.getRelativePathForLocalization()); + } + + String testPath1 = "4"; + String testPath2 = "2"; + /* + * Making sure directory "4" and "2" becomes non-full so that they are + * reused for future getRelativePathForLocalization() calls in the order + * they are freed. + */ + hDir.decrementFileCountForPath(testPath1); + hDir.decrementFileCountForPath(testPath2); + // After below call directory "4" should become full. 
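For readability, the expected-path computation inside the loop above is restated here as a standalone sketch (a StringBuilder and a literal '/' replace the test's StringBuffer and Path.SEPARATOR; the output is the same). It shows the sequence LocalCacheDirectoryManager walks: the root "" first, then single-character base-36 sub-directories "0" through "z", then nested paths "0/0", "0/1", and so on. The enforced minimum of 37 for NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY (see testMinimumPerDirectoryFileLimit below) appears to leave room for the 36 possible sub-directories plus one file, which is why the in-test comments describe it as a limit of one file per directory. The two assertions that follow the decrement calls check that the freed directories "4" and "2" are handed out again in the order they were released.

    // Standalone restatement of the expectation helper in
    // testHierarchicalSubDirectoryCreation; not part of the patch.
    public final class RelativePathSequence {
      // i is the 1-based call count after the initial root ("") allocation.
      static String expectedRelativePath(int i) {
        StringBuilder sb = new StringBuilder();
        String num = Integer.toString(i - 1, 36);
        if (num.length() == 1) {
          sb.append(num.charAt(0));
        } else {
          sb.append(Integer.toString(
              Integer.parseInt(num.substring(0, 1), 36) - 1, 36));
        }
        for (int j = 1; j < num.length(); j++) {
          sb.append('/').append(num.charAt(j));
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        // Prints "0", "1", ..., "z", then "0/0", "0/1", "0/2", "0/3" for i = 1..40.
        for (int i = 1; i <= 40; i++) {
          System.out.println(i + " -> " + expectedRelativePath(i));
        }
      }
    }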
+ Assert.assertEquals(testPath1, hDir.getRelativePathForLocalization()); + Assert.assertEquals(testPath2, hDir.getRelativePathForLocalization()); + } + + @Test(timeout = 10000) + public void testMinimumPerDirectoryFileLimit() { + YarnConfiguration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "1"); + Exception e = null; + ResourceLocalizationService service = + new ResourceLocalizationService(null, null, null, null); + try { + service.init(conf); + } catch (Exception e1) { + e = e1; + } + Assert.assertNotNull(e); + Assert.assertEquals(YarnException.class, e.getClass()); + Assert.assertEquals(e.getMessage(), + YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY + + " parameter is configured with a value less than 37."); + + } + + @Test(timeout = 1000) + public void testDirectoryStateChangeFromFullToNonFull() { + YarnConfiguration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "40"); + LocalCacheDirectoryManager dir = new LocalCacheDirectoryManager(conf); + + // checking for first four paths + String rootPath = ""; + String firstSubDir = "0"; + for (int i = 0; i < 4; i++) { + Assert.assertEquals(rootPath, dir.getRelativePathForLocalization()); + } + // Releasing two files from the root directory. + dir.decrementFileCountForPath(rootPath); + dir.decrementFileCountForPath(rootPath); + // Space for two files should be available in root directory. + Assert.assertEquals(rootPath, dir.getRelativePathForLocalization()); + Assert.assertEquals(rootPath, dir.getRelativePathForLocalization()); + // As no space is now available in root directory so it should be from + // first sub directory + Assert.assertEquals(firstSubDir, dir.getRelativePathForLocalization()); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java index 0e0a47200a5..b2caba02e81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; import static org.mockito.Mockito.any; +import static org.mockito.Matchers.isA; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -36,31 +37,36 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceFailedEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent; import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; -import org.mortbay.log.Log; public class TestLocalResourcesTrackerImpl { - @Test + @Test(timeout=10000) @SuppressWarnings("unchecked") public void test() { String user = "testuser"; DrainDispatcher dispatcher = null; try { - dispatcher = createDispatcher(new Configuration()); + Configuration conf = new Configuration(); + dispatcher = createDispatcher(conf); EventHandler localizerEventHandler = mock(EventHandler.class); EventHandler containerEventHandler = @@ -86,7 +92,8 @@ public class TestLocalResourcesTrackerImpl { localrsrc.put(req1, lr1); localrsrc.put(req2, lr2); LocalResourcesTracker tracker = - new LocalResourcesTrackerImpl(user, dispatcher, localrsrc); + new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, false, + conf); ResourceEvent req11Event = new ResourceRequestEvent(req1, LocalResourceVisibility.PUBLIC, lc1); @@ -152,13 +159,14 @@ public class TestLocalResourcesTrackerImpl { } } - @Test + @Test(timeout=10000) @SuppressWarnings("unchecked") public void testConsistency() { String user = "testuser"; DrainDispatcher dispatcher = null; try { - dispatcher = createDispatcher(new Configuration()); + Configuration conf = new Configuration(); + dispatcher = createDispatcher(conf); EventHandler localizerEventHandler = mock(EventHandler.class); EventHandler containerEventHandler = mock(EventHandler.class); dispatcher.register(LocalizerEventType.class, localizerEventHandler); @@ -172,7 +180,7 @@ public class TestLocalResourcesTrackerImpl { ConcurrentMap localrsrc = new ConcurrentHashMap(); localrsrc.put(req1, lr1); LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user, - dispatcher, localrsrc); + dispatcher, localrsrc, false, conf); ResourceEvent req11Event = new ResourceRequestEvent(req1, LocalResourceVisibility.PUBLIC, lc1); @@ -221,6 +229,259 @@ public class TestLocalResourcesTrackerImpl { } } + @Test(timeout = 1000) + @SuppressWarnings("unchecked") + public void testLocalResourceCache() { + String user = "testuser"; + DrainDispatcher dispatcher = null; + try { + Configuration conf = new Configuration(); + dispatcher = createDispatcher(conf); + + EventHandler localizerEventHandler = + mock(EventHandler.class); + EventHandler containerEventHandler = + mock(EventHandler.class); + + 
// Registering event handlers. + dispatcher.register(LocalizerEventType.class, localizerEventHandler); + dispatcher.register(ContainerEventType.class, containerEventHandler); + + ConcurrentMap localrsrc = + new ConcurrentHashMap(); + LocalResourcesTracker tracker = + new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, true, conf); + + LocalResourceRequest lr = + createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.PUBLIC); + + // Creating 2 containers for same application which will be requesting + // same local resource. + // Container 1 requesting local resource. + ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1); + LocalizerContext lc1 = new LocalizerContext(user, cId1, null); + ResourceEvent reqEvent1 = + new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc1); + + // No resource request is initially present in local cache + Assert.assertEquals(0, localrsrc.size()); + + // Container-1 requesting local resource. + tracker.handle(reqEvent1); + + // New localized Resource should have been added to local resource map + // and the requesting container will be added to its waiting queue. + Assert.assertEquals(1, localrsrc.size()); + Assert.assertTrue(localrsrc.containsKey(lr)); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId1)); + Assert.assertEquals(ResourceState.DOWNLOADING, localrsrc.get(lr) + .getState()); + + // Container 2 requesting the resource + ContainerId cId2 = BuilderUtils.newContainerId(1, 1, 1, 2); + LocalizerContext lc2 = new LocalizerContext(user, cId2, null); + ResourceEvent reqEvent2 = + new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc2); + tracker.handle(reqEvent2); + + // Container 2 should have been added to the waiting queue of the local + // resource + Assert.assertEquals(2, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId2)); + + // Failing resource localization + ResourceEvent resourceFailedEvent = + new ResourceFailedLocalizationEvent(lr, new Exception("test")); + + // Backing up the resource to track its state change as it will be + // removed after the failed event. + LocalizedResource localizedResource = localrsrc.get(lr); + + tracker.handle(resourceFailedEvent); + + // After receiving failed resource event; all waiting containers will be + // notified with Container Resource Failed Event. + Assert.assertEquals(0, localrsrc.size()); + verify(containerEventHandler, times(2)).handle( + isA(ContainerResourceFailedEvent.class)); + Assert.assertEquals(ResourceState.FAILED, localizedResource.getState()); + + // Container 1 trying to release the resource (This resource is already + // deleted from the cache. This call should return silently without + // exception. + ResourceReleaseEvent relEvent1 = new ResourceReleaseEvent(lr, cId1); + tracker.handle(relEvent1); + + // Container-3 now requests for the same resource. This request call + // is coming prior to Container-2's release call. + ContainerId cId3 = BuilderUtils.newContainerId(1, 1, 1, 3); + LocalizerContext lc3 = new LocalizerContext(user, cId3, null); + ResourceEvent reqEvent3 = + new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc3); + tracker.handle(reqEvent3); + + // Local resource cache now should have the requested resource and the + // number of waiting containers should be 1. 
+ Assert.assertEquals(1, localrsrc.size()); + Assert.assertTrue(localrsrc.containsKey(lr)); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId3)); + + // Container-2 Releases the resource + ResourceReleaseEvent relEvent2 = new ResourceReleaseEvent(lr, cId2); + tracker.handle(relEvent2); + + // Making sure that there is no change in the cache after the release. + Assert.assertEquals(1, localrsrc.size()); + Assert.assertTrue(localrsrc.containsKey(lr)); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId3)); + + // Sending ResourceLocalizedEvent to tracker. In turn resource should + // send Container Resource Localized Event to waiting containers. + Path localizedPath = new Path("/tmp/file1"); + ResourceLocalizedEvent localizedEvent = + new ResourceLocalizedEvent(lr, localizedPath, 123L); + tracker.handle(localizedEvent); + + // Verifying ContainerResourceLocalizedEvent . + verify(containerEventHandler, times(1)).handle( + isA(ContainerResourceLocalizedEvent.class)); + Assert.assertEquals(ResourceState.LOCALIZED, localrsrc.get(lr) + .getState()); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + + // Container-3 releasing the resource. + ResourceReleaseEvent relEvent3 = new ResourceReleaseEvent(lr, cId3); + tracker.handle(relEvent3); + + Assert.assertEquals(0, localrsrc.get(lr).getRefCount()); + + } finally { + if (dispatcher != null) { + dispatcher.stop(); + } + } + } + + @Test(timeout = 100000) + @SuppressWarnings("unchecked") + public void testHierarchicalLocalCacheDirectories() { + String user = "testuser"; + DrainDispatcher dispatcher = null; + try { + Configuration conf = new Configuration(); + // setting per directory file limit to 1. + conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "37"); + dispatcher = createDispatcher(conf); + + EventHandler localizerEventHandler = + mock(EventHandler.class); + EventHandler containerEventHandler = + mock(EventHandler.class); + dispatcher.register(LocalizerEventType.class, localizerEventHandler); + dispatcher.register(ContainerEventType.class, containerEventHandler); + + DeletionService mockDelService = mock(DeletionService.class); + + ConcurrentMap localrsrc = + new ConcurrentHashMap(); + LocalResourcesTracker tracker = new LocalResourcesTrackerImpl(user, + dispatcher, localrsrc, true, conf); + + // This is a random path. NO File creation will take place at this place. + Path localDir = new Path("/tmp"); + + // Container 1 needs lr1 resource + ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1); + LocalResourceRequest lr1 = createLocalResourceRequest(user, 1, 1, + LocalResourceVisibility.PUBLIC); + LocalizerContext lc1 = new LocalizerContext(user, cId1, null); + + // Container 1 requests lr1 to be localized + ResourceEvent reqEvent1 = new ResourceRequestEvent(lr1, + LocalResourceVisibility.PUBLIC, lc1); + tracker.handle(reqEvent1); + + // Simulate the process of localization of lr1 + Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir); + // Simulate lr1 getting localized + ResourceLocalizedEvent rle1 = + new ResourceLocalizedEvent(lr1, + new Path(hierarchicalPath1.toUri().toString() + + Path.SEPARATOR + "file1"), 120); + tracker.handle(rle1); + // Localization successful. + + LocalResourceRequest lr2 = createLocalResourceRequest(user, 3, 3, + LocalResourceVisibility.PUBLIC); + // Container 1 requests lr2 to be localized. 
+ ResourceEvent reqEvent2 = + new ResourceRequestEvent(lr2, LocalResourceVisibility.PUBLIC, lc1); + tracker.handle(reqEvent2); + + Path hierarchicalPath2 = tracker.getPathForLocalization(lr2, localDir); + // localization failed. + ResourceFailedLocalizationEvent rfe2 = + new ResourceFailedLocalizationEvent(lr2, new Exception("Test")); + tracker.handle(rfe2); + + /* + * The path returned for two localization should be different because we + * are limiting one file per sub-directory. + */ + Assert.assertNotSame(hierarchicalPath1, hierarchicalPath2); + + LocalResourceRequest lr3 = createLocalResourceRequest(user, 2, 2, + LocalResourceVisibility.PUBLIC); + ResourceEvent reqEvent3 = new ResourceRequestEvent(lr3, + LocalResourceVisibility.PUBLIC, lc1); + tracker.handle(reqEvent3); + Path hierarchicalPath3 = tracker.getPathForLocalization(lr3, localDir); + // localization successful + ResourceLocalizedEvent rle3 = + new ResourceLocalizedEvent(lr3, new Path(hierarchicalPath3.toUri() + .toString() + Path.SEPARATOR + "file3"), 120); + tracker.handle(rle3); + + // Verifying that path created is inside the subdirectory + Assert.assertEquals(hierarchicalPath3.toUri().toString(), + hierarchicalPath1.toUri().toString() + Path.SEPARATOR + "0"); + + // Container 1 releases resource lr1 + ResourceEvent relEvent1 = new ResourceReleaseEvent(lr1, cId1); + tracker.handle(relEvent1); + + // Validate the file counts now + int resources = 0; + Iterator iter = tracker.iterator(); + while (iter.hasNext()) { + iter.next(); + resources++; + } + // There should be only two resources lr1 and lr3 now. + Assert.assertEquals(2, resources); + + // Now simulate cache cleanup - removes unused resources. + iter = tracker.iterator(); + while (iter.hasNext()) { + LocalizedResource rsrc = iter.next(); + if (rsrc.getRefCount() == 0) { + Assert.assertTrue(tracker.remove(rsrc, mockDelService)); + resources--; + } + } + // lr1 is not used by anyone and will be removed, only lr3 will hang + // around + Assert.assertEquals(1, resources); + } finally { + if (dispatcher != null) { + dispatcher.stop(); + } + } + } + private boolean createdummylocalizefile(Path path) { boolean ret = false; File file = new File(path.toUri().getRawPath().toString()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java index 7ca2c91e3c7..77bde7b1795 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceLocalizationService.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.anyBoolean; import static org.mockito.Matchers.anyInt; import static org.mockito.Matchers.anyLong; @@ -35,6 +36,7 @@ import static org.mockito.Mockito.spy; import static org.mockito.Mockito.timeout; import static 
org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.mockito.Mockito.times; import java.io.IOException; import java.net.InetSocketAddress; @@ -375,7 +377,7 @@ public class TestResourceLocalizationService { } } - @Test + @Test( timeout = 10000) @SuppressWarnings("unchecked") // mocked generics public void testLocalizationHeartbeat() throws Exception { Configuration conf = new YarnConfiguration(); @@ -386,12 +388,17 @@ public class TestResourceLocalizationService { isA(Path.class), isA(FsPermission.class), anyBoolean()); List localDirs = new ArrayList(); - String[] sDirs = new String[4]; - for (int i = 0; i < 4; ++i) { - localDirs.add(lfs.makeQualified(new Path(basedir, i + ""))); - sDirs[i] = localDirs.get(i).toString(); - } + String[] sDirs = new String[1]; + // Making sure that we have only one local disk so that it will only be + // selected for consecutive resource localization calls. This is required + // to test LocalCacheDirectoryManager. + localDirs.add(lfs.makeQualified(new Path(basedir, 0 + ""))); + sDirs[0] = localDirs.get(0).toString(); + conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs); + // Adding configuration to make sure there is only one file per + // directory + conf.set(YarnConfiguration.NM_LOCAL_CACHE_MAX_FILES_PER_DIRECTORY, "37"); String logDir = lfs.makeQualified(new Path(basedir, "logdir " )).toString(); conf.set(YarnConfiguration.NM_LOG_DIRS, logDir); DrainDispatcher dispatcher = new DrainDispatcher(); @@ -452,12 +459,23 @@ public class TestResourceLocalizationService { doReturn(out).when(spylfs).createInternal(isA(Path.class), isA(EnumSet.class), isA(FsPermission.class), anyInt(), anyShort(), anyLong(), isA(Progressable.class), isA(ChecksumOpt.class), anyBoolean()); - final LocalResource resource = getPrivateMockedResource(r); - final LocalResourceRequest req = new LocalResourceRequest(resource); + final LocalResource resource1 = getPrivateMockedResource(r); + LocalResource resource2 = null; + do { + resource2 = getPrivateMockedResource(r); + } while (resource2 == null || resource2.equals(resource1)); + // above call to make sure we don't get identical resources. + + final LocalResourceRequest req1 = new LocalResourceRequest(resource1); + final LocalResourceRequest req2 = new LocalResourceRequest(resource2); Map> rsrcs = new HashMap>(); - rsrcs.put(LocalResourceVisibility.PRIVATE, Collections.singletonList(req)); + List privateResourceList = + new ArrayList(); + privateResourceList.add(req1); + privateResourceList.add(req2); + rsrcs.put(LocalResourceVisibility.PRIVATE, privateResourceList); spyService.handle(new ContainerLocalizationRequestEvent(c, rsrcs)); // Sigh. 
Thread init of private localizer not accessible Thread.sleep(1000); @@ -471,33 +489,67 @@ public class TestResourceLocalizationService { Path localizationTokenPath = tokenPathCaptor.getValue(); // heartbeat from localizer - LocalResourceStatus rsrcStat = mock(LocalResourceStatus.class); + LocalResourceStatus rsrcStat1 = mock(LocalResourceStatus.class); + LocalResourceStatus rsrcStat2 = mock(LocalResourceStatus.class); LocalizerStatus stat = mock(LocalizerStatus.class); when(stat.getLocalizerId()).thenReturn(ctnrStr); - when(rsrcStat.getResource()).thenReturn(resource); - when(rsrcStat.getLocalSize()).thenReturn(4344L); + when(rsrcStat1.getResource()).thenReturn(resource1); + when(rsrcStat2.getResource()).thenReturn(resource2); + when(rsrcStat1.getLocalSize()).thenReturn(4344L); + when(rsrcStat2.getLocalSize()).thenReturn(2342L); URL locPath = getPath("/cache/private/blah"); - when(rsrcStat.getLocalPath()).thenReturn(locPath); - when(rsrcStat.getStatus()).thenReturn(ResourceStatusType.FETCH_SUCCESS); + when(rsrcStat1.getLocalPath()).thenReturn(locPath); + when(rsrcStat2.getLocalPath()).thenReturn(locPath); + when(rsrcStat1.getStatus()).thenReturn(ResourceStatusType.FETCH_SUCCESS); + when(rsrcStat2.getStatus()).thenReturn(ResourceStatusType.FETCH_SUCCESS); when(stat.getResources()) .thenReturn(Collections.emptyList()) - .thenReturn(Collections.singletonList(rsrcStat)) + .thenReturn(Collections.singletonList(rsrcStat1)) + .thenReturn(Collections.singletonList(rsrcStat2)) .thenReturn(Collections.emptyList()); - // get rsrc + String localPath = Path.SEPARATOR + ContainerLocalizer.USERCACHE + + Path.SEPARATOR + "user0" + Path.SEPARATOR + + ContainerLocalizer.FILECACHE; + + // get first resource LocalizerHeartbeatResponse response = spyService.heartbeat(stat); assertEquals(LocalizerAction.LIVE, response.getLocalizerAction()); - assertEquals(req, new LocalResourceRequest(response.getLocalResource(0))); + assertEquals(1, response.getResourceSpecs().size()); + assertEquals(req1, + new LocalResourceRequest(response.getResourceSpecs().get(0).getResource())); + URL localizedPath = + response.getResourceSpecs().get(0).getDestinationDirectory(); + // Appending to local path unique number(10) generated as a part of + // LocalResourcesTracker + assertTrue(localizedPath.getFile().endsWith( + localPath + Path.SEPARATOR + "10")); + + // get second resource + response = spyService.heartbeat(stat); + assertEquals(LocalizerAction.LIVE, response.getLocalizerAction()); + assertEquals(1, response.getResourceSpecs().size()); + assertEquals(req2, new LocalResourceRequest(response.getResourceSpecs() + .get(0).getResource())); + localizedPath = + response.getResourceSpecs().get(0).getDestinationDirectory(); + // Resource's destination path should be now inside sub directory 0 as + // LocalCacheDirectoryManager will be used and we have restricted number + // of files per directory to 1. 
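The assertions around this point exercise the central API change of the patch: LocalizerHeartbeatResponse no longer returns bare LocalResource entries but ResourceLocalizationSpec objects, each pairing the resource to fetch with the destination directory chosen by the NM-side LocalCacheDirectoryManager (here a path ending in usercache/user0/filecache/10 for the first resource and, once that directory counts as full, filecache/0/11 for the second, which the assertTrue just below verifies). A consumer-side sketch of reading that payload, not taken from the patch (class and method names are illustrative):

    import java.util.List;

    import org.apache.hadoop.yarn.api.records.LocalResource;
    import org.apache.hadoop.yarn.api.records.URL;
    import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec;
    import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse;

    // Hypothetical inspector showing what each spec in the response carries.
    final class HeartbeatInspector {
      static void describe(LocalizerHeartbeatResponse response) {
        List<ResourceLocalizationSpec> specs = response.getResourceSpecs();
        for (ResourceLocalizationSpec spec : specs) {
          LocalResource rsrc = spec.getResource();          // what to download
          URL destination = spec.getDestinationDirectory(); // where the NM wants it
          System.out.println(rsrc.getResource() + " -> " + destination.getFile());
        }
      }
    }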
+ assertTrue(localizedPath.getFile().endsWith( + localPath + Path.SEPARATOR + "0" + Path.SEPARATOR + "11")); // empty rsrc response = spyService.heartbeat(stat); assertEquals(LocalizerAction.LIVE, response.getLocalizerAction()); - assertEquals(0, response.getAllResources().size()); + assertEquals(0, response.getResourceSpecs().size()); // get shutdown response = spyService.heartbeat(stat); assertEquals(LocalizerAction.DIE, response.getLocalizerAction()); + + dispatcher.await(); // verify container notification ArgumentMatcher matchesContainerLoc = new ArgumentMatcher() { @@ -508,9 +560,9 @@ public class TestResourceLocalizationService { && c.getContainerID() == evt.getContainerID(); } }; - dispatcher.await(); - verify(containerBus).handle(argThat(matchesContainerLoc)); - + // total 2 resource localzation calls. one for each resource. + verify(containerBus, times(2)).handle(argThat(matchesContainerLoc)); + // Verify deletion of localization token. verify(delService).delete((String)isNull(), eq(localizationTokenPath)); } finally { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java index ee24548c5c4..f3f7cc5067c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestResourceRetention.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; @@ -76,10 +77,11 @@ public class TestResourceRetention { LocalResourcesTracker createMockTracker(String user, final long rsrcSize, long nRsrcs, long timestamp, long tsstep) { + Configuration conf = new Configuration(); ConcurrentMap trackerResources = new ConcurrentHashMap(); LocalResourcesTracker ret = spy(new LocalResourcesTrackerImpl(user, null, - trackerResources)); + trackerResources, false, conf)); for (int i = 0; i < nRsrcs; ++i) { final LocalResourceRequest req = new LocalResourceRequest( new Path("file:///" + user + "/rsrc" + i), timestamp + i * tsstep, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 6a9a6767567..ccbf9f76bf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -56,13 +56,13 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.DrainDispatcher; @@ -91,6 +91,7 @@ import org.mockito.Mockito; import org.mortbay.util.MultiException; + //@Ignore public class TestLogAggregationService extends BaseContainerManagerTest { @@ -679,7 +680,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); @@ -689,7 +690,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest { BuilderUtils.newApplicationAttemptId(appId, 1); ContainerId cId = BuilderUtils.newContainerId(appAttemptId, 0); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(this.user); @@ -713,12 +714,12 @@ public class TestLogAggregationService extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); this.containerManager.startContainer(startRequest); BaseContainerManagerTest.waitForContainerState(this.containerManager, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 99d7d4d444a..a27b3575072 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -21,7 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.*; import java.io.BufferedReader; import java.io.File; @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -51,7 +52,6 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -60,6 +60,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree; @@ -197,7 +198,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); @@ -210,7 +211,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setId(0); cId.setApplicationAttemptId(appAttemptId); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -234,12 +235,12 @@ public class TestContainersMonitor extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(8 * 1024 * 1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(8 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index 519ff183484..dbb50bba3f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -58,7 +59,6 @@ public class MockContainer implements Container { uniqId); this.launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(id); launchContext.setUser(user); this.state = ContainerState.NEW; @@ -104,7 +104,6 @@ public class MockContainer implements Container { .newRecordInstance(ContainerStatus.class); containerStatus .setState(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING); - containerStatus.setContainerId(this.launchContext.getContainerId()); containerStatus.setDiagnostics("testing"); containerStatus.setExitStatus(0); return containerStatus; @@ -119,4 +118,9 @@ public class MockContainer implements Container { public void handle(ContainerEvent event) { } + @Override + public Resource getResource() { + return null; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index d29e73eff43..48abd9e7b19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -179,15 +179,19 @@ public class TestNMWebServer { // TODO: Use builder utils ContainerLaunchContext launchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(containerId); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); + when(mockContainer.getId()).thenReturn(containerId); launchContext.setUser(user); Container container = - new ContainerImpl(conf, dispatcher, launchContext, null, metrics) { - @Override - public ContainerState getContainerState() { - return ContainerState.RUNNING; - }; - }; + new ContainerImpl(conf, dispatcher, launchContext, mockContainer, + null, metrics) { + + @Override + public ContainerState getContainerState() { + return ContainerState.RUNNING; + }; + }; 
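Both this TestNMWebServer hunk and the earlier TestContainer hunk reflect the widened ContainerImpl constructor, which now takes the org.apache.hadoop.yarn.api.records.Container record as an extra argument ahead of the pre-existing null argument that the patch leaves untouched. A construction sketch, not taken from the patch (the factory name and the 1024 MB / 1 vcore values are illustrative):

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.event.Dispatcher;
    import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
    import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
    import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
    import org.apache.hadoop.yarn.util.BuilderUtils;

    // Hypothetical factory capturing how the tests now build a ContainerImpl.
    final class TestContainerFactory {
      static Container newTestContainer(Configuration conf, Dispatcher dispatcher,
          ContainerLaunchContext launchContext, ContainerId cId,
          NodeManagerMetrics metrics) {
        org.apache.hadoop.yarn.api.records.Container record =
            mock(org.apache.hadoop.yarn.api.records.Container.class);
        when(record.getId()).thenReturn(cId);
        when(record.getResource()).thenReturn(BuilderUtils.newResource(1024, 1));
        // Fourth argument is the new Container record; the null that follows is
        // the argument that already existed before this patch (left null in tests).
        return new ContainerImpl(conf, dispatcher, launchContext, record, null,
            metrics);
      }
    }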
nmContext.getContainers().put(containerId, container); //TODO: Gross hack. Fix in code. ApplicationId applicationId = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 60d4d7e9bbf..c198603d434 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -54,15 +54,17 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStatusupdateEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.InvalidResourceRequestException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider; import org.apache.hadoop.yarn.service.AbstractService; @@ -276,6 +278,14 @@ public class ApplicationMasterService extends AbstractService implements List ask = request.getAskList(); List release = request.getReleaseList(); + // sanity check + try { + SchedulerUtils.validateResourceRequests(ask, + rScheduler.getMaximumResourceCapability()); + } catch (InvalidResourceRequestException e) { + LOG.warn("Invalid resource ask by application " + appAttemptId, e); + throw RPCUtil.getRemoteException(e); + } // Send new requests to appAttempt. 
Allocation allocation = this.rScheduler.allocate(appAttemptId, ask, release); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 1aaca4e8c29..1c3c55ea730 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -72,6 +72,7 @@ import org.apache.hadoop.yarn.api.records.DelegationToken; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; @@ -86,8 +87,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.InvalidResourceRequestException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; @@ -262,7 +266,7 @@ public class ClientRMService extends AbstractService implements ApplicationSubmissionContext submissionContext = request .getApplicationSubmissionContext(); ApplicationId applicationId = submissionContext.getApplicationId(); - String user = submissionContext.getUser(); + String user = submissionContext.getAMContainerSpec().getUser(); try { user = UserGroupInformation.getCurrentUser().getShortUserName(); if (rmContext.getRMApps().get(applicationId) != null) { @@ -271,7 +275,22 @@ public class ClientRMService extends AbstractService implements } // Safety - submissionContext.setUser(user); + submissionContext.getAMContainerSpec().setUser(user); + + // Check whether AM resource requirements are within required limits + if (!submissionContext.getUnmanagedAM()) { + ResourceRequest amReq = BuilderUtils.newResourceRequest( + RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, + submissionContext.getResource(), 1); + try { + SchedulerUtils.validateResourceRequest(amReq, + scheduler.getMaximumResourceCapability()); + } catch (InvalidResourceRequestException e) { + LOG.warn("RM app submission failed in validating AM resource request" + + " for application " + applicationId, e); + throw RPCUtil.getRemoteException(e); + } + } // This needs to be synchronous as the client 
can query // immediately following the submission to get the application status. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 70fd2576ab0..7c4f9d75d5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -57,6 +57,7 @@ public class RMAppManager implements EventHandler, private static final Log LOG = LogFactory.getLog(RMAppManager.class); private int completedAppsMax = YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS; + private int globalMaxAppAttempts; private LinkedList completedApps = new LinkedList(); private final RMContext rmContext; @@ -76,6 +77,8 @@ public class RMAppManager implements EventHandler, setCompletedAppsMax(conf.getInt( YarnConfiguration.RM_MAX_COMPLETED_APPLICATIONS, YarnConfiguration.DEFAULT_RM_MAX_COMPLETED_APPLICATIONS)); + globalMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); } /** @@ -246,10 +249,11 @@ public class RMAppManager implements EventHandler, // Create RMApp application = new RMAppImpl(applicationId, rmContext, this.conf, - submissionContext.getApplicationName(), - submissionContext.getUser(), submissionContext.getQueue(), - submissionContext, this.scheduler, this.masterService, - submitTime); + submissionContext.getApplicationName(), + submissionContext.getAMContainerSpec().getUser(), + submissionContext.getQueue(), + submissionContext, this.scheduler, this.masterService, + submitTime); // Sanity check - duplicate? if (rmContext.getRMApps().putIfAbsent(applicationId, application) != @@ -308,6 +312,7 @@ public class RMAppManager implements EventHandler, Map appStates = state.getApplicationState(); LOG.info("Recovering " + appStates.size() + " applications"); for(ApplicationState appState : appStates.values()) { + boolean shouldRecover = true; // re-submit the application // this is going to send an app start event but since the async dispatcher // has not started that event will be queued until we have completed re @@ -318,16 +323,39 @@ public class RMAppManager implements EventHandler, // This will need to be changed in work preserving recovery in which // RM will re-connect with the running AM's instead of restarting them LOG.info("Not recovering unmanaged application " + appState.getAppId()); - store.removeApplication(appState); + shouldRecover = false; + } + int individualMaxAppAttempts = appState.getApplicationSubmissionContext() + .getMaxAppAttempts(); + int maxAppAttempts; + if (individualMaxAppAttempts <= 0 || + individualMaxAppAttempts > globalMaxAppAttempts) { + maxAppAttempts = globalMaxAppAttempts; + LOG.warn("The specific max attempts: " + individualMaxAppAttempts + + " for application: " + appState.getAppId() + + " is invalid, because it is out of the range [1, " + + globalMaxAppAttempts + "]. 
Use the global max attempts instead."); } else { + maxAppAttempts = individualMaxAppAttempts; + } + if(appState.getAttemptCount() >= maxAppAttempts) { + LOG.info("Not recovering application " + appState.getAppId() + + " because its attempt count has already reached the maxAppAttempts limit"); + shouldRecover = false; + } + + if(shouldRecover) { LOG.info("Recovering application " + appState.getAppId()); submitApplication(appState.getApplicationSubmissionContext(), - appState.getSubmitTime()); + appState.getSubmitTime()); // re-populate attempt information in application RMAppImpl appImpl = (RMAppImpl) rmContext.getRMApps().get( - appState.getAppId()); + appState.getAppId()); appImpl.recover(state); } + else { + store.removeApplication(appState); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index a16aa357758..4cd19697417 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -327,12 +327,52 @@ public class ResourceManager extends CompositeService implements Recoverable { this.applicationACLsManager, this.conf); } + // sanity check for configurations protected static void validateConfigs(Configuration conf) { - int globalMaxAppAttempts = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + // validate max-attempts + int globalMaxAppAttempts = + conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); if (globalMaxAppAttempts <= 0) { - throw new YarnException( - "The global max attempts should be a positive integer."); + throw new YarnException("Invalid global max attempts configuration" + + ", " + YarnConfiguration.RM_AM_MAX_ATTEMPTS + + "=" + globalMaxAppAttempts + ", it should be a positive integer."); + } + + // validate scheduler memory allocation setting + int minMem = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); + int maxMem = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB); + + if (minMem <= 0 || minMem > maxMem) { + throw new YarnException("Invalid resource scheduler memory" + + " allocation configuration" + + ", " + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB + + "=" + minMem + + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB + + "=" + maxMem + ", min and max should be greater than 0" + + ", max should be no smaller than min."); + } + + // validate scheduler vcores allocation setting + int minVcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + int maxVcores = conf.getInt( + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + + if (minVcores <= 0 || minVcores > maxVcores) { + throw new YarnException("Invalid resource scheduler vcores" + + " allocation configuration" + + ", " +
YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES + + "=" + minVcores + + ", " + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES + + "=" + maxVcores + ", min and max should be greater than 0" + + ", max should be no smaller than min."); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 815df2fe403..258c7dc0e47 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -27,6 +27,7 @@ import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.PolicyProvider; +import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -51,6 +52,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeReconnectEvent import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider; +import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; import org.apache.hadoop.yarn.service.AbstractService; import org.apache.hadoop.yarn.util.RackResolver; @@ -67,16 +69,17 @@ public class ResourceTrackerService extends AbstractService implements private final NMLivelinessMonitor nmLivelinessMonitor; private final RMContainerTokenSecretManager containerTokenSecretManager; + private long nextHeartBeatInterval; private Server server; private InetSocketAddress resourceTrackerAddress; - private static final NodeHeartbeatResponse reboot = recordFactory + private static final NodeHeartbeatResponse resync = recordFactory .newRecordInstance(NodeHeartbeatResponse.class); private static final NodeHeartbeatResponse shutDown = recordFactory .newRecordInstance(NodeHeartbeatResponse.class); static { - reboot.setNodeAction(NodeAction.REBOOT); + resync.setNodeAction(NodeAction.RESYNC); shutDown.setNodeAction(NodeAction.SHUTDOWN); } @@ -100,6 +103,14 @@ public class ResourceTrackerService extends AbstractService implements YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_PORT); RackResolver.init(conf); + nextHeartBeatInterval = + conf.getLong(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, + YarnConfiguration.DEFAULT_RM_NM_HEARTBEAT_INTERVAL_MS); + if (nextHeartBeatInterval <= 0) { + throw new YarnException("Invalid Configuration. 
" + + YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS + + " should be larger than 0."); + } super.init(conf); } @@ -209,7 +220,7 @@ public class ResourceTrackerService extends AbstractService implements if (rmNode == null) { /* node does not exist */ LOG.info("Node not found rebooting " + remoteNodeStatus.getNodeId()); - return reboot; + return resync; } // Send ping @@ -223,9 +234,6 @@ public class ResourceTrackerService extends AbstractService implements new RMNodeEvent(nodeId, RMNodeEventType.DECOMMISSION)); return shutDown; } - - NodeHeartbeatResponse nodeHeartBeatResponse = recordFactory - .newRecordInstance(NodeHeartbeatResponse.class); // 3. Check if it's a 'fresh' heartbeat i.e. not duplicate heartbeat NodeHeartbeatResponse lastNodeHeartbeatResponse = rmNode.getLastNodeHeartBeatResponse(); @@ -242,14 +250,15 @@ public class ResourceTrackerService extends AbstractService implements // TODO: Just sending reboot is not enough. Think more. this.rmContext.getDispatcher().getEventHandler().handle( new RMNodeEvent(nodeId, RMNodeEventType.REBOOTING)); - return reboot; + return resync; } // Heartbeat response - nodeHeartBeatResponse.setResponseId(lastNodeHeartbeatResponse.getResponseId() + 1); + NodeHeartbeatResponse nodeHeartBeatResponse = YarnServerBuilderUtils + .newNodeHeartbeatResponse(lastNodeHeartbeatResponse. + getResponseId() + 1, NodeAction.NORMAL, null, null, null, + nextHeartBeatInterval); rmNode.updateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse); - nodeHeartBeatResponse.setNodeAction(NodeAction.NORMAL); - // Check if node's masterKey needs to be updated and if the currentKey has // roller over, send it across if (isSecurityEnabled()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index e45e1dd8f81..517d7fb31d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -77,6 +77,7 @@ public class AMLauncher implements Runnable { RecordFactoryProvider.getRecordFactory(null); private final AMLauncherEventType eventType; private final RMContext rmContext; + private final Container masterContainer; @SuppressWarnings("rawtypes") private final EventHandler handler; @@ -88,34 +89,36 @@ public class AMLauncher implements Runnable { this.eventType = eventType; this.rmContext = rmContext; this.handler = rmContext.getDispatcher().getEventHandler(); + this.masterContainer = application.getMasterContainer(); } private void connect() throws IOException { - ContainerId masterContainerID = application.getMasterContainer().getId(); + ContainerId masterContainerID = masterContainer.getId(); containerMgrProxy = getContainerMgrProxy(masterContainerID); } private void launch() throws IOException { connect(); - ContainerId masterContainerID = application.getMasterContainer().getId(); + ContainerId masterContainerID = masterContainer.getId(); ApplicationSubmissionContext applicationContext = application.getSubmissionContext(); - LOG.info("Setting up container " + 
application.getMasterContainer() + LOG.info("Setting up container " + masterContainer + " for AM " + application.getAppAttemptId()); ContainerLaunchContext launchContext = createAMContainerLaunchContext(applicationContext, masterContainerID); StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class); request.setContainerLaunchContext(launchContext); + request.setContainer(masterContainer); containerMgrProxy.startContainer(request); - LOG.info("Done launching container " + application.getMasterContainer() + LOG.info("Done launching container " + masterContainer + " for AM " + application.getAppAttemptId()); } private void cleanup() throws IOException { connect(); - ContainerId containerId = application.getMasterContainer().getId(); + ContainerId containerId = masterContainer.getId(); StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); stopRequest.setContainerId(containerId); @@ -126,9 +129,7 @@ public class AMLauncher implements Runnable { protected ContainerManager getContainerMgrProxy( final ContainerId containerId) { - Container container = application.getMasterContainer(); - - final NodeId node = container.getNodeId(); + final NodeId node = masterContainer.getNodeId(); final InetSocketAddress containerManagerBindAddress = NetUtils.createSocketAddrForHost(node.getHost(), node.getPort()); @@ -138,8 +139,8 @@ public class AMLauncher implements Runnable { .createRemoteUser(containerId.toString()); if (UserGroupInformation.isSecurityEnabled()) { Token token = - ProtoUtils.convertFromProtoFormat(container.getContainerToken(), - containerManagerBindAddress); + ProtoUtils.convertFromProtoFormat(masterContainer + .getContainerToken(), containerManagerBindAddress); currentUser.addToken(token); } return currentUser.doAs(new PrivilegedAction() { @@ -165,30 +166,28 @@ public class AMLauncher implements Runnable { new String[0]))); // Finalize the container - container.setContainerId(containerID); - container.setUser(applicationMasterContext.getUser()); - setupTokensAndEnv(container); + container.setUser(applicationMasterContext.getAMContainerSpec().getUser()); + setupTokensAndEnv(container, containerID); return container; } private void setupTokensAndEnv( - ContainerLaunchContext container) + ContainerLaunchContext container, ContainerId containerID) throws IOException { Map environment = container.getEnvironment(); - environment.put(ApplicationConstants.APPLICATION_WEB_PROXY_BASE_ENV, application.getWebProxyBase()); // Set the AppAttemptId, containerId, NMHTTPAdress, AppSubmitTime to be // consumable by the AM. 
- environment.put(ApplicationConstants.AM_CONTAINER_ID_ENV, container - .getContainerId().toString()); - environment.put(ApplicationConstants.NM_HOST_ENV, application - .getMasterContainer().getNodeId().getHost()); + environment.put(ApplicationConstants.AM_CONTAINER_ID_ENV, + containerID.toString()); + environment.put(ApplicationConstants.NM_HOST_ENV, masterContainer + .getNodeId().getHost()); environment.put(ApplicationConstants.NM_PORT_ENV, - String.valueOf(application.getMasterContainer().getNodeId().getPort())); + String.valueOf(masterContainer.getNodeId().getPort())); String parts[] = - application.getMasterContainer().getNodeHttpAddress().split(":"); + masterContainer.getNodeHttpAddress().split(":"); environment.put(ApplicationConstants.NM_HTTP_PORT_ENV, parts[1]); ApplicationId applicationId = application.getAppAttemptId().getApplicationId(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DefaultResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DefaultResourceCalculator.java index 295389c1ae5..5a691afab70 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DefaultResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DefaultResourceCalculator.java @@ -53,12 +53,14 @@ public class DefaultResourceCalculator extends ResourceCalculator { } @Override - public Resource normalize(Resource r, Resource minimumResource) { - return Resources.createResource( + public Resource normalize(Resource r, Resource minimumResource, + Resource maximumResource) { + int normalizedMemory = Math.min( roundUp( Math.max(r.getMemory(), minimumResource.getMemory()), - minimumResource.getMemory()) - ); + minimumResource.getMemory()), + maximumResource.getMemory()); + return Resources.createResource(normalizedMemory); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DominantResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DominantResourceCalculator.java index ad672399bcb..2f6699038fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DominantResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/DominantResourceCalculator.java @@ -123,15 +123,20 @@ public class DominantResourceCalculator extends ResourceCalculator { } @Override - public Resource normalize(Resource r, Resource minimumResource) { - return Resources.createResource( + public Resource normalize(Resource r, Resource minimumResource, + Resource maximumResource) { + int normalizedMemory = Math.min( roundUp( - Math.max(r.getMemory(), minimumResource.getMemory()), + Math.max(r.getMemory(), minimumResource.getMemory()), minimumResource.getMemory()), + 
maximumResource.getMemory()); + int normalizedCores = Math.min( roundUp( Math.max(r.getVirtualCores(), minimumResource.getVirtualCores()), - minimumResource.getVirtualCores()) - ); + minimumResource.getVirtualCores()), + maximumResource.getVirtualCores()); + return Resources.createResource(normalizedMemory, + normalizedCores); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceCalculator.java index 0cde50905f4..b2dd19bbf5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceCalculator.java @@ -88,13 +88,16 @@ public abstract class ResourceCalculator { /** * Normalize resource r given the base - * minimumResource. + * minimumResource and verify against max allowed + * maximumResource * * @param r resource * @param minimumResource step-factor + * @param maximumResource the upper bound of the resource to be allocated * @return normalized resource */ - public abstract Resource normalize(Resource r, Resource minimumResource); + public abstract Resource normalize(Resource r, Resource minimumResource, + Resource maximumResource); /** * Round-up resource r given factor stepFactor. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/Resources.java index 3aae1425526..5c94f927838 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/Resources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/Resources.java @@ -132,8 +132,9 @@ public class Resources { } public static Resource normalize( - ResourceCalculator calculator, Resource lhs, Resource factor) { - return calculator.normalize(lhs, factor); + ResourceCalculator calculator, Resource lhs, Resource factor, + Resource limit) { + return calculator.normalize(lhs, factor, limit); } public static Resource roundUp( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index c58613e70c8..eaa15f53875 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -690,7 +690,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { appAttempt.eventHandler.handle( new AppAddedSchedulerEvent(appAttempt.applicationAttemptId, appAttempt.submissionContext.getQueue(), - appAttempt.submissionContext.getUser())); + appAttempt.submissionContext.getAMContainerSpec().getUser())); } } @@ -736,10 +736,13 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppEventType.APP_ACCEPTED)); // Request a container for the AM. - ResourceRequest request = BuilderUtils.newResourceRequest( - AM_CONTAINER_PRIORITY, ResourceRequest.ANY, appAttempt.submissionContext - .getAMContainerSpec().getResource(), 1); + ResourceRequest request = + BuilderUtils.newResourceRequest( + AM_CONTAINER_PRIORITY, ResourceRequest.ANY, appAttempt + .getSubmissionContext().getResource(), 1); + // SchedulerUtils.validateResourceRequests is not necessary because + // the AM resource has already been validated at submission time Allocation amContainerAllocation = appAttempt.scheduler.allocate( appAttempt.applicationAttemptId, Collections.singletonList(request), EMPTY_CONTAINER_RELEASE_LIST); @@ -771,7 +774,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Set the masterContainer appAttempt.setMasterContainer(amContainerAllocation.getContainers().get( 0)); - + appAttempt.getSubmissionContext().setResource( + appAttempt.getMasterContainer().getResource()); RMStateStore store = appAttempt.rmContext.getStateStore(); appAttempt.storeAttempt(store); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index 69ac6012eb8..d44fd3f32c2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -288,14 +288,9 @@ public class RMContainerImpl implements RMContainer { public void transition(RMContainerImpl container, RMContainerEvent event) { RMContainerFinishedEvent finishedEvent = (RMContainerFinishedEvent) event; - // Update container-status for diagnostics. Today we completely - // replace it on finish. We may just need to update diagnostics.
- container.container.setContainerStatus(finishedEvent - .getRemoteContainerStatus()); - // Inform AppAttempt container.eventHandler.handle(new RMAppAttemptContainerFinishedEvent( - container.appAttemptId, container.container.getContainerStatus())); + container.appAttemptId, finishedEvent.getRemoteContainerStatus())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/InvalidResourceRequestException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/InvalidResourceRequestException.java new file mode 100644 index 00000000000..3d1e7dda432 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/InvalidResourceRequestException.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler; + +import org.apache.hadoop.yarn.YarnException; + +/** + * The exception is thrown when the requested resource is out of the range + * of the configured lower and upper resource boundaries. 
+ * + */ +public class InvalidResourceRequestException extends YarnException { + + public InvalidResourceRequestException(Throwable cause) { + super(cause); + } + + public InvalidResourceRequestException(String message) { + super(message); + } + + public InvalidResourceRequestException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java index f651566d657..802eff551b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java @@ -89,10 +89,12 @@ public class SchedulerUtils { List asks, ResourceCalculator resourceCalculator, Resource clusterResource, - Resource minimumResource) { + Resource minimumResource, + Resource maximumResource) { for (ResourceRequest ask : asks) { normalizeRequest( - ask, resourceCalculator, clusterResource, minimumResource); + ask, resourceCalculator, clusterResource, minimumResource, + maximumResource); } } @@ -104,12 +106,50 @@ public class SchedulerUtils { ResourceRequest ask, ResourceCalculator resourceCalculator, Resource clusterResource, - Resource minimumResource) { + Resource minimumResource, + Resource maximumResource) { Resource normalized = Resources.normalize( - resourceCalculator, ask.getCapability(), minimumResource); - ask.getCapability().setMemory(normalized.getMemory()); - ask.getCapability().setVirtualCores(normalized.getVirtualCores()); + resourceCalculator, ask.getCapability(), minimumResource, + maximumResource); + ask.setCapability(normalized); + } + + /** + * Utility method to validate a resource request, by ensuring that the + * requested memory/vcore is non-negative and not greater than max + */ + public static void validateResourceRequest(ResourceRequest resReq, + Resource maximumResource) throws InvalidResourceRequestException { + if (resReq.getCapability().getMemory() < 0 || + resReq.getCapability().getMemory() > maximumResource.getMemory()) { + throw new InvalidResourceRequestException("Invalid resource request" + + ", requested memory < 0" + + ", or requested memory > max configured" + + ", requestedMemory=" + resReq.getCapability().getMemory() + + ", maxMemory=" + maximumResource.getMemory()); + } + if (resReq.getCapability().getVirtualCores() < 0 || + resReq.getCapability().getVirtualCores() > + maximumResource.getVirtualCores()) { + throw new InvalidResourceRequestException("Invalid resource request" + + ", requested virtual cores < 0" + + ", or requested virtual cores > max configured" + + ", requestedVirtualCores=" + + resReq.getCapability().getVirtualCores() + + ", maxVirtualCores=" + maximumResource.getVirtualCores()); + } + } + + /** + * Utility method to validate a list of resource requests, by ensuring that + * the requested memory/vcore is non-negative and not greater than max + */ + public static void validateResourceRequests(List ask, + Resource maximumResource) throws InvalidResourceRequestException { + for (ResourceRequest resReq : ask) { +
validateResourceRequest(resReq, maximumResource); + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 1b3858cc864..aca2a1292e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -108,7 +108,7 @@ implements ResourceScheduler, CapacitySchedulerContext, Configurable { new Comparator() { @Override public int compare(FiCaSchedulerApp a1, FiCaSchedulerApp a2) { - return a1.getApplicationId().getId() - a2.getApplicationId().getId(); + return a1.getApplicationId().compareTo(a2.getApplicationId()); } }; @@ -483,7 +483,8 @@ implements ResourceScheduler, CapacitySchedulerContext, Configurable { // Sanity check SchedulerUtils.normalizeRequests( - ask, calculator, getClusterResources(), minimumAllocation); + ask, calculator, getClusterResources(), minimumAllocation, + maximumAllocation); // Release containers for (ContainerId releasedContainerId : release) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index 10906a48bd3..8d71f00538f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -315,8 +315,8 @@ public class CapacitySchedulerConfiguration extends Configuration { YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB); int maximumCores = getInt( - YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_CORES, - YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_CORES); + YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); return Resources.createResource(maximumMemory, maximumCores); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index d0e03a73ee4..58dcb73767d 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -89,6 +89,8 @@ public class LeafQueue implements CSQueue { private int maxActiveAppsUsingAbsCap; // Based on absolute capacity private int maxActiveApplicationsPerUser; + private int nodeLocalityDelay; + private Resource usedResources = Resources.createResource(0, 0); private float usedCapacity = 0.0f; private volatile int numContainers; @@ -123,8 +125,6 @@ public class LeafQueue implements CSQueue { private final ActiveUsersManager activeUsersManager; - private final int nodeLocalityDelay; - private final ResourceCalculator resourceCalculator; public LeafQueue(CapacitySchedulerContext cs, @@ -196,9 +196,6 @@ public class LeafQueue implements CSQueue { Map acls = cs.getConfiguration().getAcls(getQueuePath()); - this.nodeLocalityDelay = - cs.getConfiguration().getNodeLocalityDelay(); - setupQueueConfigs( cs.getClusterResources(), capacity, absoluteCapacity, @@ -206,7 +203,7 @@ public class LeafQueue implements CSQueue { userLimit, userLimitFactor, maxApplications, maxApplicationsPerUser, maxActiveApplications, maxActiveApplicationsPerUser, - state, acls); + state, acls, cs.getConfiguration().getNodeLocalityDelay()); if(LOG.isDebugEnabled()) { LOG.debug("LeafQueue:" + " name=" + queueName @@ -227,7 +224,8 @@ public class LeafQueue implements CSQueue { int userLimit, float userLimitFactor, int maxApplications, int maxApplicationsPerUser, int maxActiveApplications, int maxActiveApplicationsPerUser, - QueueState state, Map acls) + QueueState state, Map acls, + int nodeLocalityDelay) { // Sanity check CSQueueUtils.checkMaxCapacity(getQueueName(), capacity, maximumCapacity); @@ -256,6 +254,8 @@ public class LeafQueue implements CSQueue { this.queueInfo.setCapacity(this.capacity); this.queueInfo.setMaximumCapacity(this.maximumCapacity); this.queueInfo.setQueueState(this.state); + + this.nodeLocalityDelay = nodeLocalityDelay; StringBuilder aclsString = new StringBuilder(); for (Map.Entry e : acls.entrySet()) { @@ -319,7 +319,8 @@ public class LeafQueue implements CSQueue { "state = " + state + " [= configuredState ]" + "\n" + "acls = " + aclsString + - " [= configuredAcls ]" + "\n"); + " [= configuredAcls ]" + "\n" + + "nodeLocalityDelay = " + nodeLocalityDelay + "\n"); } @Override @@ -605,7 +606,8 @@ public class LeafQueue implements CSQueue { newlyParsedLeafQueue.getMaxApplicationsPerUser(), newlyParsedLeafQueue.getMaximumActiveApplications(), newlyParsedLeafQueue.getMaximumActiveApplicationsPerUser(), - newlyParsedLeafQueue.state, newlyParsedLeafQueue.acls); + newlyParsedLeafQueue.state, newlyParsedLeafQueue.acls, + newlyParsedLeafQueue.getNodeLocalityDelay()); // queue metrics are updated, more resource may be available // activate the pending applications if possible diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 49226a05797..238432e941c 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -650,11 +650,12 @@ public class FairScheduler implements ResourceScheduler { * * @param asks a list of resource requests * @param minMemory the configured minimum memory allocation + * @param maxMemory the configured maximum memory allocation */ static void normalizeRequests(List asks, - int minMemory) { + int minMemory, int maxMemory) { for (ResourceRequest ask : asks) { - normalizeRequest(ask, minMemory); + normalizeRequest(ask, minMemory, maxMemory); } } @@ -664,11 +665,14 @@ public class FairScheduler implements ResourceScheduler { * * @param ask the resource request * @param minMemory the configured minimum memory allocation + * @param maxMemory the configured maximum memory allocation */ - static void normalizeRequest(ResourceRequest ask, int minMemory) { + static void normalizeRequest(ResourceRequest ask, int minMemory, + int maxMemory) { int memory = Math.max(ask.getCapability().getMemory(), minMemory); - ask.getCapability().setMemory( - minMemory * ((memory / minMemory) + (memory % minMemory > 0 ? 1 : 0))); + int normalizedMemory = + minMemory * ((memory / minMemory) + (memory % minMemory > 0 ? 1 : 0)); + ask.getCapability().setMemory(Math.min(normalizedMemory, maxMemory)); } @Override @@ -684,7 +688,8 @@ public class FairScheduler implements ResourceScheduler { } // Sanity check - normalizeRequests(ask, minimumAllocation.getMemory()); + normalizeRequests(ask, minimumAllocation.getMemory(), + maximumAllocation.getMemory()); // Release containers for (ContainerId releasedContainerId : release) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 56d66c34e36..d5a542700f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -232,7 +232,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable { // Sanity check SchedulerUtils.normalizeRequests(ask, resourceCalculator, - clusterResource, minimumAllocation); + clusterResource, minimumAllocation, maximumAllocation); // Release containers for (ContainerId releasedContainer : release) { @@ -462,7 +462,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable { FiCaSchedulerApp application, Priority priority) { int assignedContainers = 0; ResourceRequest request = - application.getResourceRequest(priority, node.getRMNode().getNodeAddress()); + application.getResourceRequest(priority, node.getHostName()); if (request != null) { // Don't allocate on this node if we don't need containers on this rack ResourceRequest rackRequest = diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java index 61b4880e137..875212f5558 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java @@ -59,8 +59,8 @@ public class AppAttemptInfo { this.logsLink = join(HttpConfig.getSchemePrefix(), masterContainer.getNodeHttpAddress(), "/node", "/containerlogs/", - ConverterUtils.toString(masterContainer.getId()), - "/", attempt.getSubmissionContext().getUser()); + ConverterUtils.toString(masterContainer.getId()), "/", + attempt.getSubmissionContext().getAMContainerSpec().getUser()); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index 20ffeabfeba..3ce45ac6335 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -130,7 +130,7 @@ public class Application { public synchronized void submit() throws IOException { ApplicationSubmissionContext context = recordFactory.newRecordInstance(ApplicationSubmissionContext.class); context.setApplicationId(this.applicationId); - context.setUser(this.user); + context.getAMContainerSpec().setUser(this.user); context.setQueue(this.queue); SubmitApplicationRequest request = recordFactory .newRecordInstance(SubmitApplicationRequest.class); @@ -340,7 +340,8 @@ public class Application { // Launch the container StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); - startRequest.setContainerLaunchContext(createCLC(container)); + startRequest.setContainerLaunchContext(createCLC()); + startRequest.setContainer(container); nodeManager.startContainer(startRequest); break; } @@ -396,11 +397,9 @@ public class Application { } } - private ContainerLaunchContext createCLC(Container container) { + private ContainerLaunchContext createCLC() { ContainerLaunchContext clc = recordFactory.newRecordInstance(ContainerLaunchContext.class); - clc.setContainerId(container.getId()); clc.setUser(this.user); - clc.setResource(container.getResource()); return clc; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java index c85f233ee18..a2f59ef9202 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java @@ -26,7 +26,6 @@ import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; @@ -71,11 +70,11 @@ public class MockNM { this.resourceTracker = resourceTracker; } - public void containerStatus(Container container) throws Exception { + public void containerStatus(ContainerStatus containerStatus) throws Exception { Map> conts = new HashMap>(); - conts.put(container.getId().getApplicationAttemptId().getApplicationId(), - Arrays.asList(new ContainerStatus[] { container.getContainerStatus() })); + conts.put(containerStatus.getContainerId().getApplicationAttemptId().getApplicationId(), + Arrays.asList(new ContainerStatus[] { containerStatus })); nodeHeartbeat(conts, true); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index c3fe72d99cb..ae6d5814a46 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -209,6 +209,7 @@ public class MockNodes { final String rackName = "rack"+ rack; final int nid = hostnum; final String hostName = "host"+ nid; + final String nodeAddr = hostName + ":" + nid; final int port = 123; final NodeId nodeID = newNodeID(hostName, port); final String httpAddress = httpAddr; @@ -218,7 +219,7 @@ public class MockNodes { nodeHealthStatus.setIsNodeHealthy(true); nodeHealthStatus.setHealthReport("HealthyMe"); } - return new MockRMNodeImpl(nodeID, hostName, httpAddress, perNode, rackName, + return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, perNode, rackName, nodeHealthStatus, nid, hostName, state); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 12391c60978..e39f303ce72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -128,21 +128,28 @@ public class MockRM extends ResourceManager { // client public RMApp submitApp(int masterMemory, String name, String user) throws Exception { - return submitApp(masterMemory, 
name, user, null, false, null); + return submitApp(masterMemory, name, user, null, false, null, + super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); } public RMApp submitApp(int masterMemory, String name, String user, Map acls) throws Exception { - return submitApp(masterMemory, name, user, acls, false, null); + return submitApp(masterMemory, name, user, acls, false, null, + super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); } public RMApp submitApp(int masterMemory, String name, String user, Map acls, String queue) throws Exception { - return submitApp(masterMemory, name, user, acls, false, queue); - } + return submitApp(masterMemory, name, user, acls, false, queue, + super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); + } public RMApp submitApp(int masterMemory, String name, String user, - Map acls, boolean unmanaged, String queue) throws Exception { + Map acls, boolean unmanaged, String queue, + int maxAppAttempts) throws Exception { ClientRMProtocol client = getClientRMService(); GetNewApplicationResponse resp = client.getNewApplication(Records .newRecord(GetNewApplicationRequest.class)); @@ -154,7 +161,7 @@ public class MockRM extends ResourceManager { .newRecord(ApplicationSubmissionContext.class); sub.setApplicationId(appId); sub.setApplicationName(name); - sub.setUser(user); + sub.setMaxAppAttempts(maxAppAttempts); if(unmanaged) { sub.setUnmanagedAM(true); } @@ -163,13 +170,13 @@ public class MockRM extends ResourceManager { } ContainerLaunchContext clc = Records .newRecord(ContainerLaunchContext.class); - Resource capability = Records.newRecord(Resource.class); + final Resource capability = Records.newRecord(Resource.class); capability.setMemory(masterMemory); - clc.setResource(capability); + sub.setResource(capability); clc.setApplicationACLs(acls); + clc.setUser(user); sub.setAMContainerSpec(clc); req.setApplicationSubmissionContext(sub); - UserGroupInformation fakeUser = UserGroupInformation.createUserForTesting(user, new String[] {"someGroup"}); PrivilegedAction action = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index 170938d66d6..dba5acdd82d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -40,7 +40,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; @@ -76,6 +75,9 @@ public class NodeManager implements ContainerManager { final Map> containers = new HashMap>(); + final Map 
containerStatusMap = + new HashMap(); + public NodeManager(String hostName, int containerManagerPort, int httpPort, String rackName, Resource capability, ResourceTrackerService resourceTrackerService, RMContext rmContext) @@ -137,7 +139,7 @@ public class NodeManager implements ContainerManager { List containerStatuses = new ArrayList(); for (List appContainers : containers.values()) { for (Container container : appContainers) { - containerStatuses.add(container.getContainerStatus()); + containerStatuses.add(containerStatusMap.get(container)); } } return containerStatuses; @@ -159,11 +161,10 @@ public class NodeManager implements ContainerManager { synchronized public StartContainerResponse startContainer( StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext containerLaunchContext = - request.getContainerLaunchContext(); - + Container requestContainer = request.getContainer(); + ApplicationId applicationId = - containerLaunchContext.getContainerId().getApplicationAttemptId(). + requestContainer.getId().getApplicationAttemptId(). getApplicationId(); List applicationContainers = containers.get(applicationId); @@ -174,25 +175,28 @@ public class NodeManager implements ContainerManager { // Sanity check for (Container container : applicationContainers) { - if (container.getId().compareTo(containerLaunchContext.getContainerId()) + if (container.getId().compareTo(requestContainer.getId()) == 0) { throw new IllegalStateException( - "Container " + containerLaunchContext.getContainerId() + + "Container " + requestContainer.getId() + " already setup on node " + containerManagerAddress); } } Container container = - BuilderUtils.newContainer(containerLaunchContext.getContainerId(), + BuilderUtils.newContainer(requestContainer.getId(), this.nodeId, nodeHttpAddress, - containerLaunchContext.getResource(), + requestContainer.getResource(), null, null // DKDC - Doesn't matter ); + ContainerStatus containerStatus = + BuilderUtils.newContainerStatus(container.getId(), ContainerState.NEW, + "", -1000); applicationContainers.add(container); - - Resources.subtractFrom(available, containerLaunchContext.getResource()); - Resources.addTo(used, containerLaunchContext.getResource()); + containerStatusMap.put(container, containerStatus); + Resources.subtractFrom(available, requestContainer.getResource()); + Resources.addTo(used, requestContainer.getResource()); if(LOG.isDebugEnabled()) { LOG.debug("startContainer:" + " node=" + containerManagerAddress @@ -223,7 +227,9 @@ public class NodeManager implements ContainerManager { List applicationContainers = containers.get(applicationId); for (Container c : applicationContainers) { if (c.getId().compareTo(containerID) == 0) { - c.setState(ContainerState.COMPLETE); + ContainerStatus containerStatus = containerStatusMap.get(c); + containerStatus.setState(ContainerState.COMPLETE); + containerStatusMap.put(c, containerStatus); } } @@ -277,8 +283,8 @@ public class NodeManager implements ContainerManager { } GetContainerStatusResponse response = recordFactory.newRecordInstance(GetContainerStatusResponse.class); - if (container != null && container.getContainerStatus() != null) { - response.setStatus(container.getContainerStatus()); + if (container != null && containerStatusMap.get(container).getState() != null) { + response.setStatus(containerStatusMap.get(container)); } return response; } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java index 7e06fac573b..fb74cb605b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java @@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.Capacity import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.service.Service; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; import com.google.common.collect.Lists; @@ -503,6 +504,10 @@ public class TestAppManager{ RMApp appOrig = rmContext.getRMApps().get(appID); Assert.assertTrue("app name matches but shouldn't", "testApp1" != appOrig.getName()); + ContainerLaunchContext clc = + BuilderUtils.newContainerLaunchContext(null, null, null, null, null, + null, null); + context.setAMContainerSpec(clc); // our testApp1 should be rejected and original app with same id should be left in place appMonitor.submitApplication(context); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java index 1b778f2d4c9..2f9aa6db92d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java @@ -169,7 +169,7 @@ public class TestApplicationACLs { ContainerLaunchContext amContainer = recordFactory .newRecordInstance(ContainerLaunchContext.class); Resource resource = BuilderUtils.newResource(1024, 1); - amContainer.setResource(resource); + context.setResource(resource); amContainer.setApplicationACLs(acls); context.setAMContainerSpec(amContainer); submitRequest.setApplicationSubmissionContext(context); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index 84fd9a6dd51..5c6247b3803 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -134,7 +134,7 @@ public class TestApplicationMasterLauncher { Assert.assertEquals(app.getSubmitTime(), containerManager.submitTimeAtContainerManager); Assert.assertEquals(app.getRMAppAttempt(appAttemptId) - .getSubmissionContext().getAMContainerSpec().getContainerId() + .getMasterContainer().getId() .toString(), containerManager.containerIdAtContainerManager); Assert.assertEquals(nm1.getNodeId().getHost(), containerManager.nmHostAtContainerManager); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 0d5ef6d6f35..aa7af9c18db 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.RMDelegationTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl; @@ -254,6 +255,12 @@ public class TestClientRMService { public void testConcurrentAppSubmit() throws IOException, InterruptedException, BrokenBarrierException { YarnScheduler yarnScheduler = mock(YarnScheduler.class); + when(yarnScheduler.getMinimumResourceCapability()).thenReturn( + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB)); + when(yarnScheduler.getMaximumResourceCapability()).thenReturn( + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB)); RMContext rmContext = mock(RMContext.class); mockRMContext(yarnScheduler, rmContext); RMStateStore stateStore = mock(RMStateStore.class); @@ -311,21 +318,63 @@ public class TestClientRMService { endBarrier.await(); t.join(); } + + @Test (timeout = 30000) + public void testInvalidResourceRequestWhenSubmittingApplication() + throws IOException, InterruptedException, BrokenBarrierException { + YarnScheduler yarnScheduler = mock(YarnScheduler.class); + when(yarnScheduler.getMinimumResourceCapability()).thenReturn( + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB)); + when(yarnScheduler.getMaximumResourceCapability()).thenReturn( + Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB)); + RMContext rmContext = mock(RMContext.class); + mockRMContext(yarnScheduler, rmContext); + RMStateStore stateStore = mock(RMStateStore.class); + when(rmContext.getStateStore()).thenReturn(stateStore); + RMAppManager appManager = new RMAppManager(rmContext, yarnScheduler, + null, 
mock(ApplicationACLsManager.class), new Configuration()); + + final ApplicationId appId = getApplicationId(100); + final SubmitApplicationRequest submitRequest = mockSubmitAppRequest(appId); + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB + 1); + when(submitRequest.getApplicationSubmissionContext() + .getResource()).thenReturn(resource); + + final ClientRMService rmService = + new ClientRMService(rmContext, yarnScheduler, appManager, null, null); + + // submit an app + try { + rmService.submitApplication(submitRequest); + Assert.fail("Application submission should fail because resource" + + " request is invalid."); + } catch (YarnRemoteException e) { + // Exception is expected + Assert.assertTrue("The thrown exception is not" + + " InvalidResourceRequestException", + e.getMessage().startsWith("Invalid resource request")); + } + } private SubmitApplicationRequest mockSubmitAppRequest(ApplicationId appId) { String user = MockApps.newUserName(); String queue = MockApps.newQueue(); ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class); - Resource resource = mock(Resource.class); - when(amContainerSpec.getResource()).thenReturn(resource); + + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); ApplicationSubmissionContext submissionContext = mock(ApplicationSubmissionContext.class); - when(submissionContext.getUser()).thenReturn(user); - when(submissionContext.getQueue()).thenReturn(queue); when(submissionContext.getAMContainerSpec()).thenReturn(amContainerSpec); + when(submissionContext.getAMContainerSpec().getUser()).thenReturn(user); + when(submissionContext.getQueue()).thenReturn(queue); when(submissionContext.getApplicationId()).thenReturn(appId); - + when(submissionContext.getResource()).thenReturn(resource); + SubmitApplicationRequest submitRequest = recordFactory.newRecordInstance(SubmitApplicationRequest.class); submitRequest.setApplicationSubmissionContext(submissionContext); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java index 4780ec38821..2b7991fb26a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestFifoScheduler.java @@ -139,8 +139,9 @@ public class TestFifoScheduler { Container c1 = allocated1.get(0); Assert.assertEquals(GB, c1.getResource().getMemory()); - c1.setState(ContainerState.COMPLETE); - nm1.containerStatus(c1); + ContainerStatus containerStatus = BuilderUtils.newContainerStatus( + c1.getId(), ContainerState.COMPLETE, "", 0); + nm1.containerStatus(containerStatus); int waitCount = 0; while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) { @@ -197,6 +198,8 @@ public class TestFifoScheduler { int allocMB = 1536; YarnConfiguration conf = new YarnConfiguration(TestFifoScheduler.conf); conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, allocMB); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, + 
allocMB * 10); // Test for something lesser than this. testMinimumAllocation(conf, allocMB / 2); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index 2057d8ab419..78adf79eba0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -19,11 +19,13 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; @@ -62,6 +64,7 @@ public class TestRMRestart { "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore"); conf.set(YarnConfiguration.RM_SCHEDULER, "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler"); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 5); MemoryRMStateStore memStore = new MemoryRMStateStore(); memStore.init(conf); @@ -152,7 +155,9 @@ public class TestRMRestart { .getApplicationId()); // create unmanaged app - RMApp appUnmanaged = rm1.submitApp(200, "someApp", "someUser", null, true, null); + RMApp appUnmanaged = rm1.submitApp(200, "someApp", "someUser", null, true, + null, conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)); ApplicationAttemptId unmanagedAttemptId = appUnmanaged.getCurrentAppAttempt().getAppAttemptId(); // assert appUnmanaged info is saved @@ -220,9 +225,9 @@ public class TestRMRestart { // NM should be rebooted on heartbeat, even first heartbeat for nm2 NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true); - Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction()); + Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction()); hbResponse = nm2.nodeHeartbeat(true); - Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction()); + Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction()); // new NM to represent NM re-register nm1 = rm2.registerNode("h1:1234", 15120); @@ -230,9 +235,9 @@ public class TestRMRestart { // verify no more reboot response sent hbResponse = nm1.nodeHeartbeat(true); - Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction()); + Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction()); hbResponse = nm2.nodeHeartbeat(true); - Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction()); + Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction()); // assert app1 attempt is saved attempt1 = loadedApp1.getCurrentAppAttempt(); @@ -306,4 +311,74 @@ public class TestRMRestart { Assert.assertEquals(0, rmAppState.size()); } + @Test + public void testRMRestartOnMaxAppAttempts() throws Exception { + Logger rootLogger = LogManager.getRootLogger(); + 
rootLogger.setLevel(Level.DEBUG); + ExitUtil.disableSystemExit(); + + YarnConfiguration conf = new YarnConfiguration(); + conf.set(YarnConfiguration.RECOVERY_ENABLED, "true"); + conf.set(YarnConfiguration.RM_STORE, + "org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore"); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + RMState rmState = memStore.getState(); + + Map rmAppState = + rmState.getApplicationState(); + MockRM rm1 = new MockRM(conf, memStore); + rm1.start(); + MockNM nm1 = new MockNM("h1:1234", 15120, rm1.getResourceTrackerService()); + nm1.registerNode(); + + // submit an app with maxAppAttempts equals to 1 + RMApp app1 = rm1.submitApp(200, "name", "user", + new HashMap(), false, "default", 1); + // submit an app with maxAppAttempts equals to -1 + RMApp app2 = rm1.submitApp(200, "name", "user", + new HashMap(), false, "default", -1); + + // assert app1 info is saved + ApplicationState appState = rmAppState.get(app1.getApplicationId()); + Assert.assertNotNull(appState); + Assert.assertEquals(0, appState.getAttemptCount()); + Assert.assertEquals(appState.getApplicationSubmissionContext() + .getApplicationId(), app1.getApplicationSubmissionContext() + .getApplicationId()); + + // Allocate the AM + nm1.nodeHeartbeat(true); + RMAppAttempt attempt = app1.getCurrentAppAttempt(); + ApplicationAttemptId attemptId1 = attempt.getAppAttemptId(); + rm1.waitForState(attemptId1, RMAppAttemptState.ALLOCATED); + Assert.assertEquals(1, appState.getAttemptCount()); + ApplicationAttemptState attemptState = + appState.getAttempt(attemptId1); + Assert.assertNotNull(attemptState); + Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1), + attemptState.getMasterContainer().getId()); + rm1.stop(); + + // start new RM + MockRM rm2 = new MockRM(conf, memStore); + rm2.start(); + + // verify that maxAppAttempts is set to global value + Assert.assertEquals(2, + rm2.getRMContext().getRMApps().get(app2.getApplicationId()) + .getMaxAppAttempts()); + + // verify that app2 exists app1 is removed + Assert.assertEquals(1, rm2.getRMContext().getRMApps().size()); + Assert.assertNotNull(rm2.getRMContext().getRMApps() + .get(app2.getApplicationId())); + Assert.assertNull(rm2.getRMContext().getRMApps() + .get(app1.getApplicationId())); + + // stop the RM + rm2.stop(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java index 4b2323d241d..af95d2b1695 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; @@ -192,9 +193,41 @@ public class TestResourceManager { conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, -1); try { resourceManager.init(conf); - fail("Exception is 
expected because the global max attempts is negative."); + fail("Exception is expected because the global max attempts" + + " is negative."); } catch (YarnException e) { // Exception is expected. + assertTrue("The thrown exception is not the expected one.", + e.getMessage().startsWith( + "Invalid global max attempts configuration")); + } + + conf = new YarnConfiguration(); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 2048); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 1024); + try { + resourceManager.init(conf); + fail("Exception is expected because the min memory allocation is" + + " larger than the max memory allocation."); + } catch (YarnException e) { + // Exception is expected. + assertTrue("The thrown exception is not the expected one.", + e.getMessage().startsWith( + "Invalid resource scheduler memory")); + } + + conf = new YarnConfiguration(); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 2); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 1); + try { + resourceManager.init(conf); + fail("Exception is expected because the min vcores allocation is" + + " larger than the max vcores allocation."); + } catch (YarnException e) { + // Exception is expected. + assertTrue("The thrown exception is not the expected one.", + e.getMessage().startsWith( + "Invalid resource scheduler vcores")); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index f969c6c888d..af9d5d2c0bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -53,6 +53,29 @@ public class TestResourceTrackerService { private File hostFile = new File(TEMP_DIR + File.separator + "hostFile.txt"); private MockRM rm; + /** + * Test RM read NM next heartBeat Interval correctly from Configuration file, + * and NM get next heartBeat Interval from RM correctly + */ + @Test (timeout = 5000) + public void testGetNextHeartBeatInterval() throws Exception { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, "4000"); + + rm = new MockRM(conf); + rm.start(); + + MockNM nm1 = rm.registerNode("host1:1234", 5120); + MockNM nm2 = rm.registerNode("host2:5678", 10240); + + NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert.assertEquals(4000, nodeHeartbeat.getNextHeartBeatInterval()); + + NodeHeartbeatResponse nodeHeartbeat2 = nm2.nodeHeartbeat(true); + Assert.assertEquals(4000, nodeHeartbeat2.getNextHeartBeatInterval()); + + } + /** * Decommissioning using a pre-configured include hosts file */ @@ -259,7 +282,7 @@ public class TestResourceTrackerService { nodeHeartbeat = nm2.nodeHeartbeat( new HashMap>(), true, -100); - Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction())); + Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); checkRebootedNMCount(rm, ++initialMetricCount); } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java index 984d7cdfcf5..1fd1b2c9d36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java @@ -130,6 +130,6 @@ public class TestRMNMRPCResponseId { nodeStatus.setResponseId(0); response = resourceTrackerService.nodeHeartbeat(nodeHeartBeatRequest); - Assert.assertTrue(NodeAction.REBOOT.equals(response.getNodeAction())); + Assert.assertTrue(NodeAction.RESYNC.equals(response.getNodeAction())); } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index f10e646b85d..f736edf2038 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -200,13 +200,14 @@ public class TestRMAppAttemptTransitions { final String user = MockApps.newUserName(); final String queue = MockApps.newQueue(); submissionContext = mock(ApplicationSubmissionContext.class); - when(submissionContext.getUser()).thenReturn(user); when(submissionContext.getQueue()).thenReturn(queue); - ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class); - Resource resource = mock(Resource.class); - when(amContainerSpec.getResource()).thenReturn(resource); + Resource resource = BuilderUtils.newResource(1536, 1); + ContainerLaunchContext amContainerSpec = + BuilderUtils.newContainerLaunchContext(user, null, null, + null, null, null, null); when(submissionContext.getAMContainerSpec()).thenReturn(amContainerSpec); - + when(submissionContext.getResource()).thenReturn(resource); + unmanagedAM = false; application = mock(RMApp.class); @@ -469,8 +470,10 @@ public class TestRMAppAttemptTransitions { // Mock the allocation of AM container Container container = mock(Container.class); + Resource resource = BuilderUtils.newResource(2048, 1); when(container.getId()).thenReturn( BuilderUtils.newContainerId(applicationAttempt.getAppAttemptId(), 1)); + when(container.getResource()).thenReturn(resource); Allocation allocation = mock(Allocation.class); when(allocation.getContainers()). 
thenReturn(Collections.singletonList(container)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java index bc806f60a2e..f17da43b94c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestSchedulerUtils.java @@ -19,100 +19,225 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Mockito.mock; +import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl; +import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.server.resourcemanager.resource.DominantResourceCalculator; import org.apache.hadoop.yarn.server.resourcemanager.resource.ResourceCalculator; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; public class TestSchedulerUtils { - @Test + @Test (timeout = 30000) public void testNormalizeRequest() { ResourceCalculator resourceCalculator = new DefaultResourceCalculator(); final int minMemory = 1024; + final int maxMemory = 8192; Resource minResource = Resources.createResource(minMemory, 0); + Resource maxResource = Resources.createResource(maxMemory, 0); ResourceRequest ask = new ResourceRequestPBImpl(); // case negative memory ask.setCapability(Resources.createResource(-1024)); - Resource before = ask.getCapability(); - SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource); - Resource after = ask.getCapability(); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); assertEquals(minMemory, ask.getCapability().getMemory()); - assertTrue(before == after); // case zero memory ask.setCapability(Resources.createResource(0)); - before = ask.getCapability(); - SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource); - after = ask.getCapability(); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); assertEquals(minMemory, ask.getCapability().getMemory()); - assertTrue(before == after); // case memory is a multiple of minMemory ask.setCapability(Resources.createResource(2 * minMemory)); - before = ask.getCapability(); - SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource); - after = ask.getCapability(); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); assertEquals(2 * minMemory, ask.getCapability().getMemory()); - assertTrue(before == after); // case memory is not a multiple of minMemory ask.setCapability(Resources.createResource(minMemory + 10)); 
- before = ask.getCapability(); - SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource); - after = ask.getCapability(); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); assertEquals(2 * minMemory, ask.getCapability().getMemory()); - assertTrue(before == after); + // case memory is equal to max allowed + ask.setCapability(Resources.createResource(maxMemory)); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); + assertEquals(maxMemory, ask.getCapability().getMemory()); + + // case memory is just less than max + ask.setCapability(Resources.createResource(maxMemory - 10)); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); + assertEquals(maxMemory, ask.getCapability().getMemory()); + + // max is not a multiple of min + maxResource = Resources.createResource(maxMemory - 10, 0); + ask.setCapability(Resources.createResource(maxMemory - 100)); + // multiple of minMemory > maxMemory, then reduce to maxMemory + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); + assertEquals(maxResource.getMemory(), ask.getCapability().getMemory()); + + // ask is more than max + maxResource = Resources.createResource(maxMemory, 0); + ask.setCapability(Resources.createResource(maxMemory + 100)); + SchedulerUtils.normalizeRequest(ask, resourceCalculator, null, minResource, + maxResource); + assertEquals(maxResource.getMemory(), ask.getCapability().getMemory()); } - @Test + @Test (timeout = 30000) public void testNormalizeRequestWithDominantResourceCalculator() { ResourceCalculator resourceCalculator = new DominantResourceCalculator(); Resource minResource = Resources.createResource(1024, 1); + Resource maxResource = Resources.createResource(10240, 10); Resource clusterResource = Resources.createResource(10 * 1024, 10); ResourceRequest ask = new ResourceRequestPBImpl(); // case negative memory/vcores ask.setCapability(Resources.createResource(-1024, -1)); - Resource before = ask.getCapability(); SchedulerUtils.normalizeRequest( - ask, resourceCalculator, clusterResource, minResource); - Resource after = ask.getCapability(); + ask, resourceCalculator, clusterResource, minResource, maxResource); assertEquals(minResource, ask.getCapability()); - assertTrue(before == after); // case zero memory/vcores ask.setCapability(Resources.createResource(0, 0)); - before = ask.getCapability(); SchedulerUtils.normalizeRequest( - ask, resourceCalculator, clusterResource, minResource); - after = ask.getCapability(); + ask, resourceCalculator, clusterResource, minResource, maxResource); assertEquals(minResource, ask.getCapability()); assertEquals(1, ask.getCapability().getVirtualCores()); assertEquals(1024, ask.getCapability().getMemory()); - assertTrue(before == after); // case non-zero memory & zero cores ask.setCapability(Resources.createResource(1536, 0)); - before = ask.getCapability(); SchedulerUtils.normalizeRequest( - ask, resourceCalculator, clusterResource, minResource); - after = ask.getCapability(); + ask, resourceCalculator, clusterResource, minResource, maxResource); assertEquals(Resources.createResource(2048, 1), ask.getCapability()); assertEquals(1, ask.getCapability().getVirtualCores()); assertEquals(2048, ask.getCapability().getMemory()); - assertTrue(before == after); } + + @Test (timeout = 30000) + public void testValidateResourceRequest() { + Resource maxResource = Resources.createResource( + 
YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + + // zero memory + try { + Resource resource = Resources.createResource( + 0, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + } catch (InvalidResourceRequestException e) { + fail("Zero memory should be accepted"); + } + + // zero vcores + try { + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + 0); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + } catch (InvalidResourceRequestException e) { + fail("Zero vcores should be accepted"); + } + + // max memory + try { + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + } catch (InvalidResourceRequestException e) { + fail("Max memory should be accepted"); + } + + // max vcores + try { + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + } catch (InvalidResourceRequestException e) { + fail("Max vcores should be accepted"); + } + + // negative memory + try { + Resource resource = Resources.createResource( + -1, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + fail("Negative memory should not be accepted"); + } catch (InvalidResourceRequestException e) { + // expected + } + + // negative vcores + try { + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + -1); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + fail("Negative vcores should not be accepted"); + } catch (InvalidResourceRequestException e) { + // expected + } + + // more than max memory + try { + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB + 1, + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + fail("More than max memory should not be accepted"); + } catch (InvalidResourceRequestException e) { + // expected + } + + // more than max vcores + try { + Resource resource = Resources.createResource( + YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB, + 
YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES + + 1); + ResourceRequest resReq = BuilderUtils.newResourceRequest( + mock(Priority.class), ResourceRequest.ANY, resource, 1); + SchedulerUtils.validateResourceRequest(resReq, maxResource); + fail("More than max vcores should not be accepted"); + } catch (InvalidResourceRequestException e) { + // expected + } + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 48717ed8d02..66f9059de99 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -19,8 +19,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.util.Comparator; import java.util.List; import junit.framework.Assert; @@ -43,13 +45,19 @@ import org.apache.hadoop.yarn.server.resourcemanager.Task; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; +import static org.mockito.Mockito.*; + public class TestCapacityScheduler { private static final Log LOG = LogFactory.getLog(TestCapacityScheduler.class); @@ -452,5 +460,33 @@ public class TestCapacityScheduler { } return result; } + + + @Test (timeout = 5000) + public void testApplicationComparator() + { + CapacityScheduler cs = new CapacityScheduler(); + Comparator appComparator= cs.getApplicationComparator(); + ApplicationId id1 = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(ApplicationId.class); + id1.setClusterTimestamp(1); + id1.setId(1); + ApplicationId id2 = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(ApplicationId.class); + id2.setClusterTimestamp(1); + id2.setId(2); + ApplicationId id3 = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(ApplicationId.class); + id3.setClusterTimestamp(2); + id3.setId(1); + //same clusterId + FiCaSchedulerApp app1 = Mockito.mock(FiCaSchedulerApp.class); + when(app1.getApplicationId()).thenReturn(id1); + FiCaSchedulerApp app2 = 
Mockito.mock(FiCaSchedulerApp.class); + when(app2.getApplicationId()).thenReturn(id2); + FiCaSchedulerApp app3 = Mockito.mock(FiCaSchedulerApp.class); + when(app3.getApplicationId()).thenReturn(id3); + assertTrue(appComparator.compare(app1, app2) < 0); + //different clusterId + assertTrue(appComparator.compare(app1, app3) < 0); + assertTrue(appComparator.compare(app2, app3) < 0); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 65b9d0cc1f9..174692bb980 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -1623,6 +1623,30 @@ public class TestLeafQueue { assertEquals(3, e.activeApplications.size()); assertEquals(0, e.pendingApplications.size()); } + + @Test (timeout = 30000) + public void testNodeLocalityAfterQueueRefresh() throws Exception { + + // Manipulate queue 'e' + LeafQueue e = stubLeafQueue((LeafQueue)queues.get(E)); + + // before reinitialization + assertEquals(0, e.getNodeLocalityDelay()); + + csConf.setInt(CapacitySchedulerConfiguration + .NODE_LOCALITY_DELAY, 60); + Map newQueues = new HashMap(); + CSQueue newRoot = + CapacityScheduler.parseQueue(csContext, csConf, null, + CapacitySchedulerConfiguration.ROOT, + newQueues, queues, + TestUtils.spyHook); + queues = newQueues; + root.reinitialize(newRoot, cs.getClusterResources()); + + // after reinitialization + assertEquals(60, e.getNodeLocalityDelay()); + } @Test (timeout = 30000) public void testActivateApplicationByUpdatingClusterResource() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java index 7ca43421d99..113e16ed7f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java @@ -49,6 +49,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaS import org.apache.hadoop.yarn.server.resourcemanager.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; +import org.apache.hadoop.yarn.util.BuilderUtils; public class TestUtils { private static final Log LOG = LogFactory.getLog(TestUtils.class); @@ -136,9 +137,7 @@ public class TestUtils { public static ApplicationAttemptId getMockApplicationAttemptId(int appId, int attemptId) { - 
ApplicationId applicationId = mock(ApplicationId.class); - when(applicationId.getClusterTimestamp()).thenReturn(0L); - when(applicationId.getId()).thenReturn(appId); + ApplicationId applicationId = BuilderUtils.newApplicationId(0l, appId); ApplicationAttemptId applicationAttemptId = mock(ApplicationAttemptId.class); when(applicationAttemptId.getApplicationId()).thenReturn(applicationId); when(applicationAttemptId.getAttemptId()).thenReturn(attemptId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index caab5ed0725..4758d62defb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSc import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FifoSchedulingMode; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -110,6 +112,8 @@ public class TestFairScheduler { public void setUp() throws IOException { scheduler = new FairScheduler(); Configuration conf = createConfiguration(); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 1024); + conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 10240); // All tests assume only one assignment per node update conf.set(FairSchedulerConfiguration.ASSIGN_MULTIPLE, "false"); resourceManager = new ResourceManager(); @@ -1404,6 +1408,10 @@ public class TestFairScheduler { ApplicationMasterService masterService = new ApplicationMasterService(resourceManager.getRMContext(), scheduler); ApplicationSubmissionContext submissionContext = new ApplicationSubmissionContextPBImpl(); + ContainerLaunchContext clc = + BuilderUtils.newContainerLaunchContext(user, null, null, null, null, + null, null); + submissionContext.setAMContainerSpec(clc); RMApp application = new RMAppImpl(applicationId, resourceManager.getRMContext(), conf, name, user, queue, submissionContext, scheduler, masterService, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 8f45f4535cb..85076baaef3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -19,6 +19,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import junit.framework.Assert; @@ -28,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; @@ -35,15 +38,22 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.InlineDispatcher; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.Application; +import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.Task; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.After; @@ -55,6 +65,9 @@ public class TestFifoScheduler { private ResourceManager resourceManager = null; + private static final RecordFactory recordFactory = + RecordFactoryProvider.getRecordFactory(null); + @Before public void setUp() throws Exception { resourceManager = new ResourceManager(); @@ -78,14 +91,38 @@ public class TestFifoScheduler { .getRMContext()); } - @Test + private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) { + ApplicationAttemptId attId = recordFactory + .newRecordInstance(ApplicationAttemptId.class); + ApplicationId appIdImpl = recordFactory + .newRecordInstance(ApplicationId.class); + appIdImpl.setId(appId); + attId.setAttemptId(attemptId); + 
attId.setApplicationId(appIdImpl); + return attId; + } + + private ResourceRequest createResourceRequest(int memory, String host, + int priority, int numContainers) { + ResourceRequest request = recordFactory + .newRecordInstance(ResourceRequest.class); + request.setCapability(Resources.createResource(memory)); + request.setHostName(host); + request.setNumContainers(numContainers); + Priority prio = recordFactory.newRecordInstance(Priority.class); + prio.setPriority(priority); + request.setPriority(prio); + return request; + } + + @Test(timeout=5000) public void testFifoSchedulerCapacityWhenNoNMs() { FifoScheduler scheduler = new FifoScheduler(); QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false); Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity()); } - @Test + @Test(timeout=5000) public void testAppAttemptMetrics() throws Exception { AsyncDispatcher dispatcher = new InlineDispatcher(); RMContext rmContext = new RMContextImpl(dispatcher, null, @@ -111,6 +148,59 @@ public class TestFifoScheduler { Assert.assertEquals(1, metrics.getAppsSubmitted()); } + @Test(timeout=2000) + public void testNodeLocalAssignment() throws Exception { + AsyncDispatcher dispatcher = new InlineDispatcher(); + RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, + null, null, null); + + FifoScheduler scheduler = new FifoScheduler(); + scheduler.reinitialize(new Configuration(), rmContext); + + RMNode node0 = MockNodes.newNodeInfo(1, + Resources.createResource(1024 * 64), 1234); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0); + scheduler.handle(nodeEvent1); + + int _appId = 1; + int _appAttemptId = 1; + ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, + _appAttemptId); + AppAddedSchedulerEvent appEvent1 = new AppAddedSchedulerEvent(appAttemptId, + "queue1", "user1"); + scheduler.handle(appEvent1); + + int memory = 64; + int nConts = 3; + int priority = 20; + + List ask = new ArrayList(); + ResourceRequest nodeLocal = createResourceRequest(memory, + node0.getHostName(), priority, nConts); + ResourceRequest rackLocal = createResourceRequest(memory, + node0.getRackName(), priority, nConts); + ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority, + nConts); + ask.add(nodeLocal); + ask.add(rackLocal); + ask.add(any); + scheduler.allocate(appAttemptId, ask, new ArrayList()); + + NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0); + + // Before the node update event, there are 3 local requests outstanding + Assert.assertEquals(3, nodeLocal.getNumContainers()); + + scheduler.handle(node0Update); + + // After the node update event, check that there are no more local requests + // outstanding + Assert.assertEquals(0, nodeLocal.getNumContainers()); + //Also check that the containers were scheduled + SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId); + Assert.assertEquals(3, info.getLiveContainers().size()); + } + // @Test public void testFifoScheduler() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index 5d0be9e630d..1d405bef529 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1079,8 +1079,9 @@ public class TestRMWebServicesApps extends JerseyTest { .getMasterContainer().getNodeId().toString(), nodeId); assertTrue("logsLink doesn't match", logsLink.startsWith("http://")); - assertTrue("logsLink doesn't contain user info", - logsLink.endsWith("/" + appAttempt.getSubmissionContext().getUser())); + assertTrue( + "logsLink doesn't contain user info", logsLink.endsWith("/" + + appAttempt.getSubmissionContext().getAMContainerSpec().getUser())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java index 27986cc1a96..69e197aad06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java @@ -357,8 +357,13 @@ public class TestContainerManagerSecurity { LOG.info("Going to contact NM with expired token"); ContainerLaunchContext context = createContainerLaunchContextForTest(newTokenId); + Container container = + BuilderUtils.newContainer(newTokenId.getContainerID(), null, null, + BuilderUtils.newResource(newTokenId.getResource().getMemory(), + newTokenId.getResource().getVirtualCores()), null, null); StartContainerRequest request = Records.newRecord(StartContainerRequest.class); request.setContainerLaunchContext(context); + request.setContainer(container); //Calling startContainer with an expired token. try { @@ -402,18 +407,19 @@ public class TestContainerManagerSecurity { Arrays.asList("ping", "-n", "100", "127.0.0.1", ">nul") : Arrays.asList("sleep", "100"); - ContainerLaunchContext amContainer = BuilderUtils - .newContainerLaunchContext(null, "testUser", BuilderUtils - .newResource(1024, 1), Collections.emptyMap(), - new HashMap(), cmd, - new HashMap(), null, - new HashMap()); + ContainerLaunchContext amContainer = + BuilderUtils.newContainerLaunchContext("testUser", + Collections. emptyMap(), + new HashMap(), cmd, + new HashMap(), null, + new HashMap()); ApplicationSubmissionContext appSubmissionContext = recordFactory .newRecordInstance(ApplicationSubmissionContext.class); appSubmissionContext.setApplicationId(appID); - appSubmissionContext.setUser("testUser"); appSubmissionContext.setAMContainerSpec(amContainer); + appSubmissionContext.getAMContainerSpec().setUser("testUser"); + appSubmissionContext.setResource(BuilderUtils.newResource(1024, 1)); SubmitApplicationRequest submitRequest = recordFactory .newRecordInstance(SubmitApplicationRequest.class); @@ -539,8 +545,11 @@ public class TestContainerManagerSecurity { // Authenticated but unauthorized, due to wrong resource ContainerLaunchContext context = createContainerLaunchContextForTest(tokenId); - context.getResource().setMemory(2048); // Set a different resource size. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
index 27986cc1a96..69e197aad06 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java
@@ -357,8 +357,13 @@ public class TestContainerManagerSecurity {
     LOG.info("Going to contact NM with expired token");
     ContainerLaunchContext context =
         createContainerLaunchContextForTest(newTokenId);
+    Container container =
+        BuilderUtils.newContainer(newTokenId.getContainerID(), null, null,
+            BuilderUtils.newResource(newTokenId.getResource().getMemory(),
+                newTokenId.getResource().getVirtualCores()), null, null);
     StartContainerRequest request =
         Records.newRecord(StartContainerRequest.class);
     request.setContainerLaunchContext(context);
+    request.setContainer(container);
     //Calling startContainer with an expired token.
     try {
@@ -402,18 +407,19 @@ public class TestContainerManagerSecurity {
         Arrays.asList("ping", "-n", "100", "127.0.0.1", ">nul") :
         Arrays.asList("sleep", "100");

-    ContainerLaunchContext amContainer = BuilderUtils
-        .newContainerLaunchContext(null, "testUser", BuilderUtils
-            .newResource(1024, 1), Collections.emptyMap(),
-            new HashMap(), cmd,
-            new HashMap(), null,
-            new HashMap());
+    ContainerLaunchContext amContainer =
+        BuilderUtils.newContainerLaunchContext("testUser",
+            Collections. emptyMap(),
+            new HashMap(), cmd,
+            new HashMap(), null,
+            new HashMap());

     ApplicationSubmissionContext appSubmissionContext = recordFactory
         .newRecordInstance(ApplicationSubmissionContext.class);
     appSubmissionContext.setApplicationId(appID);
-    appSubmissionContext.setUser("testUser");
     appSubmissionContext.setAMContainerSpec(amContainer);
+    appSubmissionContext.getAMContainerSpec().setUser("testUser");
+    appSubmissionContext.setResource(BuilderUtils.newResource(1024, 1));

     SubmitApplicationRequest submitRequest = recordFactory
         .newRecordInstance(SubmitApplicationRequest.class);
@@ -539,8 +545,11 @@ public class TestContainerManagerSecurity {
     // Authenticated but unauthorized, due to wrong resource
     ContainerLaunchContext context =
         createContainerLaunchContextForTest(tokenId);
-    context.getResource().setMemory(2048); // Set a different resource size.
+    Container container =
+        BuilderUtils.newContainer(tokenId.getContainerID(), null, null,
+            BuilderUtils.newResource(2048, 1), null, null);
     request.setContainerLaunchContext(context);
+    request.setContainer(container);
     try {
       client.startContainer(request);
       fail("Connection initiation with unauthorized "
@@ -551,7 +560,7 @@ public class TestContainerManagerSecurity {
           "Unauthorized request to start container. "));
       Assert.assertTrue(e.getMessage().contains(
           "\nExpected resource " + tokenId.getResource().toString()
-          + " but found " + context.getResource().toString()));
+          + " but found " + container.getResource().toString()));
     }
   }
@@ -563,7 +572,12 @@ public class TestContainerManagerSecurity {
     ContainerLaunchContext context =
         createContainerLaunchContextForTest(tokenId);
     context.setUser("Saruman"); // Set a different user-name.
+    Container container =
+        BuilderUtils.newContainer(tokenId.getContainerID(), null, null,
+            BuilderUtils.newResource(tokenId.getResource().getMemory(), tokenId
+                .getResource().getVirtualCores()), null, null);
     request.setContainerLaunchContext(context);
+    request.setContainer(container);
     try {
       client.startContainer(request);
       fail("Connection initiation with unauthorized "
@@ -581,12 +595,8 @@ public class TestContainerManagerSecurity {
   private ContainerLaunchContext createContainerLaunchContextForTest(
       ContainerTokenIdentifier tokenId) {
     ContainerLaunchContext context =
-        BuilderUtils.newContainerLaunchContext(tokenId.getContainerID(),
-            "testUser",
-            BuilderUtils.newResource(
-                tokenId.getResource().getMemory(),
-                tokenId.getResource().getVirtualCores()),
-            new HashMap(),
+        BuilderUtils.newContainerLaunchContext(
+            "testUser", new HashMap(),
             new HashMap(), new ArrayList(),
             new HashMap(), null,
             new HashMap());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
index 559262d8e78..eab19634734 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java
@@ -133,10 +133,10 @@ public class TestDiskFailures {
     dirSvc.init(conf);
     List localDirs = dirSvc.getLocalDirs();
     Assert.assertEquals(1, localDirs.size());
-    Assert.assertEquals(localDir2, localDirs.get(0));
+    Assert.assertEquals(new Path(localDir2).toString(), localDirs.get(0));
     List logDirs = dirSvc.getLogDirs();
     Assert.assertEquals(1, logDirs.size());
-    Assert.assertEquals(logDir1, logDirs.get(0));
+    Assert.assertEquals(new Path(logDir1).toString(), logDirs.get(0));
   }

   private void testDirsFailures(boolean localORLogDirs) throws IOException {
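The FairScheduler documentation hunk that follows describes two behaviours in prose: size-based app weighting (ln(1 + total requested memory) / ln 2) and the minResources tie-break, where among queues below their minimum the one with the smallest allocation-to-minimum ratio is served first. Below is a small self-contained sketch of just that arithmetic with made-up example values; the class and method names are hypothetical and this is not Fair Scheduler code.

// Standalone sketch of the arithmetic described in the FairScheduler doc
// hunk below; the values are illustrative, not taken from the patch.
public class FairShareMath {

  // sizebasedweight: ln(1 + totalRequestedMemoryMB) / ln(2)
  static double sizeBasedWeight(long totalRequestedMemoryMB) {
    return Math.log1p(totalRequestedMemoryMB) / Math.log(2);
  }

  // minResources tie-break: when both queues are below their minimums, the
  // one with the smaller allocation/minimum ratio is served first.
  static int compareStarvedQueues(double alloc1, double min1,
                                  double alloc2, double min2) {
    return Double.compare(alloc1 / min1, alloc2 / min2);
  }

  public static void main(String[] args) {
    System.out.println(sizeBasedWeight(1024));   // ~10.0 for a 1 GB request
    System.out.println(sizeBasedWeight(4096));   // ~12.0 for a 4 GB request
    // Queue A at 512 of a 2048 MB minimum (ratio 0.25) beats queue B at
    // 1024 of 2048 (ratio 0.5): the result is negative, so A is served first.
    System.out.println(compareStarvedQueues(512, 2048, 1024, 2048));
  }
}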
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
index 2d12699bf73..c7a8229d36f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
@@ -85,8 +85,9 @@ Hadoop MapReduce Next Generation - Fair Scheduler
   cause too much intermediate data to be created or too much context-switching.
   Limiting the apps does not cause any subsequently submitted apps to fail,
   only to wait in the scheduler's queue until some of the user's earlier apps
-  finish. apps to run from each user/queue are chosen in order of priority and
-  then submit time, as in the default FIFO scheduler in Hadoop.
+  finish. Apps to run from each user/queue are chosen in the same fair sharing
+  manner, but can alternatively be configured to be chosen in order of submit
+  time, as in the default FIFO scheduler in Hadoop.

   Certain add-ons are not yet supported which existed in the original (MR1)
   Fair Scheduler. Among them, is the use of a custom policies governing
@@ -142,7 +143,9 @@ Hadoop MapReduce Next Generation - Fair Scheduler
 * <<>>

   * Whether to assign shares to individual apps based on their size, rather than
-    providing an equal share to all apps regardless of size. Defaults to false.
+    providing an equal share to all apps regardless of size. When set to true,
+    apps are weighted by the natural logarithm of one plus the app's total
+    requested memory, divided by the natural logarithm of 2. Defaults to false.

 * <<>>
@@ -180,16 +183,29 @@ Allocation file format
 * <>, which represent queues. Each may contain the following properties:

-   * minResources: minimum amount of aggregate memory
+   * minResources: minimum MB of aggregate memory the queue expects. If a queue
+     demands resources, and its current allocation is below its configured minimum,
+     it will be assigned available resources before any queue that is not in this
+     situation. If multiple queues are in this situation, resources go to the
+     queue with the smallest ratio between allocation and minimum. Note that it is
+     possible that a queue that is below its minimum may not immediately get up to
+     its minimum when it submits an application, because already-running jobs may
+     be using those resources.

-   * maxResources: maximum amount of aggregate memory
+   * maxResources: maximum MB of aggregate memory a queue is allowed. A queue
+     will never be assigned a container that would put it over this limit.

   * maxRunningApps: limit the number of apps from the queue to run at once

-   * weight: to share the cluster non-proportionally with other queues
+   * weight: to share the cluster non-proportionally with other queues. Weights
+     default to 1, and a queue with weight 2 should receive approximately twice
+     as many resources as a queue with the default weight.

   * schedulingMode: either "fifo" or "fair" depending on the in-queue scheduling
-     policy desired
+     policy desired. Defaults to "fair". If "fifo", apps with earlier submit
+     times are given preference for containers, but apps submitted later may
+     run concurrently if there is leftover space on the cluster after satisfying
+     the earlier app's requests.

   * aclSubmitApps: a list of users that can submit apps to the queue. A (default)
     value of "*" means that any users can submit apps. A queue inherits the ACL of