From a2edb11b68ae01a44092cb14ac2717a6aad93305 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Sun, 9 Feb 2014 19:18:51 +0000 Subject: [PATCH 01/47] HDFS-5698. Use protobuf to serialize / deserialize FSImage. Contributed by Haohui Mai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566359 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 49 ++ .../dev-support/findbugsExcludeFile.xml | 3 + hadoop-hdfs-project/hadoop-hdfs/pom.xml | 1 + .../hadoop-hdfs/src/main/bin/hdfs | 2 +- .../hadoop/hdfs/protocol/LayoutVersion.java | 3 +- .../DelegationTokenSecretManager.java | 87 ++- .../hdfs/server/namenode/CacheManager.java | 147 ++++- .../hadoop/hdfs/server/namenode/FSImage.java | 5 +- .../server/namenode/FSImageCompression.java | 6 +- .../hdfs/server/namenode/FSImageFormat.java | 75 ++- .../server/namenode/FSImageFormatPBINode.java | 466 +++++++++++++++ .../namenode/FSImageFormatProtobuf.java | 551 ++++++++++++++++++ .../hdfs/server/namenode/FSImageUtil.java | 93 +++ .../hdfs/server/namenode/FSNamesystem.java | 22 + .../hdfs/server/namenode/INodeDirectory.java | 2 +- .../hdfs/server/namenode/INodeFile.java | 2 +- .../hadoop/hdfs/server/namenode/INodeMap.java | 5 + .../server/namenode/SaveNamespaceContext.java | 6 +- .../DirectoryWithSnapshotFeature.java | 6 +- .../snapshot/FSImageFormatPBSnapshot.java | 437 ++++++++++++++ .../snapshot/SnapshotFSImageFormat.java | 26 +- .../namenode/snapshot/SnapshotManager.java | 17 + .../FileDistributionCalculator.java | 160 +++++ .../ImageLoaderCurrent.java | 2 +- .../tools/offlineImageViewer/LsrPBImage.java | 233 ++++++++ .../OfflineImageViewerPB.java | 178 ++++++ .../offlineImageViewer/PBImageXmlWriter.java | 415 +++++++++++++ .../hadoop-hdfs/src/main/proto/fsimage.proto | 280 +++++++++ .../hdfs/server/namenode/TestFSImage.java | 138 +++++ .../namenode/TestFSImageWithSnapshot.java | 4 +- .../namenode/ha/TestStandbyCheckpoints.java | 2 +- .../snapshot/TestRenameWithSnapshots.java | 1 - .../namenode/snapshot/TestSnapshot.java | 39 +- .../TestOfflineImageViewer.java | 427 ++++---------- .../src/test/resources/editsStored | Bin 4272 -> 4272 bytes .../src/test/resources/editsStored.xml | 2 +- 36 files changed, 3488 insertions(+), 404 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 47f931f2215..22b201627a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -286,6 +286,55 @@ Trunk (Unreleased) HDFS-5794. Fix the inconsistency of layout version number of ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9) + BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS + + HDFS-5717. Save FSImage header in protobuf. (Haohui Mai via jing9) + + HDFS-5738. Serialize INode information in protobuf. (Haohui Mai via jing9) + + HDFS-5772. Serialize under-construction file information in FSImage. (jing9) + + HDFS-5783. Compute the digest before loading FSImage. (Haohui Mai via jing9) + + HDFS-5785. Serialize symlink in protobuf. (Haohui Mai via jing9) + + HDFS-5793. Optimize the serialization of PermissionStatus. (Haohui Mai via + jing9) + + HDFS-5743. Use protobuf to serialize snapshot information. (jing9) + + HDFS-5774. Serialize CachePool directives in protobuf. (Haohui Mai via jing9) + + HDFS-5744. Serialize information for token managers in protobuf. (Haohui Mai + via jing9) + + HDFS-5824. Add a Type field in Snapshot DiffEntry's protobuf definition. + (jing9) + + HDFS-5808. Implement cancellation when saving FSImage. (Haohui Mai via jing9) + + HDFS-5826. Update the stored edit logs to be consistent with the changes in + HDFS-5698 branch. (Haohui Mai via jing9) + + HDFS-5797. Implement offline image viewer. (Haohui Mai via jing9) + + HDFS-5771. Track progress when loading fsimage. (Haohui Mai via cnauroth) + + HDFS-5871. Use PBHelper to serialize CacheDirectiveInfoExpirationProto. + (Haohui Mai via jing9) + + HDFS-5884. LoadDelegator should use IOUtils.readFully() to read the magic + header. (Haohui Mai via jing9) + + HDFS-5885. Add annotation for repeated fields in the protobuf definition. + (Haohui Mai via jing9) + + HDFS-5906. Fixing findbugs and javadoc warnings in the HDFS-5698 branch. + (Haohui Mai via jing9) + + HDFS-5911. The id of a CacheDirective instance does not get serialized in + the protobuf-fsimage. 
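The subtasks above all build on one serialization primitive: each section of the new image is written as a stream of length-delimited protobuf messages via writeDelimitedTo(), and read back with parseDelimitedFrom(), which returns null at end of stream. A minimal, self-contained sketch of that pattern, using protobuf-java's built-in StringValue wrapper purely as a stand-in for the generated FsImageProto messages:

import com.google.protobuf.StringValue;
import java.io.*;

public class DelimitedStreamSketch {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (String path : new String[] { "/dir/a", "/dir/b", "/dir/c" }) {
            // writeDelimitedTo() prefixes each message with its varint-encoded length.
            StringValue.newBuilder().setValue(path).build().writeDelimitedTo(out);
        }
        InputStream in = new ByteArrayInputStream(out.toByteArray());
        StringValue v;
        // parseDelimitedFrom() returns null at end of stream; the loaders in this
        // patch rely on the same null check to detect the end of a section.
        while ((v = StringValue.parseDelimitedFrom(in)) != null) {
            System.out.println(v.getValue());
        }
    }
}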
(Haohui Mai via jing9) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 028e64cad94..70b7e65f842 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -8,6 +8,9 @@ + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 0b1e55d46c5..6cd9fea1dd7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -458,6 +458,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ClientDatanodeProtocol.proto DatanodeProtocol.proto + fsimage.proto ${project.build.directory}/generated-sources/java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index fa00cd47d0e..5d823b7dd21 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -139,7 +139,7 @@ elif [ "$COMMAND" = "balancer" ] ; then elif [ "$COMMAND" = "jmxget" ] ; then CLASS=org.apache.hadoop.hdfs.tools.JMXGet elif [ "$COMMAND" = "oiv" ] ; then - CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer + CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB elif [ "$COMMAND" = "oev" ] ; then CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer elif [ "$COMMAND" = "fetchdt" ] ; then diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index 923ed70ac8f..9842b53fbd3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -112,7 +112,8 @@ public class LayoutVersion { ADD_DATANODE_AND_STORAGE_UUIDS(-49, "Replace StorageID with DatanodeUuid." 
+ " Use distinct StorageUuid per storage directory."), ADD_LAYOUT_FLAGS(-50, "Add support for layout flags."), - CACHING(-51, "Support for cache pools and path-based caching"); + CACHING(-51, "Support for cache pools and path-based caching"), + PROTOBUF_FORMAT(-52, "Use protobuf to serialize FSImage"); final int lv; final int ancestorLV; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index e291204cc23..b9fce60446b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -23,12 +23,16 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.InterruptedIOException; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; @@ -46,6 +50,10 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; import org.apache.hadoop.security.token.delegation.DelegationKey; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; + /** * A HDFS specific delegation token secret manager. * The secret manager is responsible for generating and accepting the password @@ -167,7 +175,45 @@ public class DelegationTokenSecretManager } serializerCompat.load(in); } - + + public static class SecretManagerState { + public final SecretManagerSection section; + public final List keys; + public final List tokens; + + public SecretManagerState( + SecretManagerSection s, + List keys, + List tokens) { + this.section = s; + this.keys = keys; + this.tokens = tokens; + } + } + + public synchronized void loadSecretManagerState(SecretManagerState state) + throws IOException { + Preconditions.checkState(!running, + "Can't load state from image in a running SecretManager."); + + currentId = state.section.getCurrentId(); + delegationTokenSequenceNumber = state.section.getTokenSequenceNumber(); + for (SecretManagerSection.DelegationKey k : state.keys) { + addKey(new DelegationKey(k.getId(), k.getExpiryDate(), k.hasKey() ? 
k + .getKey().toByteArray() : null)); + } + + for (SecretManagerSection.PersistToken t : state.tokens) { + DelegationTokenIdentifier id = new DelegationTokenIdentifier(new Text( + t.getOwner()), new Text(t.getRenewer()), new Text(t.getRealUser())); + id.setIssueDate(t.getIssueDate()); + id.setMaxDate(t.getMaxDate()); + id.setSequenceNumber(t.getSequenceNumber()); + id.setMasterKeyId(t.getMasterKeyId()); + addPersistedDelegationToken(id, t.getExpiryDate()); + } + } + /** * Store the current state of the SecretManager for persistence * @@ -179,7 +225,43 @@ public class DelegationTokenSecretManager String sdPath) throws IOException { serializerCompat.save(out, sdPath); } - + + public synchronized SecretManagerState saveSecretManagerState() { + SecretManagerSection s = SecretManagerSection.newBuilder() + .setCurrentId(currentId) + .setTokenSequenceNumber(delegationTokenSequenceNumber) + .setNumKeys(allKeys.size()).setNumTokens(currentTokens.size()).build(); + ArrayList keys = Lists + .newArrayListWithCapacity(allKeys.size()); + ArrayList tokens = Lists + .newArrayListWithCapacity(currentTokens.size()); + + for (DelegationKey v : allKeys.values()) { + SecretManagerSection.DelegationKey.Builder b = SecretManagerSection.DelegationKey + .newBuilder().setId(v.getKeyId()).setExpiryDate(v.getExpiryDate()); + if (v.getEncodedKey() != null) { + b.setKey(ByteString.copyFrom(v.getEncodedKey())); + } + keys.add(b.build()); + } + + for (Entry e : currentTokens + .entrySet()) { + DelegationTokenIdentifier id = e.getKey(); + SecretManagerSection.PersistToken.Builder b = SecretManagerSection.PersistToken + .newBuilder().setOwner(id.getOwner().toString()) + .setRenewer(id.getRenewer().toString()) + .setRealUser(id.getRealUser().toString()) + .setIssueDate(id.getIssueDate()).setMaxDate(id.getMaxDate()) + .setSequenceNumber(id.getSequenceNumber()) + .setMasterKeyId(id.getMasterKeyId()) + .setExpiryDate(e.getValue().getRenewDate()); + tokens.add(b.build()); + } + + return new SecretManagerState(s, keys, tokens); + } + /** * This method is intended to be used only while reading edit logs. 
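The saveSecretManagerState() path above follows a snapshot-then-serialize pattern: the synchronized method copies the live key and token maps into an immutable holder (SecretManagerState), and the image saver streams that holder out later without re-entering the secret manager's lock. A rough sketch of the pattern, with illustrative record types rather than the generated protobuf messages:

import java.util.*;

final class SecretStateSnapshotSketch {
    record PersistedKey(int id, long expiryDate) {}
    record StateSnapshot(int currentId, int tokenSequenceNumber, List<PersistedKey> keys) {}

    private final Map<Integer, PersistedKey> allKeys = new HashMap<>();
    private int currentId;
    private int tokenSequenceNumber;

    // Copy everything while holding the lock; the returned snapshot is immutable
    // and can be written out by another component without further synchronization.
    synchronized StateSnapshot snapshot() {
        return new StateSnapshot(currentId, tokenSequenceNumber,
            List.copyOf(allKeys.values()));
    }
}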
* @@ -431,4 +513,5 @@ public class DelegationTokenSecretManager prog.endStep(Phase.LOADING_FSIMAGE, step); } } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index ba3936ca997..de536b30d79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -50,8 +50,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.InvalidRequestException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.CacheDirective; import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; @@ -62,11 +64,15 @@ import org.apache.hadoop.hdfs.protocol.CachePoolEntry; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; @@ -81,6 +87,7 @@ import org.apache.hadoop.util.LightWeightGSet; import org.apache.hadoop.util.Time; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; /** * The Cache Manager handles caching on DataNodes. 
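Both the secret-manager and cache-manager sections are count-prefixed: the section header records how many entries follow (numKeys/numTokens, numPools/numDirectives), and the loader reads exactly that many delimited messages afterwards. A self-contained sketch of that layout, with protobuf's Int32Value/StringValue wrappers standing in for the generated section and entry messages:

import com.google.protobuf.Int32Value;
import com.google.protobuf.StringValue;
import java.io.*;
import java.util.*;

public class CountedSectionSketch {
    static void write(OutputStream out, List<String> entries) throws IOException {
        // Header carries the entry count, mirroring numKeys/numPools in the patch.
        Int32Value.newBuilder().setValue(entries.size()).build().writeDelimitedTo(out);
        for (String e : entries) {
            StringValue.newBuilder().setValue(e).build().writeDelimitedTo(out);
        }
    }

    static List<String> read(InputStream in) throws IOException {
        int n = Int32Value.parseDelimitedFrom(in).getValue();
        List<String> entries = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {   // read exactly n entries, no sentinel needed
            entries.add(StringValue.parseDelimitedFrom(in).getValue());
        }
        return entries;
    }
}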
@@ -167,6 +174,19 @@ public final class CacheManager { */ private CacheReplicationMonitor monitor; + public static final class PersistState { + public final CacheManagerSection section; + public final List pools; + public final List directives; + + public PersistState(CacheManagerSection section, + List pools, List directives) { + this.section = section; + this.pools = pools; + this.directives = directives; + } + } + CacheManager(FSNamesystem namesystem, Configuration conf, BlockManager blockManager) { this.namesystem = namesystem; @@ -944,6 +964,64 @@ public final class CacheManager { serializerCompat.save(out, sdPath); } + public PersistState saveState() throws IOException { + ArrayList pools = Lists + .newArrayListWithCapacity(cachePools.size()); + ArrayList directives = Lists + .newArrayListWithCapacity(directivesById.size()); + + for (CachePool pool : cachePools.values()) { + CachePoolInfo p = pool.getInfo(true); + CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder() + .setPoolName(p.getPoolName()); + + if (p.getOwnerName() != null) + b.setOwnerName(p.getOwnerName()); + + if (p.getGroupName() != null) + b.setGroupName(p.getGroupName()); + + if (p.getMode() != null) + b.setMode(p.getMode().toShort()); + + if (p.getLimit() != null) + b.setLimit(p.getLimit()); + + pools.add(b.build()); + } + + for (CacheDirective directive : directivesById.values()) { + CacheDirectiveInfo info = directive.toInfo(); + CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder() + .setId(info.getId()); + + if (info.getPath() != null) { + b.setPath(info.getPath().toUri().getPath()); + } + + if (info.getReplication() != null) { + b.setReplication(info.getReplication()); + } + + if (info.getPool() != null) { + b.setPool(info.getPool()); + } + + Expiration expiry = info.getExpiration(); + if (expiry != null) { + assert (!expiry.isRelative()); + b.setExpiration(PBHelper.convert(expiry)); + } + + directives.add(b.build()); + } + CacheManagerSection s = CacheManagerSection.newBuilder() + .setNextDirectiveId(nextDirectiveId).setNumPools(pools.size()) + .setNumDirectives(directives.size()).build(); + + return new PersistState(s, pools, directives); + } + /** * Reloads CacheManager state from the passed DataInput. Used during namenode * startup to restore CacheManager state from an FSImage. 
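The loadState() path added below delegates to addCacheDirective(), which has to keep three indexes consistent: the owning pool's directive list, the global by-id map, and the per-path list. A simplified sketch of that multi-index insert (class and field names are illustrative):

import java.io.IOException;
import java.util.*;

final class DirectiveIndexSketch {
    record Directive(long id, String path, String pool) {}

    private final Map<Long, Directive> byId = new HashMap<>();
    private final Map<String, List<Directive>> byPath = new HashMap<>();
    private final Map<String, List<Directive>> byPool = new HashMap<>();

    void addPool(String name) {
        byPool.putIfAbsent(name, new ArrayList<>());
    }

    void add(Directive d) throws IOException {
        List<Directive> pool = byPool.get(d.pool());
        if (pool == null) {
            throw new IOException("Directive refers to pool " + d.pool()
                + ", which does not exist.");
        }
        if (byId.putIfAbsent(d.id(), d) != null) {
            throw new IOException("A directive with ID " + d.id() + " already exists");
        }
        pool.add(d);                                            // pool's directive list
        byPath.computeIfAbsent(d.path(), p -> new ArrayList<>()).add(d);  // per-path list
    }
}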
@@ -954,6 +1032,56 @@ public final class CacheManager { serializerCompat.load(in); } + public void loadState(PersistState s) throws IOException { + nextDirectiveId = s.section.getNextDirectiveId(); + for (CachePoolInfoProto p : s.pools) { + CachePoolInfo info = new CachePoolInfo(p.getPoolName()); + if (p.hasOwnerName()) + info.setOwnerName(p.getOwnerName()); + + if (p.hasGroupName()) + info.setGroupName(p.getGroupName()); + + if (p.hasMode()) + info.setMode(new FsPermission((short) p.getMode())); + + if (p.hasLimit()) + info.setLimit(p.getLimit()); + + addCachePool(info); + } + + for (CacheDirectiveInfoProto p : s.directives) { + // Get pool reference by looking it up in the map + final String poolName = p.getPool(); + CacheDirective directive = new CacheDirective(p.getId(), new Path( + p.getPath()).toUri().getPath(), (short) p.getReplication(), p + .getExpiration().getMillis()); + addCacheDirective(poolName, directive); + } + } + + private void addCacheDirective(final String poolName, + final CacheDirective directive) throws IOException { + CachePool pool = cachePools.get(poolName); + if (pool == null) { + throw new IOException("Directive refers to pool " + poolName + + ", which does not exist."); + } + boolean addedDirective = pool.getDirectiveList().add(directive); + assert addedDirective; + if (directivesById.put(directive.getId(), directive) != null) { + throw new IOException("A directive with ID " + directive.getId() + + " already exists"); + } + List directives = directivesByPath.get(directive.getPath()); + if (directives == null) { + directives = new LinkedList(); + directivesByPath.put(directive.getPath(), directives); + } + directives.add(directive); + } + private final class SerializerCompat { private void save(DataOutputStream out, String sdPath) throws IOException { out.writeLong(nextDirectiveId); @@ -1036,27 +1164,10 @@ public final class CacheManager { CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in); // Get pool reference by looking it up in the map final String poolName = info.getPool(); - CachePool pool = cachePools.get(poolName); - if (pool == null) { - throw new IOException("Directive refers to pool " + poolName + - ", which does not exist."); - } CacheDirective directive = new CacheDirective(info.getId(), info.getPath().toUri().getPath(), info.getReplication(), info.getExpiration().getAbsoluteMillis()); - boolean addedDirective = pool.getDirectiveList().add(directive); - assert addedDirective; - if (directivesById.put(directive.getId(), directive) != null) { - throw new IOException("A directive with ID " + directive.getId() + - " already exists"); - } - List directives = - directivesByPath.get(directive.getPath()); - if (directives == null) { - directives = new LinkedList(); - directivesByPath.put(directive.getPath(), directives); - } - directives.add(directive); + addCacheDirective(poolName, directive); counter.increment(); } prog.endStep(Phase.LOADING_FSIMAGE, step); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 166ffb2fd9b..62020173f4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -797,8 +797,7 @@ public class FSImage implements Closeable { */ private void loadFSImage(File curFile, MD5Hash expectedMd5, 
FSNamesystem target, MetaRecoveryContext recovery) throws IOException { - FSImageFormat.Loader loader = new FSImageFormat.Loader( - conf, target); + FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target); loader.load(curFile); target.setBlockPoolId(this.getBlockPoolID()); @@ -827,7 +826,7 @@ public class FSImage implements Closeable { File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); File dstFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE, txid); - FSImageFormat.Saver saver = new FSImageFormat.Saver(context); + FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); saver.save(newFile, compression); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java index e0a46f15445..872ee74c802 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java @@ -57,6 +57,10 @@ class FSImageCompression { imageCodec = codec; } + public CompressionCodec getImageCodec() { + return imageCodec; + } + /** * Create a "noop" compression - i.e. uncompressed */ @@ -89,7 +93,7 @@ class FSImageCompression { * Create a compression instance using the codec specified by * codecClassName */ - private static FSImageCompression createCompression(Configuration conf, + static FSImageCompression createCompression(Configuration conf, String codecClassName) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 3ad258a4512..bcbad75d810 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -68,12 +68,13 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Co import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; import org.apache.hadoop.hdfs.util.ReadOnlyList; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.Text; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.annotations.VisibleForTesting; /** * Contains inner classes for reading or writing the on-disk format for @@ -180,16 +181,74 @@ import com.google.common.base.Preconditions; @InterfaceStability.Evolving public class FSImageFormat { private static final Log LOG = FSImage.LOG; - + // Static-only class private FSImageFormat() {} - + + interface AbstractLoader { + MD5Hash getLoadedImageMd5(); + long getLoadedImageTxId(); + } + + static class LoaderDelegator implements AbstractLoader { + private AbstractLoader impl; + private final Configuration conf; + private final FSNamesystem fsn; + + LoaderDelegator(Configuration conf, FSNamesystem fsn) { + this.conf = conf; + this.fsn = fsn; + } + + @Override + public MD5Hash 
getLoadedImageMd5() { + return impl.getLoadedImageMd5(); + } + + @Override + public long getLoadedImageTxId() { + return impl.getLoadedImageTxId(); + } + + public void load(File file) throws IOException { + Preconditions.checkState(impl == null, "Image already loaded!"); + + FileInputStream is = null; + try { + is = new FileInputStream(file); + byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length]; + IOUtils.readFully(is, magic, 0, magic.length); + if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) { + FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader( + conf, fsn); + impl = loader; + loader.load(file); + } else { + Loader loader = new Loader(conf, fsn); + impl = loader; + loader.load(file); + } + + } finally { + IOUtils.cleanup(LOG, is); + } + } + } + + /** + * Construct a loader class to load the image. It chooses the loader based on + * the layout version. + */ + public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) { + return new LoaderDelegator(conf, fsn); + } + /** * A one-shot class responsible for loading an image. The load() function * should be called once, after which the getter methods may be used to retrieve * information about the image that was loaded, if loading was successful. */ - public static class Loader { + public static class Loader implements AbstractLoader { private final Configuration conf; /** which namesystem this loader is working for */ private final FSNamesystem namesystem; @@ -214,12 +273,14 @@ public class FSImageFormat { * Return the MD5 checksum of the image that has been loaded. * @throws IllegalStateException if load() has not yet been called. */ - MD5Hash getLoadedImageMd5() { + @Override + public MD5Hash getLoadedImageMd5() { checkLoaded(); return imgDigest; } - long getLoadedImageTxId() { + @Override + public long getLoadedImageTxId() { checkLoaded(); return imgTxId; } @@ -242,7 +303,7 @@ public class FSImageFormat { } } - void load(File curFile) throws IOException { + public void load(File curFile) throws IOException { checkNotLoaded(); assert curFile != null : "curFile is null"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java new file mode 100644 index 00000000000..5ade5cec6a3 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -0,0 +1,466 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
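The LoaderDelegator added to FSImageFormat above decides between the legacy loader and the new protobuf loader by reading the first bytes of the image and comparing them with FSImageUtil.MAGIC_HEADER. A sketch of that dispatch; the magic value used here is illustrative, not necessarily the real constant:

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

final class LoaderDispatchSketch {
    // Illustrative magic value; the patch compares against FSImageUtil.MAGIC_HEADER.
    static final byte[] MAGIC = "HDFSIMG1".getBytes(StandardCharsets.UTF_8);

    static boolean isProtobufImage(File file) throws IOException {
        byte[] head = new byte[MAGIC.length];
        try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
            in.readFully(head);   // analogous to IOUtils.readFully() in the patch
        }
        // Protobuf images start with the magic header; anything else falls back
        // to the legacy FSImageFormat.Loader.
        return Arrays.equals(head, MAGIC);
    }
}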
+ */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.protocolPB.PBHelper; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.StringMap; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference; +import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount; +import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName; +import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; +import org.apache.hadoop.hdfs.util.ReadOnlyList; + +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; + +@InterfaceAudience.Private +public final class FSImageFormatPBINode { + private final static long USER_GROUP_STRID_MASK = (1 << 24) - 1; + private final static int USER_STRID_OFFSET = 40; + private final static int GROUP_STRID_OFFSET = 16; + private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); + + public final static class Loader { + public static PermissionStatus loadPermission(long id, + final String[] stringTable) { + short perm = (short) (id & ((1 << GROUP_STRID_OFFSET) - 1)); + int gsid = (int) ((id >> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK); + int usid = (int) ((id >> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK); + return new PermissionStatus(stringTable[usid], stringTable[gsid], + new FsPermission(perm)); + } + + public static INodeReference loadINodeReference( + INodeSection.INodeReference r, FSDirectory dir) throws IOException { + long referredId = r.getReferredId(); + INode referred = dir.getInode(referredId); + WithCount withCount = (WithCount) referred.getParentReference(); + if (withCount == null) { + withCount = new INodeReference.WithCount(null, referred); + } + final INodeReference ref; + if (r.hasDstSnapshotId()) { // DstReference + ref = new INodeReference.DstReference(null, withCount, + r.getDstSnapshotId()); + } else { + ref = new INodeReference.WithName(null, withCount, r.getName() + .toByteArray(), r.getLastSnapshotId()); + } + return ref; + } + + public static INodeDirectory loadINodeDirectory(INodeSection.INode n, + final String[] stringTable) { + assert n.getType() == INodeSection.INode.Type.DIRECTORY; + INodeSection.INodeDirectory d = n.getDirectory(); + + final PermissionStatus permissions = loadPermission(d.getPermission(), + stringTable); + final 
INodeDirectory dir = new INodeDirectory(n.getId(), n.getName() + .toByteArray(), permissions, d.getModificationTime()); + + final long nsQuota = d.getNsQuota(), dsQuota = d.getDsQuota(); + if (nsQuota >= 0 || dsQuota >= 0) { + dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota); + } + return dir; + } + + public static void updateBlocksMap(INodeFile file, BlockManager bm) { + // Add file->block mapping + final BlockInfo[] blocks = file.getBlocks(); + if (blocks != null) { + for (int i = 0; i < blocks.length; i++) { + file.setBlock(i, bm.addBlockCollection(blocks[i], file)); + } + } + } + + private final FSDirectory dir; + private final FSNamesystem fsn; + private final FSImageFormatProtobuf.Loader parent; + + Loader(FSNamesystem fsn, final FSImageFormatProtobuf.Loader parent) { + this.fsn = fsn; + this.dir = fsn.dir; + this.parent = parent; + } + + void loadINodeDirectorySection(InputStream in) throws IOException { + while (true) { + INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry + .parseDelimitedFrom(in); + // note that in is a LimitedInputStream + if (e == null) { + break; + } + INodeDirectory p = dir.getInode(e.getParent()).asDirectory(); + for (long id : e.getChildrenList()) { + INode child = dir.getInode(id); + addToParent(p, child); + } + for (int i = 0; i < e.getNumOfRef(); i++) { + INodeReference ref = loadINodeReference(in); + addToParent(p, ref); + } + } + } + + private INodeReference loadINodeReference(InputStream in) + throws IOException { + INodeSection.INodeReference ref = INodeSection.INodeReference + .parseDelimitedFrom(in); + return loadINodeReference(ref, dir); + } + + void loadINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + fsn.resetLastInodeId(s.getLastInodeId()); + LOG.info("Loading " + s.getNumInodes() + " INodes."); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + if (p.getId() == INodeId.ROOT_INODE_ID) { + loadRootINode(p); + } else { + INode n = loadINode(p); + dir.addToInodeMap(n); + } + } + } + + /** + * Load the under-construction files section, and update the lease map + */ + void loadFilesUnderConstructionSection(InputStream in) throws IOException { + while (true) { + FileUnderConstructionEntry entry = FileUnderConstructionEntry + .parseDelimitedFrom(in); + if (entry == null) { + break; + } + // update the lease manager + INodeFile file = dir.getInode(entry.getInodeId()).asFile(); + FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature(); + Preconditions.checkState(uc != null); // file must be under-construction + fsn.leaseManager.addLease(uc.getClientName(), entry.getFullPath()); + } + } + + private void addToParent(INodeDirectory parent, INode child) { + if (parent == dir.rootDir && FSDirectory.isReservedName(child)) { + throw new HadoopIllegalArgumentException("File name \"" + + child.getLocalName() + "\" is reserved. 
Please " + + " change the name of the existing file or directory to another " + + "name before upgrading to this release."); + } + // NOTE: This does not update space counts for parents + if (!parent.addChild(child)) { + return; + } + dir.cacheName(child); + + if (child.isFile()) { + updateBlocksMap(child.asFile(), fsn.getBlockManager()); + } + } + + private INode loadINode(INodeSection.INode n) { + switch (n.getType()) { + case FILE: + return loadINodeFile(n); + case DIRECTORY: + return loadINodeDirectory(n, parent.getStringTable()); + case SYMLINK: + return loadINodeSymlink(n); + default: + break; + } + return null; + } + + private INodeFile loadINodeFile(INodeSection.INode n) { + assert n.getType() == INodeSection.INode.Type.FILE; + INodeSection.INodeFile f = n.getFile(); + List bp = f.getBlocksList(); + short replication = (short) f.getReplication(); + + BlockInfo[] blocks = new BlockInfo[bp.size()]; + for (int i = 0, e = bp.size(); i < e; ++i) { + blocks[i] = new BlockInfo(PBHelper.convert(bp.get(i)), replication); + } + final PermissionStatus permissions = loadPermission(f.getPermission(), + parent.getStringTable()); + + final INodeFile file = new INodeFile(n.getId(), + n.getName().toByteArray(), permissions, f.getModificationTime(), + f.getAccessTime(), blocks, replication, f.getPreferredBlockSize()); + // under-construction information + if (f.hasFileUC()) { + INodeSection.FileUnderConstructionFeature uc = f.getFileUC(); + file.toUnderConstruction(uc.getClientName(), uc.getClientMachine(), + null); + if (blocks.length > 0) { + BlockInfo lastBlk = file.getLastBlock(); + // replace the last block of file + file.setBlock(file.numBlocks() - 1, new BlockInfoUnderConstruction( + lastBlk, replication)); + } + } + return file; + } + + + private INodeSymlink loadINodeSymlink(INodeSection.INode n) { + assert n.getType() == INodeSection.INode.Type.SYMLINK; + INodeSection.INodeSymlink s = n.getSymlink(); + final PermissionStatus permissions = loadPermission(s.getPermission(), + parent.getStringTable()); + return new INodeSymlink(n.getId(), n.getName().toByteArray(), permissions, + 0, 0, s.getTarget().toStringUtf8()); + } + + private void loadRootINode(INodeSection.INode p) { + INodeDirectory root = loadINodeDirectory(p, parent.getStringTable()); + final Quota.Counts q = root.getQuotaCounts(); + final long nsQuota = q.get(Quota.NAMESPACE); + final long dsQuota = q.get(Quota.DISKSPACE); + if (nsQuota != -1 || dsQuota != -1) { + dir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota); + } + dir.rootDir.cloneModificationTime(root); + dir.rootDir.clonePermissionStatus(root); + } + } + + public final static class Saver { + private static long buildPermissionStatus(INodeAttributes n, + final StringMap stringMap) { + long userId = stringMap.getStringId(n.getUserName()); + long groupId = stringMap.getStringId(n.getGroupName()); + return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET) + | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET) + | n.getFsPermissionShort(); + } + + public static INodeSection.INodeFile.Builder buildINodeFile( + INodeFileAttributes file, final StringMap stringMap) { + INodeSection.INodeFile.Builder b = INodeSection.INodeFile.newBuilder() + .setAccessTime(file.getAccessTime()) + .setModificationTime(file.getModificationTime()) + .setPermission(buildPermissionStatus(file, stringMap)) + .setPreferredBlockSize(file.getPreferredBlockSize()) + .setReplication(file.getFileReplication()); + return b; + } + + public static INodeSection.INodeDirectory.Builder 
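The permission field of every serialized inode is a single long. A self-contained sketch of the bit layout behind loadPermission() and buildPermissionStatus(), using the same constants as the code above (the permission short is assumed non-negative):

public final class PermissionPackSketch {
    static final long USER_GROUP_STRID_MASK = (1 << 24) - 1;  // 24-bit string-table ids
    static final int USER_STRID_OFFSET = 40;                  // bits 40..63: user id
    static final int GROUP_STRID_OFFSET = 16;                 // bits 16..39: group id

    static long pack(int userId, int groupId, short perm) {
        return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
            | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
            | perm;                                            // bits 0..15: mode
    }

    static int userIdOf(long packed) {
        return (int) ((packed >> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK);
    }

    static int groupIdOf(long packed) {
        return (int) ((packed >> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK);
    }

    static short permOf(long packed) {
        return (short) (packed & ((1 << GROUP_STRID_OFFSET) - 1));
    }

    public static void main(String[] args) {
        long p = pack(7, 3, (short) 0755);
        System.out.println(userIdOf(p) + " " + groupIdOf(p) + " "
            + Integer.toOctalString(permOf(p)));   // prints: 7 3 755
    }
}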
buildINodeDirectory( + INodeDirectoryAttributes dir, final StringMap stringMap) { + Quota.Counts quota = dir.getQuotaCounts(); + INodeSection.INodeDirectory.Builder b = INodeSection.INodeDirectory + .newBuilder().setModificationTime(dir.getModificationTime()) + .setNsQuota(quota.get(Quota.NAMESPACE)) + .setDsQuota(quota.get(Quota.DISKSPACE)) + .setPermission(buildPermissionStatus(dir, stringMap)); + return b; + } + + public static INodeSection.INodeReference.Builder buildINodeReference( + INodeReference ref) throws IOException { + INodeSection.INodeReference.Builder rb = INodeSection.INodeReference + .newBuilder().setReferredId(ref.getId()); + if (ref instanceof WithName) { + rb.setLastSnapshotId(((WithName) ref).getLastSnapshotId()).setName( + ByteString.copyFrom(ref.getLocalNameBytes())); + } else if (ref instanceof DstReference) { + rb.setDstSnapshotId(((DstReference) ref).getDstSnapshotId()); + } + return rb; + } + + private final FSNamesystem fsn; + private final FileSummary.Builder summary; + private final SaveNamespaceContext context; + private final FSImageFormatProtobuf.Saver parent; + + Saver(FSImageFormatProtobuf.Saver parent, FileSummary.Builder summary) { + this.parent = parent; + this.summary = summary; + this.context = parent.getContext(); + this.fsn = context.getSourceNamesystem(); + } + + void serializeINodeDirectorySection(OutputStream out) throws IOException { + Iterator iter = fsn.getFSDirectory() + .getINodeMap().getMapIterator(); + int i = 0; + while (iter.hasNext()) { + INodeWithAdditionalFields n = iter.next(); + if (!n.isDirectory()) { + continue; + } + + ReadOnlyList children = n.asDirectory().getChildrenList( + Snapshot.CURRENT_STATE_ID); + if (children.size() > 0) { + INodeDirectorySection.DirEntry.Builder b = INodeDirectorySection. 
+ DirEntry.newBuilder().setParent(n.getId()); + List refs = new ArrayList(); + for (INode inode : children) { + if (!inode.isReference()) { + b.addChildren(inode.getId()); + } else { + refs.add(inode.asReference()); + } + } + b.setNumOfRef(refs.size()); + INodeDirectorySection.DirEntry e = b.build(); + e.writeDelimitedTo(out); + for (INodeReference ref : refs) { + INodeSection.INodeReference.Builder rb = buildINodeReference(ref); + rb.build().writeDelimitedTo(out); + } + } + + ++i; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + parent.commitSection(summary, + FSImageFormatProtobuf.SectionName.INODE_DIR); + } + + void serializeINodeSection(OutputStream out) throws IOException { + INodeMap inodesMap = fsn.dir.getINodeMap(); + + INodeSection.Builder b = INodeSection.newBuilder() + .setLastInodeId(fsn.getLastInodeId()).setNumInodes(inodesMap.size()); + INodeSection s = b.build(); + s.writeDelimitedTo(out); + + int i = 0; + Iterator iter = inodesMap.getMapIterator(); + while (iter.hasNext()) { + INodeWithAdditionalFields n = iter.next(); + save(out, n); + ++i; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + parent.commitSection(summary, FSImageFormatProtobuf.SectionName.INODE); + } + + void serializeFilesUCSection(OutputStream out) throws IOException { + Map ucMap = fsn.getFilesUnderConstruction(); + for (Map.Entry entry : ucMap.entrySet()) { + String path = entry.getKey(); + INodeFile file = entry.getValue(); + FileUnderConstructionEntry.Builder b = FileUnderConstructionEntry + .newBuilder().setInodeId(file.getId()).setFullPath(path); + FileUnderConstructionEntry e = b.build(); + e.writeDelimitedTo(out); + } + parent.commitSection(summary, + FSImageFormatProtobuf.SectionName.FILES_UNDERCONSTRUCTION); + } + + private void save(OutputStream out, INode n) throws IOException { + if (n.isDirectory()) { + save(out, n.asDirectory()); + } else if (n.isFile()) { + save(out, n.asFile()); + } else if (n.isSymlink()) { + save(out, n.asSymlink()); + } + } + + private void save(OutputStream out, INodeDirectory n) throws IOException { + INodeSection.INodeDirectory.Builder b = buildINodeDirectory(n, + parent.getStringMap()); + INodeSection.INode r = buildINodeCommon(n) + .setType(INodeSection.INode.Type.DIRECTORY).setDirectory(b).build(); + r.writeDelimitedTo(out); + } + + private void save(OutputStream out, INodeFile n) throws IOException { + INodeSection.INodeFile.Builder b = buildINodeFile(n, + parent.getStringMap()); + + for (Block block : n.getBlocks()) { + b.addBlocks(PBHelper.convert(block)); + } + + FileUnderConstructionFeature uc = n.getFileUnderConstructionFeature(); + if (uc != null) { + INodeSection.FileUnderConstructionFeature f = + INodeSection.FileUnderConstructionFeature + .newBuilder().setClientName(uc.getClientName()) + .setClientMachine(uc.getClientMachine()).build(); + b.setFileUC(f); + } + + INodeSection.INode r = buildINodeCommon(n) + .setType(INodeSection.INode.Type.FILE).setFile(b).build(); + r.writeDelimitedTo(out); + } + + private void save(OutputStream out, INodeSymlink n) throws IOException { + INodeSection.INodeSymlink.Builder b = INodeSection.INodeSymlink + .newBuilder() + .setPermission(buildPermissionStatus(n, parent.getStringMap())) + .setTarget(ByteString.copyFrom(n.getSymlink())); + INodeSection.INode r = buildINodeCommon(n) + .setType(INodeSection.INode.Type.SYMLINK).setSymlink(b).build(); + r.writeDelimitedTo(out); + } + + private final 
INodeSection.INode.Builder buildINodeCommon(INode n) { + return INodeSection.INode.newBuilder() + .setId(n.getId()) + .setName(ByteString.copyFrom(n.getLocalNameBytes())); + } + } + + private FSImageFormatPBINode() { + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java new file mode 100644 index 00000000000..2edc57b18d7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -0,0 +1,551 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.hdfs.server.namenode.snapshot.FSImageFormatPBSnapshot; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; +import org.apache.hadoop.hdfs.util.MD5FileUtils; +import org.apache.hadoop.io.MD5Hash; +import 
org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressorStream; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.io.LimitInputStream; +import com.google.protobuf.CodedOutputStream; + +/** + * Utility class to read / write fsimage in protobuf format. + */ +@InterfaceAudience.Private +public final class FSImageFormatProtobuf { + private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); + + public static final class Loader implements FSImageFormat.AbstractLoader { + static final int MINIMUM_FILE_LENGTH = 8; + private final Configuration conf; + private final FSNamesystem fsn; + + private String[] stringTable; + + /** The MD5 sum of the loaded file */ + private MD5Hash imgDigest; + /** The transaction ID of the last edit represented by the loaded file */ + private long imgTxId; + + Loader(Configuration conf, FSNamesystem fsn) { + this.conf = conf; + this.fsn = fsn; + } + + @Override + public MD5Hash getLoadedImageMd5() { + return imgDigest; + } + + @Override + public long getLoadedImageTxId() { + return imgTxId; + } + + public String[] getStringTable() { + return stringTable; + } + + void load(File file) throws IOException { + long start = System.currentTimeMillis(); + imgDigest = MD5FileUtils.computeMd5ForFile(file); + RandomAccessFile raFile = new RandomAccessFile(file, "r"); + FileInputStream fin = new FileInputStream(file); + try { + loadInternal(raFile, fin); + long end = System.currentTimeMillis(); + LOG.info("Loaded FSImage in " + (end - start) / 1000 + " seconds."); + } finally { + fin.close(); + raFile.close(); + } + } + + private void loadInternal(RandomAccessFile raFile, FileInputStream fin) + throws IOException { + if (!FSImageUtil.checkFileFormat(raFile)) { + throw new IOException("Unrecognized file format"); + } + FileSummary summary = FSImageUtil.loadSummary(raFile); + + FileChannel channel = fin.getChannel(); + + FSImageFormatPBINode.Loader inodeLoader = new FSImageFormatPBINode.Loader( + fsn, this); + FSImageFormatPBSnapshot.Loader snapshotLoader = new FSImageFormatPBSnapshot.Loader( + fsn, this); + + ArrayList sections = Lists.newArrayList(summary + .getSectionsList()); + Collections.sort(sections, new Comparator() { + @Override + public int compare(FileSummary.Section s1, FileSummary.Section s2) { + SectionName n1 = SectionName.fromString(s1.getName()); + SectionName n2 = SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + StartupProgress prog = NameNode.getStartupProgress(); + /** + * beginStep() and the endStep() calls do not match the boundary of the + * sections. This is because that the current implementation only allows + * a particular step to be started for once. 
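The comparator in the loader orders sections by the ordinal of the SectionName enum (defined later in this file), so sections are applied in dependency order regardless of the order in which they were written: the string table before the inodes that reference it, the inode section before the directory entries that link them. A reduced sketch of that ordering step:

import java.util.*;

final class SectionOrderSketch {
    enum SectionName { NS_INFO, STRING_TABLE, INODE, SNAPSHOT, INODE_DIR,
        FILES_UNDERCONSTRUCTION, SNAPSHOT_DIFF, SECRET_MANAGER, CACHE_MANAGER }

    record Section(String name, long offset, long length) {}

    static int loadOrder(Section s) {
        try {
            return SectionName.valueOf(s.name()).ordinal();
        } catch (IllegalArgumentException e) {
            return -1;   // unknown section names sort first; the loader only warns on them
        }
    }

    static void sortForLoading(List<Section> sections) {
        sections.sort(Comparator.comparingInt(SectionOrderSketch::loadOrder));
    }
}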
+ */ + Step currentStep = null; + + for (FileSummary.Section s : sections) { + channel.position(s.getOffset()); + InputStream in = new BufferedInputStream(new LimitInputStream(fin, + s.getLength())); + + in = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), in); + + String n = s.getName(); + + switch (SectionName.fromString(n)) { + case NS_INFO: + loadNameSystemSection(in); + break; + case STRING_TABLE: + loadStringTableSection(in); + break; + case INODE: { + currentStep = new Step(StepType.INODES); + prog.beginStep(Phase.LOADING_FSIMAGE, currentStep); + inodeLoader.loadINodeSection(in); + } + break; + case INODE_DIR: + inodeLoader.loadINodeDirectorySection(in); + break; + case FILES_UNDERCONSTRUCTION: + inodeLoader.loadFilesUnderConstructionSection(in); + break; + case SNAPSHOT: + snapshotLoader.loadSnapshotSection(in); + break; + case SNAPSHOT_DIFF: + snapshotLoader.loadSnapshotDiffSection(in); + break; + case SECRET_MANAGER: { + prog.endStep(Phase.LOADING_FSIMAGE, currentStep); + Step step = new Step(StepType.DELEGATION_TOKENS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + loadSecretManagerSection(in); + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + break; + case CACHE_MANAGER: { + Step step = new Step(StepType.CACHE_POOLS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + loadCacheManagerSection(in); + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + break; + default: + LOG.warn("Unregconized section " + n); + break; + } + } + } + + private void loadNameSystemSection(InputStream in) throws IOException { + NameSystemSection s = NameSystemSection.parseDelimitedFrom(in); + fsn.setGenerationStampV1(s.getGenstampV1()); + fsn.setGenerationStampV2(s.getGenstampV2()); + fsn.setGenerationStampV1Limit(s.getGenstampV1Limit()); + fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId()); + imgTxId = s.getTransactionId(); + } + + private void loadStringTableSection(InputStream in) throws IOException { + StringTableSection s = StringTableSection.parseDelimitedFrom(in); + stringTable = new String[s.getNumEntry() + 1]; + for (int i = 0; i < s.getNumEntry(); ++i) { + StringTableSection.Entry e = StringTableSection.Entry + .parseDelimitedFrom(in); + stringTable[e.getId()] = e.getStr(); + } + } + + private void loadSecretManagerSection(InputStream in) throws IOException { + SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in); + int numKeys = s.getNumKeys(), numTokens = s.getNumTokens(); + ArrayList keys = Lists + .newArrayListWithCapacity(numKeys); + ArrayList tokens = Lists + .newArrayListWithCapacity(numTokens); + + for (int i = 0; i < numKeys; ++i) + keys.add(SecretManagerSection.DelegationKey.parseDelimitedFrom(in)); + + for (int i = 0; i < numTokens; ++i) + tokens.add(SecretManagerSection.PersistToken.parseDelimitedFrom(in)); + + fsn.loadSecretManagerState(s, keys, tokens); + } + + private void loadCacheManagerSection(InputStream in) throws IOException { + CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(in); + ArrayList pools = Lists.newArrayListWithCapacity(s + .getNumPools()); + ArrayList directives = Lists + .newArrayListWithCapacity(s.getNumDirectives()); + for (int i = 0; i < s.getNumPools(); ++i) + pools.add(CachePoolInfoProto.parseDelimitedFrom(in)); + for (int i = 0; i < s.getNumDirectives(); ++i) + directives.add(CacheDirectiveInfoProto.parseDelimitedFrom(in)); + fsn.getCacheManager().loadState( + new CacheManager.PersistState(s, pools, directives)); + } + + } + + public static final class Saver { + private final 
SaveNamespaceContext context; + private long currentOffset = FSImageUtil.MAGIC_HEADER.length; + private MD5Hash savedDigest; + private StringMap stringMap = new StringMap(); + + private FileChannel fileChannel; + // OutputStream for the section data + private OutputStream sectionOutputStream; + private CompressionCodec codec; + private OutputStream underlyingOutputStream; + public static final int CHECK_CANCEL_INTERVAL = 4096; + + Saver(SaveNamespaceContext context) { + this.context = context; + } + + public MD5Hash getSavedDigest() { + return savedDigest; + } + + public SaveNamespaceContext getContext() { + return context; + } + + public void commitSection(FileSummary.Builder summary, SectionName name) + throws IOException { + long oldOffset = currentOffset; + flushSectionOutputStream(); + + if (codec != null) { + sectionOutputStream = codec.createOutputStream(underlyingOutputStream); + } else { + sectionOutputStream = underlyingOutputStream; + } + long length = fileChannel.position() - oldOffset; + summary.addSections(FileSummary.Section.newBuilder().setName(name.name) + .setLength(length).setOffset(currentOffset)); + currentOffset += length; + } + + private void flushSectionOutputStream() throws IOException { + if (codec != null) { + ((CompressorStream) sectionOutputStream).finish(); + } + sectionOutputStream.flush(); + } + + void save(File file, FSImageCompression compression) throws IOException { + FileOutputStream fout = new FileOutputStream(file); + fileChannel = fout.getChannel(); + try { + saveInternal(fout, compression, file.getAbsolutePath().toString()); + } finally { + fout.close(); + } + } + + private static void saveFileSummary(OutputStream out, FileSummary summary) + throws IOException { + summary.writeDelimitedTo(out); + int length = getOndiskTrunkSize(summary); + byte[] lengthBytes = new byte[4]; + ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length); + out.write(lengthBytes); + } + + private void saveInodes(FileSummary.Builder summary) throws IOException { + FSImageFormatPBINode.Saver saver = new FSImageFormatPBINode.Saver(this, + summary); + + saver.serializeINodeSection(sectionOutputStream); + saver.serializeINodeDirectorySection(sectionOutputStream); + saver.serializeFilesUCSection(sectionOutputStream); + } + + private void saveSnapshots(FileSummary.Builder summary) throws IOException { + FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver( + this, summary, context, context.getSourceNamesystem()); + + snapshotSaver.serializeSnapshotSection(sectionOutputStream); + snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream); + } + + private void saveInternal(FileOutputStream fout, + FSImageCompression compression, String filePath) throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + MessageDigest digester = MD5Hash.getDigester(); + + underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream( + fout), digester); + underlyingOutputStream.write(FSImageUtil.MAGIC_HEADER); + + fileChannel = fout.getChannel(); + + FileSummary.Builder b = FileSummary.newBuilder() + .setOndiskVersion(FSImageUtil.FILE_VERSION) + .setLayoutVersion(LayoutVersion.getCurrentLayoutVersion()); + + codec = compression.getImageCodec(); + if (codec != null) { + b.setCodec(codec.getClass().getCanonicalName()); + sectionOutputStream = codec.createOutputStream(underlyingOutputStream); + } else { + sectionOutputStream = underlyingOutputStream; + } + + saveNameSystemSection(b); + // Check for cancellation right after serializing the 
name system section. + // Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace + // depends on this behavior. + context.checkCancelled(); + + Step step = new Step(StepType.INODES, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveInodes(b); + saveSnapshots(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + step = new Step(StepType.DELEGATION_TOKENS, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveSecretManagerSection(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + step = new Step(StepType.CACHE_POOLS, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveCacheManagerSection(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + saveStringTableSection(b); + + // We use the underlyingOutputStream to write the header. Therefore flush + // the buffered stream (which is potentially compressed) first. + flushSectionOutputStream(); + + FileSummary summary = b.build(); + saveFileSummary(underlyingOutputStream, summary); + underlyingOutputStream.close(); + savedDigest = new MD5Hash(digester.digest()); + } + + private void saveSecretManagerSection(FileSummary.Builder summary) + throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + DelegationTokenSecretManager.SecretManagerState state = fsn + .saveSecretManagerState(); + state.section.writeDelimitedTo(sectionOutputStream); + for (SecretManagerSection.DelegationKey k : state.keys) + k.writeDelimitedTo(sectionOutputStream); + + for (SecretManagerSection.PersistToken t : state.tokens) + t.writeDelimitedTo(sectionOutputStream); + + commitSection(summary, SectionName.SECRET_MANAGER); + } + + private void saveCacheManagerSection(FileSummary.Builder summary) + throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + CacheManager.PersistState state = fsn.getCacheManager().saveState(); + state.section.writeDelimitedTo(sectionOutputStream); + + for (CachePoolInfoProto p : state.pools) + p.writeDelimitedTo(sectionOutputStream); + + for (CacheDirectiveInfoProto p : state.directives) + p.writeDelimitedTo(sectionOutputStream); + + commitSection(summary, SectionName.CACHE_MANAGER); + } + + private void saveNameSystemSection(FileSummary.Builder summary) + throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + OutputStream out = sectionOutputStream; + NameSystemSection.Builder b = NameSystemSection.newBuilder() + .setGenstampV1(fsn.getGenerationStampV1()) + .setGenstampV1Limit(fsn.getGenerationStampV1Limit()) + .setGenstampV2(fsn.getGenerationStampV2()) + .setLastAllocatedBlockId(fsn.getLastAllocatedBlockId()) + .setTransactionId(context.getTxId()); + + // We use the non-locked version of getNamespaceInfo here since + // the coordinating thread of saveNamespace already has read-locked + // the namespace for us. If we attempt to take another readlock + // from the actual saver thread, there's a potential of a + // fairness-related deadlock. See the comments on HDFS-2223. 
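+ // With the namespace id filled in below, the section is written as a single
+ // delimited NameSystemSection message and then committed into the FileSummary
+ // (name, offset and length) via commitSection above.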
+ b.setNamespaceId(fsn.unprotectedGetNamespaceInfo().getNamespaceID()); + NameSystemSection s = b.build(); + s.writeDelimitedTo(out); + + commitSection(summary, SectionName.NS_INFO); + } + + private void saveStringTableSection(FileSummary.Builder summary) + throws IOException { + OutputStream out = sectionOutputStream; + StringTableSection.Builder b = StringTableSection.newBuilder() + .setNumEntry(stringMap.size()); + b.build().writeDelimitedTo(out); + for (Entry e : stringMap.entrySet()) { + StringTableSection.Entry.Builder eb = StringTableSection.Entry + .newBuilder().setId(e.getValue()).setStr(e.getKey()); + eb.build().writeDelimitedTo(out); + } + commitSection(summary, SectionName.STRING_TABLE); + } + + public StringMap getStringMap() { + return stringMap; + } + } + + public static class StringMap { + private final Map stringMap; + + public StringMap() { + stringMap = Maps.newHashMap(); + } + + int getStringId(String str) { + if (str == null) { + return 0; + } + Integer v = stringMap.get(str); + if (v == null) { + int nv = stringMap.size() + 1; + stringMap.put(str, nv); + return nv; + } + return v; + } + + int size() { + return stringMap.size(); + } + + Set> entrySet() { + return stringMap.entrySet(); + } + } + + /** + * Supported section name. The order of the enum determines the order of + * loading. + */ + public enum SectionName { + NS_INFO("NS_INFO"), + STRING_TABLE("STRING_TABLE"), + INODE("INODE"), + SNAPSHOT("SNAPSHOT"), + INODE_DIR("INODE_DIR"), + FILES_UNDERCONSTRUCTION("FILES_UNDERCONSTRUCTION"), + SNAPSHOT_DIFF("SNAPSHOT_DIFF"), + SECRET_MANAGER("SECRET_MANAGER"), + CACHE_MANAGER("CACHE_MANAGER"); + + private static final SectionName[] values = SectionName.values(); + + public static SectionName fromString(String name) { + for (SectionName n : values) { + if (n.name.equals(name)) + return n; + } + return null; + } + + private final String name; + + private SectionName(String name) { + this.name = name; + } + } + + private static int getOndiskTrunkSize(com.google.protobuf.GeneratedMessage s) { + return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize()) + + s.getSerializedSize(); + } + + private FSImageFormatProtobuf() { + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java new file mode 100644 index 00000000000..b9953480f26 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; +import java.util.Arrays; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.Loader; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.io.compress.CompressionCodec; + +@InterfaceAudience.Private +public final class FSImageUtil { + public static final byte[] MAGIC_HEADER = "HDFSIMG1".getBytes(); + public static final int FILE_VERSION = 1; + + public static boolean checkFileFormat(RandomAccessFile file) + throws IOException { + if (file.length() < Loader.MINIMUM_FILE_LENGTH) + return false; + + byte[] magic = new byte[MAGIC_HEADER.length]; + file.readFully(magic); + if (!Arrays.equals(MAGIC_HEADER, magic)) + return false; + + return true; + } + + public static FileSummary loadSummary(RandomAccessFile file) + throws IOException { + final int FILE_LENGTH_FIELD_SIZE = 4; + long fileLength = file.length(); + file.seek(fileLength - FILE_LENGTH_FIELD_SIZE); + int summaryLength = file.readInt(); + + if (summaryLength <= 0) { + throw new IOException("Negative length of the file"); + } + file.seek(fileLength - FILE_LENGTH_FIELD_SIZE - summaryLength); + + byte[] summaryBytes = new byte[summaryLength]; + file.readFully(summaryBytes); + + FileSummary summary = FileSummary + .parseDelimitedFrom(new ByteArrayInputStream(summaryBytes)); + if (summary.getOndiskVersion() != FILE_VERSION) { + throw new IOException("Unsupported file version " + + summary.getOndiskVersion()); + } + + if (!LayoutVersion.supports(Feature.PROTOBUF_FORMAT, + summary.getLayoutVersion())) { + throw new IOException("Unsupported layout version " + + summary.getLayoutVersion()); + } + return summary; + } + + public static InputStream wrapInputStreamForCompression( + Configuration conf, String codec, InputStream in) throws IOException { + if (codec.isEmpty()) + return in; + + FSImageCompression compression = FSImageCompression.createCompression( + conf, codec); + CompressionCodec imageCodec = compression.getImageCodec(); + return imageCodec.createInputStream(in); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4e209767dfc..f91c41c7610 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -179,6 +179,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; @@ -196,6 +197,8 @@ import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.common.Util; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection.PersistToken; import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; @@ -6012,6 +6015,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } + /** + * @return all the under-construction files in the lease map + */ + Map getFilesUnderConstruction() { + synchronized (leaseManager) { + return leaseManager.getINodesUnderConstruction(); + } + } + /** * Register a Backup name-node, verifying that it belongs * to the correct namespace, and adding it to the set of @@ -6288,6 +6300,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dtSecretManager.saveSecretManagerStateCompat(out, sdPath); } + SecretManagerState saveSecretManagerState() { + return dtSecretManager.saveSecretManagerState(); + } + /** * @param in load the state of secret manager from input stream */ @@ -6295,6 +6311,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dtSecretManager.loadSecretManagerStateCompat(in); } + void loadSecretManagerState(SecretManagerSection s, + List keys, + List tokens) throws IOException { + dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens)); + } + /** * Log the updateMasterKey operation to edit logs * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java index 83cb0a4eb94..f9a06f1e5bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java @@ -171,7 +171,7 @@ public class INodeDirectory extends INodeWithAdditionalFields return children == null? 
-1: Collections.binarySearch(children, name); } - protected DirectoryWithSnapshotFeature addSnapshotFeature( + public DirectoryWithSnapshotFeature addSnapshotFeature( DirectoryDiffList diffs) { Preconditions.checkState(!isWithSnapshot(), "Directory is already with snapshot"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index 500405e09d4..80abb5268dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -252,7 +252,7 @@ public class INodeFile extends INodeWithAdditionalFields /* Start of Snapshot Feature */ - private FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) { + public FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) { Preconditions.checkState(!isWithSnapshot(), "File is already with snapshot"); FileWithSnapshotFeature sf = new FileWithSnapshotFeature(diffs); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java index 5ffcc21f5bb..bd0355b6618 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import java.util.Iterator; import java.util.List; import org.apache.hadoop.fs.permission.FsPermission; @@ -46,6 +47,10 @@ public class INodeMap { /** Synchronized by external lock. */ private final GSet map; + public Iterator getMapIterator() { + return map.iterator(); + } + private INodeMap(GSet map) { Preconditions.checkArgument(map != null); this.map = map; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java index 67ee88e11de..a7c4c75f005 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.util.Canceler; @@ -32,7 +33,8 @@ import com.google.common.base.Preconditions; * allows cancellation, and also is responsible for accumulating * failed storage directories. 
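 * <p>Informal usage sketch (illustrative only; hasMoreRecords, writeNextRecord
 * and count are hypothetical, and the context is created and driven by FSImage
 * during saveNamespace): long-running savers poll for cancellation
 * periodically, for example
 * <pre>
 * while (hasMoreRecords()) {
 *   writeNextRecord(out);          // hypothetical record writer
 *   if (++count % 4096 == 0) {
 *     context.checkCancelled();    // throws SaveNamespaceCancelledException if cancelled
 *   }
 * }
 * </pre>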
*/ -class SaveNamespaceContext { +@InterfaceAudience.Private +public class SaveNamespaceContext { private final FSNamesystem sourceNamesystem; private final long txid; private final List errorSDs = @@ -72,7 +74,7 @@ class SaveNamespaceContext { completionLatch.countDown(); } - void checkCancelled() throws SaveNamespaceCancelledException { + public void checkCancelled() throws SaveNamespaceCancelledException { if (canceller.isCancelled()) { throw new SaveNamespaceCancelledException( canceller.getCancellationReason()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java index 06f7a89e33a..a9cad94f0b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java @@ -244,7 +244,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { this.isSnapshotRoot = isSnapshotRoot; } - ChildrenDiff getChildrenDiff() { + public ChildrenDiff getChildrenDiff() { return diff; } @@ -343,6 +343,10 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { return super.toString() + " childrenSize=" + childrenSize + ", " + diff; } + int getChildrenSize() { + return childrenSize; + } + @Override void write(DataOutput out, ReferenceMap referenceMap) throws IOException { writeSnapshot(out); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java new file mode 100644 index 00000000000..06cc1d0ac1f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -0,0 +1,437 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.snapshot; + +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadINodeDirectory; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadINodeReference; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadPermission; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.updateBlocksMap; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeDirectory; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeFile; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeReference; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.server.namenode.FSDirectory; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.CreatedListEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.DiffEntry.Type; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection; +import org.apache.hadoop.hdfs.server.namenode.INode; +import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes; +import org.apache.hadoop.hdfs.server.namenode.INodeFile; +import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes; +import org.apache.hadoop.hdfs.server.namenode.INodeMap; +import org.apache.hadoop.hdfs.server.namenode.INodeReference; +import org.apache.hadoop.hdfs.server.namenode.INodeWithAdditionalFields; +import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceContext; +import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff; +import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList; +import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root; +import org.apache.hadoop.hdfs.util.Diff.ListType; + +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; + +@InterfaceAudience.Private +public class FSImageFormatPBSnapshot { + /** + * Loading snapshot related information from protobuf based FSImage + */ + public final static class Loader { + private final FSNamesystem fsn; + private final FSDirectory fsDir; + private final FSImageFormatProtobuf.Loader parent; + private final Map snapshotMap; + + + public Loader(FSNamesystem fsn, FSImageFormatProtobuf.Loader parent) { + this.fsn = fsn; + this.fsDir = fsn.getFSDirectory(); + this.snapshotMap = new HashMap(); + this.parent = parent; + } + + /** + * Load the snapshots section from fsimage. Also convert snapshottable + * directories into {@link INodeDirectorySnapshottable}. 
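+ * <p>Wire-format sketch (informal; inferred from the loading code below and the
+ * fsimage.proto added by this patch): the section is one delimited
+ * SnapshotSection message followed by one delimited SnapshotSection.Snapshot
+ * message per snapshot:
+ * <pre>
+ * SnapshotSection header = SnapshotSection.parseDelimitedFrom(in);
+ * for (int i = 0; i < header.getNumSnapshots(); i++) {
+ *   SnapshotSection.Snapshot s =
+ *       SnapshotSection.Snapshot.parseDelimitedFrom(in);
+ *   // each entry carries the snapshot id and its root directory inode
+ * }
+ * </pre>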
+ * + */ + public void loadSnapshotSection(InputStream in) throws IOException { + SnapshotManager sm = fsn.getSnapshotManager(); + SnapshotSection section = SnapshotSection.parseDelimitedFrom(in); + int snum = section.getNumSnapshots(); + sm.setNumSnapshots(snum); + sm.setSnapshotCounter(section.getSnapshotCounter()); + for (long sdirId : section.getSnapshottableDirList()) { + INodeDirectory dir = fsDir.getInode(sdirId).asDirectory(); + final INodeDirectorySnapshottable sdir; + if (!dir.isSnapshottable()) { + sdir = new INodeDirectorySnapshottable(dir); + fsDir.addToInodeMap(sdir); + } else { + // dir is root, and admin set root to snapshottable before + sdir = (INodeDirectorySnapshottable) dir; + sdir.setSnapshotQuota(INodeDirectorySnapshottable.SNAPSHOT_LIMIT); + } + sm.addSnapshottable(sdir); + } + loadSnapshots(in, snum); + } + + private void loadSnapshots(InputStream in, int size) throws IOException { + for (int i = 0; i < size; i++) { + SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot + .parseDelimitedFrom(in); + INodeDirectory root = loadINodeDirectory(pbs.getRoot(), + parent.getStringTable()); + int sid = pbs.getSnapshotId(); + INodeDirectorySnapshottable parent = (INodeDirectorySnapshottable) fsDir + .getInode(root.getId()).asDirectory(); + Snapshot snapshot = new Snapshot(sid, root, parent); + // add the snapshot to parent, since we follow the sequence of + // snapshotsByNames when saving, we do not need to sort when loading + parent.addSnapshot(snapshot); + snapshotMap.put(sid, snapshot); + } + } + + /** + * Load the snapshot diff section from fsimage. + */ + public void loadSnapshotDiffSection(InputStream in) throws IOException { + while (true) { + SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry + .parseDelimitedFrom(in); + if (entry == null) { + break; + } + long inodeId = entry.getInodeId(); + INode inode = fsDir.getInode(inodeId); + SnapshotDiffSection.DiffEntry.Type type = entry.getType(); + switch (type) { + case FILEDIFF: + loadFileDiffList(in, inode.asFile(), entry.getNumOfDiff()); + break; + case DIRECTORYDIFF: + loadDirectoryDiffList(in, inode.asDirectory(), entry.getNumOfDiff()); + break; + } + } + } + + /** Load FileDiff list for a file with snapshot feature */ + private void loadFileDiffList(InputStream in, INodeFile file, int size) + throws IOException { + final FileDiffList diffs = new FileDiffList(); + for (int i = 0; i < size; i++) { + SnapshotDiffSection.FileDiff pbf = SnapshotDiffSection.FileDiff + .parseDelimitedFrom(in); + INodeFileAttributes copy = null; + if (pbf.hasSnapshotCopy()) { + INodeSection.INodeFile fileInPb = pbf.getSnapshotCopy(); + PermissionStatus permission = loadPermission( + fileInPb.getPermission(), parent.getStringTable()); + copy = new INodeFileAttributes.SnapshotCopy(pbf.getName() + .toByteArray(), permission, fileInPb.getModificationTime(), + fileInPb.getAccessTime(), (short) fileInPb.getReplication(), + fileInPb.getPreferredBlockSize()); + } + + FileDiff diff = new FileDiff(pbf.getSnapshotId(), copy, null, + pbf.getFileSize()); + diffs.addFirst(diff); + } + file.addSnapshotFeature(diffs); + } + + /** Load the created list in a DirectoryDiff */ + private List loadCreatedList(InputStream in, INodeDirectory dir, + int size) throws IOException { + List clist = new ArrayList(size); + for (long c = 0; c < size; c++) { + CreatedListEntry entry = CreatedListEntry.parseDelimitedFrom(in); + INode created = SnapshotFSImageFormat.loadCreated(entry.getName() + .toByteArray(), dir); + clist.add(created); + } + return 
clist; + } + + private void addToDeletedList(INode dnode, INodeDirectory parent) { + dnode.setParent(parent); + if (dnode.isFile()) { + updateBlocksMap(dnode.asFile(), fsn.getBlockManager()); + } + } + + /** + * Load the deleted list in a DirectoryDiff + * @param totalSize the total size of the deleted list + * @param deletedNodes non-reference inodes in the deleted list. These + * inodes' ids are directly recorded in protobuf + */ + private List loadDeletedList(InputStream in, INodeDirectory dir, + int refNum, List deletedNodes) throws IOException { + List dlist = new ArrayList(refNum + deletedNodes.size()); + // load non-reference inodes + for (long deletedId : deletedNodes) { + INode deleted = fsDir.getInode(deletedId); + dlist.add(deleted); + addToDeletedList(deleted, dir); + } + // load reference nodes in the deleted list + for (int r = 0; r < refNum; r++) { + INodeSection.INodeReference ref = INodeSection.INodeReference + .parseDelimitedFrom(in); + INodeReference refNode = loadINodeReference(ref, fsDir); + dlist.add(refNode); + addToDeletedList(refNode, dir); + } + Collections.sort(dlist, new Comparator() { + @Override + public int compare(INode n1, INode n2) { + return n1.compareTo(n2.getLocalNameBytes()); + } + }); + return dlist; + } + + /** Load DirectoryDiff list for a directory with snapshot feature */ + private void loadDirectoryDiffList(InputStream in, INodeDirectory dir, + int size) throws IOException { + if (!dir.isWithSnapshot()) { + dir.addSnapshotFeature(null); + } + DirectoryDiffList diffs = dir.getDiffs(); + for (int i = 0; i < size; i++) { + // load a directory diff + SnapshotDiffSection.DirectoryDiff diffInPb = SnapshotDiffSection. + DirectoryDiff.parseDelimitedFrom(in); + final int snapshotId = diffInPb.getSnapshotId(); + final Snapshot snapshot = snapshotMap.get(snapshotId); + int childrenSize = diffInPb.getChildrenSize(); + boolean useRoot = diffInPb.getIsSnapshotRoot(); + INodeDirectoryAttributes copy = null; + if (useRoot) { + copy = snapshot.getRoot(); + }else if (diffInPb.hasSnapshotCopy()) { + INodeSection.INodeDirectory dirCopyInPb = diffInPb.getSnapshotCopy(); + final byte[] name = diffInPb.getName().toByteArray(); + PermissionStatus permission = loadPermission(dirCopyInPb + .getPermission(), parent.getStringTable()); + long modTime = dirCopyInPb.getModificationTime(); + boolean noQuota = dirCopyInPb.getNsQuota() == -1 + && dirCopyInPb.getDsQuota() == -1; + copy = noQuota ? 
new INodeDirectoryAttributes.SnapshotCopy(name, + permission, modTime) + : new INodeDirectoryAttributes.CopyWithQuota(name, permission, + modTime, dirCopyInPb.getNsQuota(), dirCopyInPb.getDsQuota()); + } + // load created list + List clist = loadCreatedList(in, dir, + diffInPb.getCreatedListSize()); + // load deleted list + List dlist = loadDeletedList(in, dir, + diffInPb.getNumOfDeletedRef(), diffInPb.getDeletedINodeList()); + // create the directory diff + DirectoryDiff diff = new DirectoryDiff(snapshotId, copy, null, + childrenSize, clist, dlist, useRoot); + diffs.addFirst(diff); + } + } + } + + /** + * Saving snapshot related information to protobuf based FSImage + */ + public final static class Saver { + private final FSNamesystem fsn; + private final FileSummary.Builder headers; + private final FSImageFormatProtobuf.Saver parent; + private final SaveNamespaceContext context; + + public Saver(FSImageFormatProtobuf.Saver parent, + FileSummary.Builder headers, SaveNamespaceContext context, FSNamesystem fsn) { + this.parent = parent; + this.headers = headers; + this.context = context; + this.fsn = fsn; + } + + /** + * save all the snapshottable directories and snapshots to fsimage + */ + public void serializeSnapshotSection(OutputStream out) throws IOException { + SnapshotManager sm = fsn.getSnapshotManager(); + SnapshotSection.Builder b = SnapshotSection.newBuilder() + .setSnapshotCounter(sm.getSnapshotCounter()) + .setNumSnapshots(sm.getNumSnapshots()); + + INodeDirectorySnapshottable[] snapshottables = sm.getSnapshottableDirs(); + for (INodeDirectorySnapshottable sdir : snapshottables) { + b.addSnapshottableDir(sdir.getId()); + } + b.build().writeDelimitedTo(out); + int i = 0; + for(INodeDirectorySnapshottable sdir : snapshottables) { + for(Snapshot s : sdir.getSnapshotsByNames()) { + Root sroot = s.getRoot(); + SnapshotSection.Snapshot.Builder sb = SnapshotSection.Snapshot + .newBuilder().setSnapshotId(s.getId()); + INodeSection.INodeDirectory.Builder db = buildINodeDirectory(sroot, + parent.getStringMap()); + INodeSection.INode r = INodeSection.INode.newBuilder() + .setId(sroot.getId()) + .setType(INodeSection.INode.Type.DIRECTORY) + .setName(ByteString.copyFrom(sroot.getLocalNameBytes())) + .setDirectory(db).build(); + sb.setRoot(r).build().writeDelimitedTo(out); + i++; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + } + Preconditions.checkState(i == sm.getNumSnapshots()); + parent.commitSection(headers, FSImageFormatProtobuf.SectionName.SNAPSHOT); + } + + /** + * save all the snapshot diff to fsimage + */ + public void serializeSnapshotDiffSection(OutputStream out) + throws IOException { + INodeMap inodesMap = fsn.getFSDirectory().getINodeMap(); + int i = 0; + Iterator iter = inodesMap.getMapIterator(); + while (iter.hasNext()) { + INodeWithAdditionalFields inode = iter.next(); + if (inode.isFile()) { + serializeFileDiffList(inode.asFile(), out); + } else if (inode.isDirectory()) { + serializeDirDiffList(inode.asDirectory(), out); + } + ++i; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + parent.commitSection(headers, + FSImageFormatProtobuf.SectionName.SNAPSHOT_DIFF); + } + + private void serializeFileDiffList(INodeFile file, OutputStream out) + throws IOException { + FileWithSnapshotFeature sf = file.getFileWithSnapshotFeature(); + if (sf != null) { + List diffList = sf.getDiffs().asList(); + SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry + 
.newBuilder().setInodeId(file.getId()).setType(Type.FILEDIFF) + .setNumOfDiff(diffList.size()).build(); + entry.writeDelimitedTo(out); + for (int i = diffList.size() - 1; i >= 0; i--) { + FileDiff diff = diffList.get(i); + SnapshotDiffSection.FileDiff.Builder fb = SnapshotDiffSection.FileDiff + .newBuilder().setSnapshotId(diff.getSnapshotId()) + .setFileSize(diff.getFileSize()); + INodeFileAttributes copy = diff.snapshotINode; + if (copy != null) { + fb.setName(ByteString.copyFrom(copy.getLocalNameBytes())) + .setSnapshotCopy(buildINodeFile(copy, parent.getStringMap())); + } + fb.build().writeDelimitedTo(out); + } + } + } + + private void saveCreatedDeletedList(List created, + List deletedRefs, OutputStream out) throws IOException { + // local names of the created list member + for (INode c : created) { + SnapshotDiffSection.CreatedListEntry.newBuilder() + .setName(ByteString.copyFrom(c.getLocalNameBytes())).build() + .writeDelimitedTo(out); + } + // reference nodes in deleted list + for (INodeReference ref : deletedRefs) { + INodeSection.INodeReference.Builder rb = buildINodeReference(ref); + rb.build().writeDelimitedTo(out); + } + } + + private void serializeDirDiffList(INodeDirectory dir, OutputStream out) + throws IOException { + DirectoryWithSnapshotFeature sf = dir.getDirectoryWithSnapshotFeature(); + if (sf != null) { + List diffList = sf.getDiffs().asList(); + SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry + .newBuilder().setInodeId(dir.getId()).setType(Type.DIRECTORYDIFF) + .setNumOfDiff(diffList.size()).build(); + entry.writeDelimitedTo(out); + for (int i = diffList.size() - 1; i >= 0; i--) { // reverse order! + DirectoryDiff diff = diffList.get(i); + SnapshotDiffSection.DirectoryDiff.Builder db = SnapshotDiffSection. 
+ DirectoryDiff.newBuilder().setSnapshotId(diff.getSnapshotId()) + .setChildrenSize(diff.getChildrenSize()) + .setIsSnapshotRoot(diff.isSnapshotRoot()); + INodeDirectoryAttributes copy = diff.snapshotINode; + if (!diff.isSnapshotRoot() && copy != null) { + db.setName(ByteString.copyFrom(copy.getLocalNameBytes())) + .setSnapshotCopy( + buildINodeDirectory(copy, parent.getStringMap())); + } + // process created list and deleted list + List created = diff.getChildrenDiff() + .getList(ListType.CREATED); + db.setCreatedListSize(created.size()); + List deleted = diff.getChildrenDiff().getList(ListType.DELETED); + List refs = new ArrayList(); + for (INode d : deleted) { + if (d.isReference()) { + refs.add(d.asReference()); + } else { + db.addDeletedINode(d.getId()); + } + } + db.setNumOfDeletedRef(refs.size()); + db.build().writeDelimitedTo(out); + saveCreatedDeletedList(created, refs, out); + } + } + } + } + + private FSImageFormatPBSnapshot(){} +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java index e836cd87959..69fdf97391c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java @@ -27,7 +27,6 @@ import java.util.Map; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.namenode.FSImageFormat; -import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader; import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; @@ -137,7 +136,7 @@ public class SnapshotFSImageFormat { * @param parent The directory that the created list belongs to. * @return The created node. */ - private static INode loadCreated(byte[] createdNodeName, + public static INode loadCreated(byte[] createdNodeName, INodeDirectory parent) throws IOException { // the INode in the created list should be a reference to another INode // in posterior SnapshotDiffs or one of the current children @@ -209,11 +208,13 @@ public class SnapshotFSImageFormat { /** * Load snapshots and snapshotQuota for a Snapshottable directory. - * @param snapshottableParent The snapshottable directory for loading. - * @param numSnapshots The number of snapshots that the directory has. - * @param in The {@link DataInput} instance to read. - * @param loader The {@link Loader} instance that this loading procedure is - * using. + * + * @param snapshottableParent + * The snapshottable directory for loading. + * @param numSnapshots + * The number of snapshots that the directory has. + * @param loader + * The loader */ public static void loadSnapshotList( INodeDirectorySnapshottable snapshottableParent, int numSnapshots, @@ -231,10 +232,13 @@ public class SnapshotFSImageFormat { /** * Load the {@link SnapshotDiff} list for the INodeDirectoryWithSnapshot * directory. - * @param dir The snapshottable directory for loading. - * @param in The {@link DataInput} instance to read. - * @param loader The {@link Loader} instance that this loading procedure is - * using. + * + * @param dir + * The snapshottable directory for loading. + * @param in + * The {@link DataInput} instance to read. 
+ * @param loader + * The loader */ public static void loadDirectoryDiffList(INodeDirectory dir, DataInput in, FSImageFormat.Loader loader) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java index 8fa0f0c932b..be1ddc0e9e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java @@ -270,6 +270,23 @@ public class SnapshotManager implements SnapshotStats { return numSnapshots.get(); } + void setNumSnapshots(int num) { + numSnapshots.set(num); + } + + int getSnapshotCounter() { + return snapshotCounter; + } + + void setSnapshotCounter(int counter) { + snapshotCounter = counter; + } + + INodeDirectorySnapshottable[] getSnapshottableDirs() { + return snapshottables.values().toArray( + new INodeDirectorySnapshottable[snapshottables.size()]); + } + /** * Write {@link #snapshotCounter}, {@link #numSnapshots}, * and all snapshots to the DataOutput. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java new file mode 100644 index 00000000000..2433b28a859 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.base.Preconditions; +import com.google.common.io.LimitInputStream; + +/** + * This is the tool for analyzing file sizes in the namespace image. In order to + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. 
The range of integers is + * divided into segments of size step: + * [0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize], and the visitor + * calculates how many files in the system fall into each segment + * [s<sub>i-1</sub>, s<sub>i</sub>). Note that files larger than + * maxSize always fall into the very last segment. + * + *

<h3>Input.</h3> + * <ul> + * <li>filename specifies the location of the image file;</li> + * <li>maxSize determines the range [0, maxSize] of files + * sizes considered by the visitor;</li> + * <li>step the range is divided into segments of size step.</li> + * </ul> + *
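+ * <p>Worked example (illustrative only, not produced by this tool): with
+ * step = 2 MB, a file whose computed size (block bytes times replication, as
+ * accumulated in run() below) is 5 MB is counted in bucket
+ * ceil(5 MB / 2 MB) = 3, i.e. the segment (4 MB, 6 MB].</p>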

<h3>Output.</h3>
The output file is formatted as a tab separated two column + * table: Size and NumFiles. Where Size represents the start of the segment, and + * numFiles is the number of files form the image which size falls in this + * segment. + * + */ +final class FileDistributionCalculator { + private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37 + private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21 + + private final Configuration conf; + private final long maxSize; + private final int steps; + private final PrintWriter out; + + private int[] distribution; + private int totalFiles; + private int totalDirectories; + private int totalBlocks; + private long totalSpace; + private long maxFileSize; + + FileDistributionCalculator(Configuration conf, long maxSize, int steps, + PrintWriter out) { + this.conf = conf; + this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize; + this.steps = steps == 0 ? INTERVAL_DEFAULT : steps; + this.out = out; + long numIntervals = this.maxSize / this.steps; + this.distribution = new int[1 + (int) (numIntervals)]; + Preconditions.checkState(numIntervals < Integer.MAX_VALUE, + "Too many distribution intervals"); + } + + void visit(RandomAccessFile file) throws IOException { + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + FileInputStream in = null; + try { + in = new FileInputStream(file.getFD()); + for (FileSummary.Section s : summary.getSectionsList()) { + if (SectionName.fromString(s.getName()) != SectionName.INODE) { + continue; + } + + in.getChannel().position(s.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + in, s.getLength()))); + run(is); + output(); + } + } finally { + IOUtils.cleanup(null, in); + } + } + + private void run(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + if (p.getType() == INodeSection.INode.Type.FILE) { + ++totalFiles; + INodeSection.INodeFile f = p.getFile(); + totalBlocks += f.getBlocksCount(); + long fileSize = 0; + for (BlockProto b : f.getBlocksList()) { + fileSize += b.getNumBytes() * f.getReplication(); + } + maxFileSize = Math.max(fileSize, maxFileSize); + totalSpace += fileSize; + + int bucket = fileSize > maxSize ? 
distribution.length - 1 : (int) Math + .ceil((double)fileSize / steps); + ++distribution[bucket]; + + } else if (p.getType() == INodeSection.INode.Type.DIRECTORY) { + ++totalDirectories; + } + + if (i % (1 << 20) == 0) { + out.println("Processed " + i + " inodes."); + } + } + } + + private void output() { + // write the distribution into the output file + out.print("Size\tNumFiles\n"); + for (int i = 0; i < distribution.length; i++) { + if (distribution[i] != 0) { + out.print(((long) i * steps) + "\t" + distribution[i]); + out.print('\n'); + } + } + out.print("totalFiles = " + totalFiles + "\n"); + out.print("totalDirectories = " + totalDirectories + "\n"); + out.print("totalBlocks = " + totalBlocks + "\n"); + out.print("totalSpace = " + totalSpace + "\n"); + out.print("maxFileSize = " + maxFileSize + "\n"); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java index c529fb5cdc2..19b859118ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java @@ -127,7 +127,7 @@ class ImageLoaderCurrent implements ImageLoader { new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, - -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 }; + -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52 }; private int imageVersion = 0; private final Map subtreeMap = new HashMap(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java new file mode 100644 index 00000000000..e467725646e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java @@ -0,0 +1,233 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.hdfs.server.namenode.INodeId; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.io.LimitInputStream; + +/** + * This is the tool for analyzing file sizes in the namespace image. In order to + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. The range of integers is + * divided into segments of size step: + * [0, s1, ..., sn-1, maxSize], and the visitor + * calculates how many files in the system fall into each segment + * [si-1, si). Note that files larger than + * maxSize always fall into the very last segment. + * + *

<h3>Input.</h3> + * <ul> + * <li>filename specifies the location of the image file;</li> + * <li>maxSize determines the range [0, maxSize] of files + * sizes considered by the visitor;</li> + * <li>step the range is divided into segments of size step.</li> + * </ul> + * + * <h3>Output.</h3>
The output file is formatted as a tab separated two column + * table: Size and NumFiles. Where Size represents the start of the segment, and + * numFiles is the number of files form the image which size falls in this + * segment. + * + */ +final class LsrPBImage { + private final Configuration conf; + private final PrintWriter out; + private String[] stringTable; + private HashMap inodes = Maps.newHashMap(); + private HashMap dirmap = Maps.newHashMap(); + + public LsrPBImage(Configuration conf, PrintWriter out) { + this.conf = conf; + this.out = out; + } + + public void visit(RandomAccessFile file) throws IOException { + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + FileInputStream fin = null; + try { + fin = new FileInputStream(file.getFD()); + + ArrayList sections = Lists.newArrayList(summary + .getSectionsList()); + Collections.sort(sections, new Comparator() { + @Override + public int compare(FileSummary.Section s1, FileSummary.Section s2) { + SectionName n1 = SectionName.fromString(s1.getName()); + SectionName n2 = SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + for (FileSummary.Section s : sections) { + fin.getChannel().position(s.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, s.getLength()))); + + switch (SectionName.fromString(s.getName())) { + case STRING_TABLE: + loadStringTable(is); + break; + case INODE: + loadINodeSection(is); + break; + case INODE_DIR: + loadINodeDirectorySection(is); + break; + default: + break; + } + } + list("", INodeId.ROOT_INODE_ID); + } finally { + IOUtils.cleanup(null, fin); + } + } + + private void list(String parent, long dirId) { + INode inode = inodes.get(dirId); + listINode(parent.isEmpty() ? 
"/" : parent, inode); + long[] children = dirmap.get(dirId); + if (children == null) { + return; + } + String newParent = parent + inode.getName().toStringUtf8() + "/"; + for (long cid : children) { + list(newParent, cid); + } + } + + private void listINode(String parent, INode inode) { + switch (inode.getType()) { + case FILE: { + INodeFile f = inode.getFile(); + PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission( + f.getPermission(), stringTable); + out.print(String.format("-%s %2s %8s %10s %10s %10d %s%s\n", p + .getPermission().toString(), f.getReplication(), p.getUserName(), p + .getGroupName(), f.getModificationTime(), getFileSize(f), parent, + inode.getName().toStringUtf8())); + } + break; + case DIRECTORY: { + INodeDirectory d = inode.getDirectory(); + PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission( + d.getPermission(), stringTable); + out.print(String.format("d%s - %8s %10s %10s %10d %s%s\n", p + .getPermission().toString(), p.getUserName(), p.getGroupName(), d + .getModificationTime(), 0, parent, inode.getName().toStringUtf8())); + } + break; + case SYMLINK: { + INodeSymlink d = inode.getSymlink(); + PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission( + d.getPermission(), stringTable); + out.print(String.format("-%s - %8s %10s %10s %10d %s%s -> %s\n", p + .getPermission().toString(), p.getUserName(), p.getGroupName(), 0, 0, + parent, inode.getName().toStringUtf8(), d.getTarget().toStringUtf8())); + } + break; + default: + break; + } + } + + private long getFileSize(INodeFile f) { + long size = 0; + for (BlockProto p : f.getBlocksList()) { + size += p.getNumBytes(); + } + return size; + } + + private void loadINodeDirectorySection(InputStream in) throws IOException { + while (true) { + INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry + .parseDelimitedFrom(in); + // note that in is a LimitedInputStream + if (e == null) { + break; + } + long[] l = new long[e.getChildrenCount()]; + for (int i = 0; i < l.length; ++i) { + l[i] = e.getChildren(i); + } + dirmap.put(e.getParent(), l); + for (int i = 0; i < e.getNumOfRef(); i++) { + INodeSection.INodeReference.parseDelimitedFrom(in); + } + } + } + + private void loadINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + inodes.put(p.getId(), p); + } + } + + private void loadStringTable(InputStream in) throws IOException { + StringTableSection s = StringTableSection.parseDelimitedFrom(in); + stringTable = new String[s.getNumEntry() + 1]; + for (int i = 0; i < s.getNumEntry(); ++i) { + StringTableSection.Entry e = StringTableSection.Entry + .parseDelimitedFrom(in); + stringTable[e.getId()] = e.getStr(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java new file mode 100644 index 00000000000..2d8c42d39d1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.RandomAccessFile; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; + +/** + * OfflineImageViewer to dump the contents of an Hadoop image file to XML or the + * console. Main entry point into utility, either via the command line or + * programatically. + */ +@InterfaceAudience.Private +public class OfflineImageViewerPB { + public static final Log LOG = LogFactory.getLog(OfflineImageViewerPB.class); + + private final static String usage = "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" + + "Offline Image Viewer\n" + + "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" + + "saving the results in OUTPUTFILE.\n" + + "\n" + + "The oiv utility will attempt to parse correctly formed image files\n" + + "and will abort fail with mal-formed image files.\n" + + "\n" + + "The tool works offline and does not require a running cluster in\n" + + "order to process an image file.\n" + + "\n" + + "The following image processors are available:\n" + + " * Ls: The default image processor generates an lsr-style listing\n" + + " of the files in the namespace, with the same fields in the same\n" + + " order. Note that in order to correctly determine file sizes,\n" + + " this formatter cannot skip blocks and will override the\n" + + " -skipBlocks option.\n" + + " * XML: This processor creates an XML document with all elements of\n" + + " the fsimage enumerated, suitable for further analysis by XML\n" + + " tools.\n" + + " * FileDistribution: This processor analyzes the file size\n" + + " distribution in the image.\n" + + " -maxSize specifies the range [0, maxSize] of file sizes to be\n" + + " analyzed (128GB by default).\n" + + " -step defines the granularity of the distribution. (2MB by default)\n" + + "\n" + + "Required command line arguments:\n" + + "-i,--inputFile FSImage file to process.\n" + + "-o,--outputFile Name of output file. If the specified\n" + + " file exists, it will be overwritten.\n" + + "\n" + + "Optional command line arguments:\n" + + "-p,--processor Select which type of processor to apply\n" + + " against image file." 
+ + " (Ls|XML|FileDistribution).\n" + + "-h,--help Display usage information and exit\n"; + + /** + * Build command-line options and descriptions + */ + private static Options buildOptions() { + Options options = new Options(); + + // Build in/output file arguments, which are required, but there is no + // addOption method that can specify this + OptionBuilder.isRequired(); + OptionBuilder.hasArgs(); + OptionBuilder.withLongOpt("outputFile"); + options.addOption(OptionBuilder.create("o")); + + OptionBuilder.isRequired(); + OptionBuilder.hasArgs(); + OptionBuilder.withLongOpt("inputFile"); + options.addOption(OptionBuilder.create("i")); + + options.addOption("p", "processor", true, ""); + options.addOption("h", "help", false, ""); + options.addOption("skipBlocks", false, ""); + options.addOption("printToScreen", false, ""); + options.addOption("delimiter", true, ""); + + return options; + } + + /** + * Entry point to command-line-driven operation. User may specify options and + * start fsimage viewer from the command line. Program will process image file + * and exit cleanly or, if an error is encountered, inform user and exit. + * + * @param args + * Command line options + * @throws IOException + */ + public static void main(String[] args) throws IOException { + Options options = buildOptions(); + if (args.length == 0) { + printUsage(); + return; + } + + CommandLineParser parser = new PosixParser(); + CommandLine cmd; + + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + System.out.println("Error parsing command-line options: "); + printUsage(); + return; + } + + if (cmd.hasOption("h")) { // print help and exit + printUsage(); + return; + } + + String inputFile = cmd.getOptionValue("i"); + String processor = cmd.getOptionValue("p", "Ls"); + String outputFile = cmd.getOptionValue("o"); + + PrintWriter out = (outputFile == null || outputFile.equals("-")) ? new PrintWriter( + System.out) : new PrintWriter(new File(outputFile)); + + Configuration conf = new Configuration(); + try { + if (processor.equals("FileDistribution")) { + long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0")); + int step = Integer.parseInt(cmd.getOptionValue("step", "0")); + new FileDistributionCalculator(conf, maxSize, step, out) + .visit(new RandomAccessFile(inputFile, "r")); + } else if (processor.equals("XML")) { + new PBImageXmlWriter(conf, out).visit(new RandomAccessFile(inputFile, + "r")); + } else { + new LsrPBImage(conf, out).visit(new RandomAccessFile(inputFile, "r")); + } + } catch (EOFException e) { + System.err.println("Input file ended unexpectedly. Exiting"); + } catch (IOException e) { + System.err.println("Encountered exception. Exiting: " + e.getMessage()); + } finally { + out.close(); + } + + } + + /** + * Print application usage instructions. + */ + private static void printUsage() { + System.out.println(usage); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java new file mode 100644 index 00000000000..7ebf1196c4b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java @@ -0,0 +1,415 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.collect.Lists; +import com.google.common.io.LimitInputStream; + +/** + * This is the tool for analyzing file sizes in the namespace image. In order to + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. The range of integers is + * divided into segments of size step: + * [0, s1, ..., sn-1, maxSize], and the visitor + * calculates how many files in the system fall into each segment + * [si-1, si). Note that files larger than + * maxSize always fall into the very last segment. + * + *

+ * Input.
+ * <ul>
+ * <li>filename specifies the location of the image file;</li>
+ * <li>maxSize determines the range [0, maxSize] of file sizes
+ * considered by the visitor;</li>
+ * <li>step - the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * Output.
The output file is formatted as a tab separated two column + * table: Size and NumFiles. Where Size represents the start of the segment, and + * numFiles is the number of files form the image which size falls in this + * segment. + * + */ +@InterfaceAudience.Private +public final class PBImageXmlWriter { + private final Configuration conf; + private final PrintWriter out; + private String[] stringTable; + + public PBImageXmlWriter(Configuration conf, PrintWriter out) { + this.conf = conf; + this.out = out; + } + + public void visit(RandomAccessFile file) throws IOException { + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + FileInputStream fin = null; + try { + fin = new FileInputStream(file.getFD()); + out.print("\n"); + + ArrayList sections = Lists.newArrayList(summary + .getSectionsList()); + Collections.sort(sections, new Comparator() { + @Override + public int compare(FileSummary.Section s1, FileSummary.Section s2) { + SectionName n1 = SectionName.fromString(s1.getName()); + SectionName n2 = SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + for (FileSummary.Section s : sections) { + fin.getChannel().position(s.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, s.getLength()))); + + switch (SectionName.fromString(s.getName())) { + case NS_INFO: + dumpNameSection(is); + break; + case STRING_TABLE: + loadStringTable(is); + break; + case INODE: + dumpINodeSection(is); + break; + case INODE_DIR: + dumpINodeDirectorySection(is); + break; + case FILES_UNDERCONSTRUCTION: + dumpFileUnderConstructionSection(is); + break; + case SNAPSHOT: + dumpSnapshotSection(is); + break; + case SNAPSHOT_DIFF: + dumpSnapshotDiffSection(is); + break; + case SECRET_MANAGER: + dumpSecretManagerSection(is); + break; + case CACHE_MANAGER: + dumpCacheManagerSection(is); + break; + default: + break; + } + } + } finally { + IOUtils.cleanup(null, fin); + } + } + + private void dumpCacheManagerSection(InputStream is) throws IOException { + out.print(""); + CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(is); + o("nextDirectiveId", s.getNextDirectiveId()); + for (int i = 0; i < s.getNumPools(); ++i) { + CachePoolInfoProto p = CachePoolInfoProto.parseDelimitedFrom(is); + out.print(""); + o("poolName", p.getPoolName()).o("ownerName", p.getOwnerName()) + .o("groupName", p.getGroupName()).o("mode", p.getMode()) + .o("limit", p.getLimit()) + .o("maxRelativeExpiry", p.getMaxRelativeExpiry()); + out.print("\n"); + } + for (int i = 0; i < s.getNumPools(); ++i) { + CacheDirectiveInfoProto p = CacheDirectiveInfoProto + .parseDelimitedFrom(is); + out.print(""); + o("id", p.getId()).o("path", p.getPath()) + .o("replication", p.getReplication()).o("pool", p.getPool()); + out.print(""); + CacheDirectiveInfoExpirationProto e = p.getExpiration(); + o("millis", e.getMillis()).o("relatilve", e.getIsRelative()); + out.print("\n"); + out.print("\n"); + } + out.print("\n"); + + } + + private void dumpFileUnderConstructionSection(InputStream in) + throws IOException { + out.print(""); + while (true) { + FileUnderConstructionEntry e = FileUnderConstructionEntry + .parseDelimitedFrom(in); + if (e == null) { + break; + } + out.print(""); + o("id", e.getInodeId()).o("path", 
e.getFullPath()); + out.print("\n"); + } + out.print("\n"); + } + + private void dumpINodeDirectory(INodeDirectory d) { + o("mtime", d.getModificationTime()).o("permission", + dumpPermission(d.getPermission())); + + if (d.hasDsQuota() && d.hasNsQuota()) { + o("nsquota", d.getNsQuota()).o("dsquota", d.getDsQuota()); + } + } + + private void dumpINodeDirectorySection(InputStream in) throws IOException { + out.print(""); + while (true) { + INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry + .parseDelimitedFrom(in); + // note that in is a LimitedInputStream + if (e == null) { + break; + } + out.print(""); + o("parent", e.getParent()); + for (long id : e.getChildrenList()) { + o("inode", id); + } + for (int i = 0; i < e.getNumOfRef(); i++) { + INodeSection.INodeReference r = INodeSection.INodeReference + .parseDelimitedFrom(in); + dumpINodeReference(r); + + } + out.print("\n"); + } + out.print("\n"); + } + + private void dumpINodeReference(INodeSection.INodeReference r) { + out.print(""); + o("referredId", r.getReferredId()).o("name", r.getName().toStringUtf8()) + .o("dstSnapshotId", r.getDstSnapshotId()) + .o("lastSnapshotId", r.getLastSnapshotId()); + out.print("\n"); + } + + private void dumpINodeFile(INodeSection.INodeFile f) { + o("replication", f.getReplication()).o("mtime", f.getModificationTime()) + .o("atime", f.getAccessTime()) + .o("perferredBlockSize", f.getPreferredBlockSize()) + .o("permission", dumpPermission(f.getPermission())); + + if (f.getBlocksCount() > 0) { + out.print(""); + for (BlockProto b : f.getBlocksList()) { + out.print(""); + o("id", b.getBlockId()).o("genstamp", b.getGenStamp()).o("numBytes", + b.getNumBytes()); + out.print("\n"); + } + out.print("\n"); + } + + if (f.hasFileUC()) { + INodeSection.FileUnderConstructionFeature u = f.getFileUC(); + out.print(""); + o("clientName", u.getClientName()).o("clientMachine", + u.getClientMachine()); + out.print("\n"); + } + } + + private void dumpINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + out.print(""); + o("lastInodeId", s.getLastInodeId()); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + out.print(""); + o("id", p.getId()).o("type", p.getType()).o("name", + p.getName().toStringUtf8()); + + if (p.hasFile()) { + dumpINodeFile(p.getFile()); + } else if (p.hasDirectory()) { + dumpINodeDirectory(p.getDirectory()); + } else if (p.hasSymlink()) { + dumpINodeSymlink(p.getSymlink()); + } + + out.print("\n"); + } + out.print("\n"); + } + + private void dumpINodeSymlink(INodeSymlink s) { + o("permission", dumpPermission(s.getPermission())).o("target", + s.getTarget().toStringUtf8()); + } + + private void dumpNameSection(InputStream in) throws IOException { + NameSystemSection s = NameSystemSection.parseDelimitedFrom(in); + out.print("\n"); + o("genstampV1", s.getGenstampV1()).o("genstampV2", s.getGenstampV2()) + .o("genstampV1Limit", s.getGenstampV1Limit()) + .o("lastAllocatedBlockId", s.getLastAllocatedBlockId()) + .o("txid", s.getTransactionId()); + out.print("\n"); + } + + private String dumpPermission(long permission) { + return FSImageFormatPBINode.Loader.loadPermission(permission, stringTable) + .toString(); + } + + private void dumpSecretManagerSection(InputStream is) throws IOException { + out.print(""); + SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(is); + o("currentId", s.getCurrentId()).o("tokenSequenceNumber", + s.getTokenSequenceNumber()); + 
out.print(""); + } + + private void dumpSnapshotDiffSection(InputStream in) throws IOException { + out.print(""); + while (true) { + SnapshotDiffSection.DiffEntry e = SnapshotDiffSection.DiffEntry + .parseDelimitedFrom(in); + if (e == null) { + break; + } + out.print(""); + o("inodeid", e.getInodeId()); + switch (e.getType()) { + case FILEDIFF: { + for (int i = 0; i < e.getNumOfDiff(); ++i) { + out.print(""); + SnapshotDiffSection.FileDiff f = SnapshotDiffSection.FileDiff + .parseDelimitedFrom(in); + o("snapshotId", f.getSnapshotId()).o("size", f.getFileSize()).o( + "name", f.getName().toStringUtf8()); + out.print("\n"); + } + } + break; + case DIRECTORYDIFF: { + for (int i = 0; i < e.getNumOfDiff(); ++i) { + out.print(""); + SnapshotDiffSection.DirectoryDiff d = SnapshotDiffSection.DirectoryDiff + .parseDelimitedFrom(in); + o("snapshotId", d.getSnapshotId()) + .o("isSnapshotroot", d.getIsSnapshotRoot()) + .o("childrenSize", d.getChildrenSize()) + .o("name", d.getName().toStringUtf8()); + + for (int j = 0; j < d.getCreatedListSize(); ++j) { + SnapshotDiffSection.CreatedListEntry ce = SnapshotDiffSection.CreatedListEntry + .parseDelimitedFrom(in); + out.print(""); + o("name", ce.getName().toStringUtf8()); + out.print("\n"); + } + for (int j = 0; j < d.getNumOfDeletedRef(); ++j) { + INodeSection.INodeReference r = INodeSection.INodeReference + .parseDelimitedFrom(in); + dumpINodeReference(r); + } + out.print("\n"); + } + } + break; + default: + break; + } + out.print(""); + } + out.print("\n"); + } + + private void dumpSnapshotSection(InputStream in) throws IOException { + out.print(""); + SnapshotSection s = SnapshotSection.parseDelimitedFrom(in); + o("snapshotCounter", s.getSnapshotCounter()); + if (s.getSnapshottableDirCount() > 0) { + out.print(""); + for (long id : s.getSnapshottableDirList()) { + o("dir", id); + } + out.print("\n"); + } + for (int i = 0; i < s.getNumSnapshots(); ++i) { + SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot + .parseDelimitedFrom(in); + o("snapshot", pbs.getSnapshotId()); + } + out.print("\n"); + } + + private void loadStringTable(InputStream in) throws IOException { + StringTableSection s = StringTableSection.parseDelimitedFrom(in); + stringTable = new String[s.getNumEntry() + 1]; + for (int i = 0; i < s.getNumEntry(); ++i) { + StringTableSection.Entry e = StringTableSection.Entry + .parseDelimitedFrom(in); + stringTable[e.getId()] = e.getStr(); + } + } + + private PBImageXmlWriter o(final String e, final Object v) { + out.print("<" + e + ">" + v + ""); + return this; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto new file mode 100644 index 00000000000..af7ba874d29 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option java_package = "org.apache.hadoop.hdfs.server.namenode"; +option java_outer_classname = "FsImageProto"; + +package hadoop.hdfs.fsimage; + +import "hdfs.proto"; + +/** + * This file defines the on-disk layout of the file system image. The + * layout is defined by the following EBNF grammar, in which angle + * brackets mark protobuf definitions. (e.g., ) + * + * FILE := MAGIC SECTION* FileSummaryLength + * MAGIC := 'HDFSIMG1' + * SECTION := | ... + * FileSummaryLength := 4 byte int + * + * Some notes: + * + * The codec field in FileSummary describes the compression codec used + * for all sections. The fileheader is always uncompressed. + * + * All protobuf messages are serialized in delimited form, which means + * that there always will be an integer indicates the size of the + * protobuf message. + * + */ + +message FileSummary { + // The version of the above EBNF grammars. + required uint32 ondiskVersion = 1; + // layoutVersion describes which features are available in the + // FSImage. + required uint32 layoutVersion = 2; + optional string codec = 3; + // index for each section + message Section { + optional string name = 1; + optional uint64 length = 2; + optional uint64 offset = 3; + } + repeated Section sections = 4; +} + +/** + * Name: NS_INFO + */ +message NameSystemSection { + optional uint32 namespaceId = 1; + optional uint64 genstampV1 = 2; + optional uint64 genstampV2 = 3; + optional uint64 genstampV1Limit = 4; + optional uint64 lastAllocatedBlockId = 5; + optional uint64 transactionId = 6; +} + +/** + * Permission is serialized as a 64-bit long. [0:24):[25:48):[48:64) (in Big Endian). + * The first and the second parts are the string ids of the user and + * group name, and the last 16 bits are the permission bits. + * + * Name: INODE + */ +message INodeSection { + /** + * under-construction feature for INodeFile + */ + message FileUnderConstructionFeature { + optional string clientName = 1; + optional string clientMachine = 2; + } + + message INodeFile { + optional uint32 replication = 1; + optional uint64 modificationTime = 2; + optional uint64 accessTime = 3; + optional uint64 preferredBlockSize = 4; + optional fixed64 permission = 5; + repeated BlockProto blocks = 6; + optional FileUnderConstructionFeature fileUC = 7; + } + + message INodeDirectory { + optional uint64 modificationTime = 1; + // namespace quota + optional uint64 nsQuota = 2; + // diskspace quota + optional uint64 dsQuota = 3; + optional fixed64 permission = 4; + } + + message INodeSymlink { + optional fixed64 permission = 1; + optional bytes target = 2; + } + + message INodeReference { + // id of the referred inode + optional uint64 referredId = 1; + // local name recorded in WithName + optional bytes name = 2; + // recorded in DstReference + optional uint32 dstSnapshotId = 3; + // recorded in WithName + optional uint32 lastSnapshotId = 4; + } + + message INode { + enum Type { + FILE = 1; + DIRECTORY = 2; + SYMLINK = 3; + }; + required Type type = 1; + required uint64 id = 2; + optional bytes name = 3; + + optional INodeFile file = 4; + optional INodeDirectory directory = 5; + optional INodeSymlink symlink = 6; + } + + optional uint64 lastInodeId = 1; + optional uint64 numInodes = 2; + // repeated INodes.. +} + +/** + * This section records information about under-construction files for + * reconstructing the lease map. 
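Two details in the layout and permission comments above are worth spelling out; note the permission bit ranges are contiguous, i.e. [0,24), [24,48), [48,64) counted from the most significant bit. The sketch below is my reading of the format, not code from this patch: readSummary() shows how the trailing 4-byte length locates the delimited FileSummary (what FSImageUtil is presumably doing when it loads the summary), and unpackPermission() shows how the fixed64 permission splits into a 24-bit user string-table id, a 24-bit group string-table id and 16 permission bits.

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;

class FsImageLayoutSketch {
  // The file ends with a 4-byte big-endian length; the length-delimited
  // FileSummary record sits immediately before it.
  static FileSummary readSummary(RandomAccessFile file) throws IOException {
    final int lengthFieldSize = 4;
    long fileLength = file.length();
    file.seek(fileLength - lengthFieldSize);
    int summaryLength = file.readInt();
    file.seek(fileLength - lengthFieldSize - summaryLength);
    byte[] buf = new byte[summaryLength];
    file.readFully(buf);
    return FileSummary.parseDelimitedFrom(new ByteArrayInputStream(buf));
  }

  // From most to least significant bits: 24-bit user string-table id,
  // 24-bit group string-table id, 16 permission bits.
  static String unpackPermission(long permission, String[] stringTable) {
    int mode = (int) (permission & 0xFFFF);
    int groupId = (int) ((permission >>> 16) & 0xFFFFFF);
    int userId = (int) ((permission >>> 40) & 0xFFFFFF);
    return String.format("%s:%s:%o", stringTable[userId], stringTable[groupId], mode);
  }
}
```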
+ * NAME: FILES_UNDERCONSTRUCTION + */ +message FilesUnderConstructionSection { + message FileUnderConstructionEntry { + optional uint64 inodeId = 1; + optional string fullPath = 2; + } + // repeated FileUnderConstructionEntry... +} + +/** + * This section records the children of each directories + * NAME: INODE_DIR + */ +message INodeDirectorySection { + message DirEntry { + optional uint64 parent = 1; + repeated uint64 children = 2 [packed = true]; + optional uint64 numOfRef = 3; + // repeated INodeReference... + } + // repeated DirEntry, ended at the boundary of the section. +} + +/** + * This section records the information about snapshot + * NAME: SNAPSHOT + */ +message SnapshotSection { + message Snapshot { + optional uint32 snapshotId = 1; + // Snapshot root + optional INodeSection.INode root = 2; + } + + optional uint32 snapshotCounter = 1; + repeated uint64 snapshottableDir = 2 [packed = true]; + // total number of snapshots + optional uint32 numSnapshots = 3; + // repeated Snapshot... +} + +/** + * This section records information about snapshot diffs + * NAME: SNAPSHOT_DIFF + */ +message SnapshotDiffSection { + message CreatedListEntry { + optional bytes name = 1; + } + + message DirectoryDiff { + optional uint32 snapshotId = 1; + optional uint32 childrenSize = 2; + optional bool isSnapshotRoot = 3; + optional bytes name = 4; + optional INodeSection.INodeDirectory snapshotCopy = 5; + optional uint32 createdListSize = 6; + optional uint32 numOfDeletedRef = 7; // number of reference nodes in deleted list + repeated uint64 deletedINode = 8 [packed = true]; // id of deleted inode + // repeated CreatedListEntry (size is specified by createdListSize) + // repeated INodeReference (reference inodes in deleted list) + } + + message FileDiff { + optional uint32 snapshotId = 1; + optional uint64 fileSize = 2; + optional bytes name = 3; + optional INodeSection.INodeFile snapshotCopy = 4; + } + + message DiffEntry { + enum Type { + FILEDIFF = 1; + DIRECTORYDIFF = 2; + } + required Type type = 1; + optional uint64 inodeId = 2; + optional uint32 numOfDiff = 3; + + // repeated DirectoryDiff or FileDiff + } + + // repeated DiffEntry +} + +/** + * This section maps string to id + * NAME: STRING_TABLE + */ +message StringTableSection { + message Entry { + optional uint32 id = 1; + optional string str = 2; + } + optional uint32 numEntry = 1; + // repeated Entry +} + +message SecretManagerSection { + message DelegationKey { + optional uint32 id = 1; + optional uint64 expiryDate = 2; + optional bytes key = 3; + } + message PersistToken { + optional uint32 version = 1; + optional string owner = 2; + optional string renewer = 3; + optional string realUser = 4; + optional uint64 issueDate = 5; + optional uint64 maxDate = 6; + optional uint32 sequenceNumber = 7; + optional uint32 masterKeyId = 8; + optional uint64 expiryDate = 9; + } + optional uint32 currentId = 1; + optional uint32 tokenSequenceNumber = 2; + optional uint32 numKeys = 3; + optional uint32 numTokens = 4; + // repeated DelegationKey keys + // repeated PersistToken tokens +} + +message CacheManagerSection { + required uint64 nextDirectiveId = 1; + required uint32 numPools = 2; + required uint32 numDirectives = 3; + // repeated CachePoolInfoProto pools + // repeated CacheDirectiveInfoProto directives +} + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java new file mode 100644 
index 00000000000..552b091b7b4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.EnumSet; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSOutputStream; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; +import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; +import org.apache.hadoop.hdfs.util.MD5FileUtils; +import org.junit.Test; + +public class TestFSImage { + + @Test + public void testPersist() throws IOException { + Configuration conf = new Configuration(); + testPersistHelper(conf); + } + + @Test + public void testCompression() throws IOException { + Configuration conf = new Configuration(); + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); + conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, + "org.apache.hadoop.io.compress.GzipCodec"); + testPersistHelper(conf); + } + + private void testPersistHelper(Configuration conf) throws IOException { + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + FSNamesystem fsn = cluster.getNamesystem(); + DistributedFileSystem fs = cluster.getFileSystem(); + + final Path dir = new Path("/abc/def"); + final Path file1 = new Path(dir, "f1"); + final Path file2 = new Path(dir, "f2"); + + // create an empty file f1 + fs.create(file1).close(); + + // create an under-construction file f2 + FSDataOutputStream out = fs.create(file2); + out.writeBytes("hello"); + ((DFSOutputStream) out.getWrappedStream()).hsync(EnumSet + .of(SyncFlag.UPDATE_LENGTH)); + + // checkpoint + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNode(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + + assertTrue(fs.isDirectory(dir)); + assertTrue(fs.exists(file1)); + assertTrue(fs.exists(file2)); + + // check internals of file2 + INodeFile file2Node = 
fsn.dir.getINode4Write(file2.toString()).asFile(); + assertEquals("hello".length(), file2Node.computeFileSize()); + assertTrue(file2Node.isUnderConstruction()); + BlockInfo[] blks = file2Node.getBlocks(); + assertEquals(1, blks.length); + assertEquals(BlockUCState.UNDER_CONSTRUCTION, blks[0].getBlockUCState()); + // check lease manager + Lease lease = fsn.leaseManager.getLeaseByPath(file2.toString()); + Assert.assertNotNull(lease); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + + /** + * Ensure that the digest written by the saver equals to the digest of the + * file. + */ + @Test + public void testDigest() throws IOException { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + DistributedFileSystem fs = cluster.getFileSystem(); + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + File currentDir = FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0).get( + 0); + File fsimage = FSImageTestUtil.findNewestImageFile(currentDir + .getAbsolutePath()); + assertEquals(MD5FileUtils.readStoredMd5ForFile(fsimage), + MD5FileUtils.computeMd5ForFile(fsimage)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java index 21935d05d9c..f3cbf15aae2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java @@ -140,7 +140,7 @@ public class TestFSImageWithSnapshot { private File saveFSImageToTempFile() throws IOException { SaveNamespaceContext context = new SaveNamespaceContext(fsn, txid, new Canceler()); - FSImageFormat.Saver saver = new FSImageFormat.Saver(context); + FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); File imageFile = getImageFile(testDir, txid); fsn.readLock(); @@ -154,7 +154,7 @@ public class TestFSImageWithSnapshot { /** Load the fsimage from a temp file */ private void loadFSImageFromTempFile(File imageFile) throws IOException { - FSImageFormat.Loader loader = new FSImageFormat.Loader(conf, fsn); + FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, fsn); fsn.writeLock(); fsn.getFSDirectory().writeLock(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 3ff5d54dc66..0ca112da5c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -287,7 +287,6 @@ public class TestStandbyCheckpoints { doEdits(0, 1000); nn0.getRpcServer().rollEditLog(); answerer.waitForCall(); - answerer.proceed(); assertTrue("SBN is not performing checkpoint but it should be.", answerer.getFireCount() == 1 && answerer.getResultCount() == 0); @@ -306,6 +305,7 
@@ public class TestStandbyCheckpoints { // RPC to the SBN happened during the checkpoint. assertTrue("SBN should have still been checkpointing.", answerer.getFireCount() == 1 && answerer.getResultCount() == 0); + answerer.proceed(); answerer.waitForResult(); assertTrue("SBN should have finished checkpointing.", answerer.getFireCount() == 1 && answerer.getResultCount() == 1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java index 7fe8087f2a4..d4e887949e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java @@ -73,7 +73,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; -; /** Testing rename with snapshots. */ public class TestRenameWithSnapshots { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java index 27228bd0482..20cc1351e8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java @@ -25,6 +25,9 @@ import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.EnumSet; @@ -53,8 +56,7 @@ import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper.TestDirectoryTree; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper.TestDirectoryTree.Node; -import org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer; -import org.apache.hadoop.hdfs.tools.offlineImageViewer.XmlImageVisitor; +import org.apache.hadoop.hdfs.tools.offlineImageViewer.PBImageXmlWriter; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; @@ -245,8 +247,8 @@ public class TestSnapshot { * snapshots */ @Test - public void testOfflineImageViewer() throws Throwable { - runTestSnapshot(SNAPSHOT_ITERATION_NUMBER); + public void testOfflineImageViewer() throws Exception { + runTestSnapshot(1); // retrieve the fsimage. Note that we already save namespace to fsimage at // the end of each iteration of runTestSnapshot. 
@@ -254,31 +256,10 @@ public class TestSnapshot { FSImageTestUtil.getFSImage( cluster.getNameNode()).getStorage().getStorageDir(0)); assertNotNull("Didn't generate or can't find fsimage", originalFsimage); - - String ROOT = System.getProperty("test.build.data", "build/test/data"); - File testFile = new File(ROOT, "/image"); - String xmlImage = ROOT + "/image_xml"; - boolean success = false; - - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - XmlImageVisitor v = new XmlImageVisitor(xmlImage, true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, - true); - oiv.go(); - success = true; - } finally { - if (testFile.exists()) { - testFile.delete(); - } - // delete the xml file if the parsing is successful - if (success) { - File xmlImageFile = new File(xmlImage); - if (xmlImageFile.exists()) { - xmlImageFile.delete(); - } - } - } + StringWriter output = new StringWriter(); + PrintWriter o = new PrintWriter(output); + PBImageXmlWriter v = new PBImageXmlWriter(new Configuration(), o); + v.visit(new RandomAccessFile(originalFsimage, "r")); } private void runTestSnapshot(int iteration) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java index 11aa3b821f0..91a5c1521c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java @@ -20,23 +20,20 @@ package org.apache.hadoop.hdfs.tools.offlineImageViewer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import java.io.BufferedReader; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.io.StringWriter; import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -46,27 +43,29 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.PathUtils; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; - +import org.junit.rules.TemporaryFolder; /** - * Test function of OfflineImageViewer by: - * * confirming it can correctly process a valid fsimage 
file and that - * the processing generates a correct representation of the namespace - * * confirming it correctly fails to process an fsimage file with a layout - * version it shouldn't be able to handle - * * confirm it correctly bails on malformed image files, in particular, a - * file that ends suddenly. + * Test function of OfflineImageViewer by: * confirming it can correctly process + * a valid fsimage file and that the processing generates a correct + * representation of the namespace * confirming it correctly fails to process an + * fsimage file with a layout version it shouldn't be able to handle * confirm + * it correctly bails on malformed image files, in particular, a file that ends + * suddenly. */ public class TestOfflineImageViewer { private static final Log LOG = LogFactory.getLog(OfflineImageViewer.class); @@ -76,22 +75,22 @@ public class TestOfflineImageViewer { private static File originalFsimage = null; // Elements of lines of ls-file output to be compared to FileStatus instance - private static class LsElements { - public String perms; - public int replication; - public String username; - public String groupname; - public long filesize; - public char dir; // d if dir, - otherwise + private static final class LsElements { + private String perms; + private int replication; + private String username; + private String groupname; + private long filesize; + private boolean isDir; } - + // namespace as written to dfs, to be compared with viewer's output - final static HashMap writtenFiles = - new HashMap(); - - private static String ROOT = PathUtils.getTestDirName(TestOfflineImageViewer.class); - - // Create a populated namespace for later testing. Save its contents to a + final static HashMap writtenFiles = new HashMap(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + // Create a populated namespace for later testing. Save its contents to a // data structure and store its fsimage location. // We only want to generate the fsimage file once and use it for // multiple tests. 
@@ -100,35 +99,39 @@ public class TestOfflineImageViewer { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); - conf.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); - conf.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); - conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL, "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build(); cluster.waitActive(); FileSystem hdfs = cluster.getFileSystem(); - + int filesize = 256; - - // Create a reasonable namespace - for(int i = 0; i < NUM_DIRS; i++) { + + // Create a reasonable namespace + for (int i = 0; i < NUM_DIRS; i++) { Path dir = new Path("/dir" + i); hdfs.mkdirs(dir); writtenFiles.put(dir.toString(), pathToFileEntry(hdfs, dir.toString())); - for(int j = 0; j < FILES_PER_DIR; j++) { + for (int j = 0; j < FILES_PER_DIR; j++) { Path file = new Path(dir, "file" + j); FSDataOutputStream o = hdfs.create(file); - o.write(new byte[ filesize++ ]); + o.write(new byte[filesize++]); o.close(); - - writtenFiles.put(file.toString(), pathToFileEntry(hdfs, file.toString())); + + writtenFiles.put(file.toString(), + pathToFileEntry(hdfs, file.toString())); } } // Get delegation tokens so we log the delegation token op - Token[] delegationTokens = - hdfs.addDelegationTokens(TEST_RENEWER, null); + Token[] delegationTokens = hdfs + .addDelegationTokens(TEST_RENEWER, null); for (Token t : delegationTokens) { LOG.debug("got token " + t); } @@ -137,329 +140,113 @@ public class TestOfflineImageViewer { cluster.getNameNodeRpc() .setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); cluster.getNameNodeRpc().saveNamespace(); - + // Determine location of fsimage file - originalFsimage = FSImageTestUtil.findLatestImageFile( - FSImageTestUtil.getFSImage( - cluster.getNameNode()).getStorage().getStorageDir(0)); + originalFsimage = FSImageTestUtil.findLatestImageFile(FSImageTestUtil + .getFSImage(cluster.getNameNode()).getStorage().getStorageDir(0)); if (originalFsimage == null) { throw new RuntimeException("Didn't generate or can't find fsimage"); } LOG.debug("original FS image file is " + originalFsimage); } finally { - if(cluster != null) + if (cluster != null) cluster.shutdown(); } } - + @AfterClass public static void deleteOriginalFSImage() throws IOException { - if(originalFsimage != null && originalFsimage.exists()) { + if (originalFsimage != null && originalFsimage.exists()) { originalFsimage.delete(); } } - - // Convenience method to generate a file status from file system for + + // Convenience method to generate a file status from file system for // later comparison - private static FileStatus pathToFileEntry(FileSystem hdfs, String file) - throws IOException { + private static FileStatus pathToFileEntry(FileSystem hdfs, String file) + throws IOException { return hdfs.getFileStatus(new Path(file)); } - - // Verify that we can correctly generate an ls-style output for a valid + + // Verify that we can correctly generate an ls-style output for a valid // fsimage @Test public void outputOfLSVisitor() throws IOException { - File testFile = new 
File(ROOT, "/basicCheck"); - File outputFile = new File(ROOT, "/basicCheckOutput"); - - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - - ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false); - - oiv.go(); - - HashMap fileOutput = readLsfile(outputFile); - - compareNamespaces(writtenFiles, fileOutput); - } finally { - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); - } - LOG.debug("Correctly generated ls-style output."); - } - - // Confirm that attempting to read an fsimage file with an unsupported - // layout results in an error - @Test - public void unsupportedFSLayoutVersion() throws IOException { - File testFile = new File(ROOT, "/invalidLayoutVersion"); - File outputFile = new File(ROOT, "invalidLayoutVersionOutput"); - - try { - int badVersionNum = -432; - changeLayoutVersion(originalFsimage, testFile, badVersionNum); - ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false); - - try { - oiv.go(); - fail("Shouldn't be able to read invalid laytout version"); - } catch(IOException e) { - if(!e.getMessage().contains(Integer.toString(badVersionNum))) - throw e; // wasn't error we were expecting - LOG.debug("Correctly failed at reading bad image version."); + StringWriter output = new StringWriter(); + PrintWriter out = new PrintWriter(output); + LsrPBImage v = new LsrPBImage(new Configuration(), out); + v.visit(new RandomAccessFile(originalFsimage, "r")); + out.close(); + Pattern pattern = Pattern + .compile("([d\\-])([rwx\\-]{9})\\s*(-|\\d+)\\s*(\\w+)\\s*(\\w+)\\s*(\\d+)\\s*(\\d+)\\s*([\b/]+)"); + int count = 0; + for (String s : output.toString().split("\n")) { + Matcher m = pattern.matcher(s); + assertTrue(m.find()); + LsElements e = new LsElements(); + e.isDir = m.group(1).equals("d"); + e.perms = m.group(2); + e.replication = m.group(3).equals("-") ? 
0 : Integer.parseInt(m.group(3)); + e.username = m.group(4); + e.groupname = m.group(5); + e.filesize = Long.parseLong(m.group(7)); + String path = m.group(8); + if (!path.equals("/")) { + compareFiles(writtenFiles.get(path), e); } - } finally { - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); + ++count; } + assertEquals(writtenFiles.size() + 1, count); } - - // Verify that image viewer will bail on a file that ends unexpectedly - @Test - public void truncatedFSImage() throws IOException { - File testFile = new File(ROOT, "/truncatedFSImage"); - File outputFile = new File(ROOT, "/trucnatedFSImageOutput"); - try { - copyPartOfFile(originalFsimage, testFile); - assertTrue("Created truncated fsimage", testFile.exists()); - - ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false); - try { - oiv.go(); - fail("Managed to process a truncated fsimage file"); - } catch (EOFException e) { - LOG.debug("Correctly handled EOF"); - } - - } finally { - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); - } + @Test(expected = IOException.class) + public void testTruncatedFSImage() throws IOException { + File truncatedFile = folder.newFile(); + StringWriter output = new StringWriter(); + copyPartOfFile(originalFsimage, truncatedFile); + new FileDistributionCalculator(new Configuration(), 0, 0, new PrintWriter( + output)).visit(new RandomAccessFile(truncatedFile, "r")); } - - // Test that our ls file has all the same compenents of the original namespace - private void compareNamespaces(HashMap written, - HashMap fileOutput) { - assertEquals( "Should be the same number of files in both, plus one for root" - + " in fileoutput", fileOutput.keySet().size(), - written.keySet().size() + 1); - Set inFile = fileOutput.keySet(); - // For each line in the output file, verify that the namespace had a - // filestatus counterpart - for (String path : inFile) { - if (path.equals("/")) // root's not included in output from system call - continue; - - assertTrue("Path in file (" + path + ") was written to fs", written - .containsKey(path)); - - compareFiles(written.get(path), fileOutput.get(path)); - - written.remove(path); - } - - assertEquals("No more files were written to fs", 0, written.size()); - } - // Compare two files as listed in the original namespace FileStatus and // the output of the ls file from the image processor private void compareFiles(FileStatus fs, LsElements elements) { - assertEquals("directory listed as such", - fs.isDirectory() ? 
'd' : '-', elements.dir); - assertEquals("perms string equal", - fs.getPermission().toString(), elements.perms); + assertEquals("directory listed as such", fs.isDirectory(), elements.isDir); + assertEquals("perms string equal", fs.getPermission().toString(), + elements.perms); assertEquals("replication equal", fs.getReplication(), elements.replication); assertEquals("owner equal", fs.getOwner(), elements.username); assertEquals("group equal", fs.getGroup(), elements.groupname); assertEquals("lengths equal", fs.getLen(), elements.filesize); } - // Read the contents of the file created by the Ls processor - private HashMap readLsfile(File lsFile) throws IOException { - BufferedReader br = new BufferedReader(new FileReader(lsFile)); - String line = null; - HashMap fileContents = new HashMap(); - - while((line = br.readLine()) != null) - readLsLine(line, fileContents); - - br.close(); - return fileContents; - } - - // Parse a line from the ls output. Store permissions, replication, - // username, groupname and filesize in hashmap keyed to the path name - private void readLsLine(String line, HashMap fileContents) { - String elements [] = line.split("\\s+"); - - assertEquals("Not enough elements in ls output", 8, elements.length); - - LsElements lsLine = new LsElements(); - - lsLine.dir = elements[0].charAt(0); - lsLine.perms = elements[0].substring(1); - lsLine.replication = elements[1].equals("-") - ? 0 : Integer.valueOf(elements[1]); - lsLine.username = elements[2]; - lsLine.groupname = elements[3]; - lsLine.filesize = Long.valueOf(elements[4]); - // skipping date and time - - String path = elements[7]; - - // Check that each file in the ls output was listed once - assertFalse("LS file had duplicate file entries", - fileContents.containsKey(path)); - - fileContents.put(path, lsLine); - } - - // Copy one fsimage to another, changing the layout version in the process - private void changeLayoutVersion(File src, File dest, int newVersion) - throws IOException { - DataInputStream in = null; - DataOutputStream out = null; - - try { - in = new DataInputStream(new FileInputStream(src)); - out = new DataOutputStream(new FileOutputStream(dest)); - - in.readInt(); - out.writeInt(newVersion); - - byte [] b = new byte[1024]; - while( in.read(b) > 0 ) { - out.write(b); - } - } finally { - if(in != null) in.close(); - if(out != null) out.close(); - } - } - - // Only copy part of file into the other. 
Used for testing truncated fsimage private void copyPartOfFile(File src, File dest) throws IOException { - InputStream in = null; - OutputStream out = null; - - byte [] b = new byte[256]; - int bytesWritten = 0; - int count; - int maxBytes = 700; - + FileInputStream in = null; + FileOutputStream out = null; + final int MAX_BYTES = 700; try { in = new FileInputStream(src); out = new FileOutputStream(dest); - - while( (count = in.read(b)) > 0 && bytesWritten < maxBytes ) { - out.write(b); - bytesWritten += count; - } + in.getChannel().transferTo(0, MAX_BYTES, out.getChannel()); } finally { - if(in != null) in.close(); - if(out != null) out.close(); + IOUtils.cleanup(null, in); + IOUtils.cleanup(null, out); } } @Test - public void outputOfFileDistributionVisitor() throws IOException { - File testFile = new File(ROOT, "/basicCheck"); - File outputFile = new File(ROOT, "/fileDistributionCheckOutput"); + public void testFileDistributionVisitor() throws IOException { + StringWriter output = new StringWriter(); + PrintWriter o = new PrintWriter(output); + new FileDistributionCalculator(new Configuration(), 0, 0, o) + .visit(new RandomAccessFile(originalFsimage, "r")); + o.close(); - int totalFiles = 0; - BufferedReader reader = null; - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - ImageVisitor v = new FileDistributionVisitor(outputFile.getPath(), 0, 0); - OfflineImageViewer oiv = - new OfflineImageViewer(testFile.getPath(), v, false); + Pattern p = Pattern.compile("totalFiles = (\\d+)\n"); + Matcher matcher = p.matcher(output.getBuffer()); - oiv.go(); - - reader = new BufferedReader(new FileReader(outputFile)); - String line = reader.readLine(); - assertEquals(line, "Size\tNumFiles"); - while((line = reader.readLine()) != null) { - String[] row = line.split("\t"); - assertEquals(row.length, 2); - totalFiles += Integer.parseInt(row[1]); - } - } finally { - if (reader != null) { - reader.close(); - } - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); - } + assertTrue(matcher.find() && matcher.groupCount() == 1); + int totalFiles = Integer.parseInt(matcher.group(1)); assertEquals(totalFiles, NUM_DIRS * FILES_PER_DIR); } - - private static class TestImageVisitor extends ImageVisitor { - private List delegationTokenRenewers = new LinkedList(); - TestImageVisitor() { - } - - List getDelegationTokenRenewers() { - return delegationTokenRenewers; - } - - @Override - void start() throws IOException { - } - - @Override - void finish() throws IOException { - } - - @Override - void finishAbnormally() throws IOException { - } - - @Override - void visit(ImageElement element, String value) throws IOException { - if (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER) { - delegationTokenRenewers.add(value); - } - } - - @Override - void visitEnclosingElement(ImageElement element) throws IOException { - } - - @Override - void visitEnclosingElement(ImageElement element, ImageElement key, - String value) throws IOException { - } - - @Override - void leaveEnclosingElement() throws IOException { - } - } - - @Test - public void outputOfTestVisitor() throws IOException { - File testFile = new File(ROOT, "/basicCheck"); - - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - TestImageVisitor v = new TestImageVisitor(); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, true); - oiv.go(); - - // Validated stored delegation token identifiers. 
- List dtrs = v.getDelegationTokenRenewers(); - assertEquals(1, dtrs.size()); - assertEquals(TEST_RENEWER, dtrs.get(0)); - } finally { - if(testFile.exists()) testFile.delete(); - } - LOG.debug("Passed TestVisitor validation."); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index c6174327d112c413f165f5bc912915952859e83f..a3f3511c9eb15da0f4391d84096c45e678606fc1 100644 GIT binary patch delta 13 Vcmdm>xIvNS|NsAIHnOY{001zi2H^kz delta 13 Vcmdm>xIvNS|NsAIH?ph|001zn2I2q! diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index 3a60b6dc5c5..c7fafcccf5e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1,6 +1,6 @@ - -51 + -52 OP_START_LOG_SEGMENT From 204704a92df407c06951cca9a47e85f1e3ef5ba7 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Sun, 9 Feb 2014 20:49:18 +0000 Subject: [PATCH 02/47] HDFS-5837. dfs.namenode.replication.considerLoad should consider decommissioned nodes. Contributed by Tao Luo. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566410 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../BlockPlacementPolicyDefault.java | 8 +- .../hdfs/server/namenode/FSClusterStats.java | 6 + .../hdfs/server/namenode/FSNamesystem.java | 7 +- .../TestReplicationPolicyConsiderLoad.java | 161 ++++++++++++++++++ 5 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 22b201627a1..fe5e8bc59a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -940,6 +940,9 @@ Release 2.3.0 - UNRELEASED HDFS-5873. dfs.http.policy should have higher precedence over dfs.https.enable. (Haohui Mai via jing9) + HDFS-5837. dfs.namenode.replication.considerLoad should consider + decommissioned nodes. (Tao Luo via shv) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. 
Add storage type to the protocol and expose it in block report diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index f4dc208d731..8b740cd94c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -633,9 +633,11 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { // check the communication traffic of the target machine if (considerLoad) { double avgLoad = 0; - int size = clusterMap.getNumOfLeaves(); - if (size != 0 && stats != null) { - avgLoad = (double)stats.getTotalLoad()/size; + if (stats != null) { + int size = stats.getNumDatanodesInService(); + if (size != 0) { + avgLoad = (double)stats.getTotalLoad()/size; + } } if (node.getXceiverCount() > (2.0 * avgLoad)) { logNodeIsNotChosen(storage, "the node is too busy "); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java index f4827f38c8a..676aa0826c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java @@ -42,6 +42,12 @@ public interface FSClusterStats { * for writing targets, and false otherwise. */ public boolean isAvoidingStaleDataNodesForWrite(); + + /** + * Indicates number of datanodes that are in service. + * @return Number of datanodes that are both alive and not decommissioned. + */ + public int getNumDatanodesInService(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index f91c41c7610..5cd22ab4dd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6842,7 +6842,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return this.blockManager.getDatanodeManager() .shouldAvoidStaleDataNodesForWrite(); } - + + @Override // FSClusterStats + public int getNumDatanodesInService() { + return getNumLiveDataNodes() - getNumDecomLiveDataNodes(); + } + public SnapshotManager getSnapshotManager() { return snapshotManager; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java new file mode 100644 index 00000000000..0b84fd7c953 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.blockmanagement; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; +import org.apache.hadoop.hdfs.server.common.StorageInfo; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.test.PathUtils; +import org.apache.hadoop.util.VersionInfo; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestReplicationPolicyConsiderLoad { + + private static NameNode namenode; + private static DatanodeManager dnManager; + private static List dnrList; + private static DatanodeDescriptor[] dataNodes; + private static DatanodeStorageInfo[] storages; + + @BeforeClass + public static void setupCluster() throws IOException { + Configuration conf = new HdfsConfiguration(); + final String[] racks = { + "/rack1", + "/rack1", + "/rack1", + "/rack2", + "/rack2", + "/rack2"}; + storages = DFSTestUtil.createDatanodeStorageInfos(racks); + dataNodes = DFSTestUtil.toDatanodeDescriptor(storages); + FileSystem.setDefaultUri(conf, "hdfs://localhost:0"); + conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0"); + File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class); + conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, + new File(baseDir, "name").getPath()); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, true); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, true); + DFSTestUtil.formatNameNode(conf); + namenode = new NameNode(conf); + int blockSize = 1024; + + dnrList = new ArrayList(); + dnManager = namenode.getNamesystem().getBlockManager().getDatanodeManager(); + + // Register DNs + for (int i=0; i < 6; i++) { + DatanodeRegistration dnr = new DatanodeRegistration(dataNodes[i], + new StorageInfo(), new ExportedBlockKeys(), VersionInfo.getVersion()); + dnrList.add(dnr); + dnManager.registerDatanode(dnr); + dataNodes[i].getStorageInfos()[0].setUtilizationForTesting( + 
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L, + 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L); + dataNodes[i].updateHeartbeat( + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[i]), + 0L, 0L, 0, 0); + } + } + + /** + * Tests that chooseTarget with considerLoad set to true correctly calculates + * load with decommissioned nodes. + */ + @Test + public void testChooseTargetWithDecomNodes() throws IOException { + namenode.getNamesystem().writeLock(); + try { + // Decommission DNs so BlockPlacementPolicyDefault.isGoodTarget() + // returns false + for (int i = 0; i < 3; i++) { + DatanodeInfo d = dnManager.getDatanodeByXferAddr( + dnrList.get(i).getIpAddr(), + dnrList.get(i).getXferPort()); + d.setDecommissioned(); + } + String blockPoolId = namenode.getNamesystem().getBlockPoolId(); + dnManager.handleHeartbeat(dnrList.get(3), + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[3]), + blockPoolId, dataNodes[3].getCacheCapacity(), + dataNodes[3].getCacheRemaining(), + 2, 0, 0); + dnManager.handleHeartbeat(dnrList.get(4), + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[4]), + blockPoolId, dataNodes[4].getCacheCapacity(), + dataNodes[4].getCacheRemaining(), + 4, 0, 0); + dnManager.handleHeartbeat(dnrList.get(5), + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[5]), + blockPoolId, dataNodes[5].getCacheCapacity(), + dataNodes[5].getCacheRemaining(), + 4, 0, 0); + + // Call chooseTarget() + DatanodeStorageInfo[] targets = namenode.getNamesystem().getBlockManager() + .getBlockPlacementPolicy().chooseTarget("testFile.txt", 3, + dataNodes[0], new ArrayList(), false, null, + 1024, StorageType.DEFAULT); + + assertEquals(3, targets.length); + Set targetSet = new HashSet( + Arrays.asList(targets)); + for (int i = 3; i < storages.length; i++) { + assertTrue(targetSet.contains(storages[i])); + } + } finally { + dataNodes[0].stopDecommission(); + dataNodes[1].stopDecommission(); + dataNodes[2].stopDecommission(); + namenode.getNamesystem().writeUnlock(); + } + NameNode.LOG.info("Done working on it"); + } + + @AfterClass + public static void teardownCluster() { + if (namenode != null) namenode.stop(); + } + +} From ff24753aa7ba3aadfff8080d9709c6bc9cf07811 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Sun, 9 Feb 2014 21:07:28 +0000 Subject: [PATCH 03/47] HDFS-4370. Fix typo Blanacer in DataNode. Contributed by Chu Tong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566422 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/datanode/DataNode.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index fe5e8bc59a9..0adccb73376 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -360,6 +360,8 @@ Release 2.4.0 - UNRELEASED HDFS-4911. Reduce PeerCache timeout to be commensurate with dfs.datanode.socket.reuse.keepalive (cmccabe) + HDFS-4370. Fix typo Blanacer in DataNode. (Chu Tong via shv) + OPTIMIZATIONS HDFS-5790. 
LeaseManager.findPath is very slow when many leases need recovery diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index ad580a53d1d..42a63e73f72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -2494,7 +2494,7 @@ public class DataNode extends Configured /** * Get current value of the max balancer bandwidth in bytes per second. * - * @return bandwidth Blanacer bandwidth in bytes per second for this datanode. + * @return Balancer bandwidth in bytes per second for this datanode. */ public Long getBalancerBandwidth() { DataXceiverServer dxcs = From c7e265bf26a58d710967a56620c3eecc99b6b45b Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Sun, 9 Feb 2014 23:15:48 +0000 Subject: [PATCH 04/47] HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink(). Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566468 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java | 3 ++- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index c2fc70990ca..533fa220774 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -545,7 +545,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT); } if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) { - return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, null); + return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, + new byte[0]); } return new READLINK3Response(Nfs3Status.NFS3_OK, postOpAttr, diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0adccb73376..5a0e4216536 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -404,6 +404,9 @@ Release 2.4.0 - UNRELEASED HDFS-5900. Cannot set cache pool limit of "unlimited" via CacheAdmin. (wang) + HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink() + (brandonli) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES From 1c151c31faca77b67b7f4d50d682e76bc519d79a Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Mon, 10 Feb 2014 09:19:26 +0000 Subject: [PATCH 05/47] YARN-1497. 
Fix comment and remove accidental println git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566537 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/yarn/client/cli/ApplicationCLI.java | 6 +----- .../java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index 80e548d26e6..4332f5beeaf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -382,11 +382,7 @@ public class ApplicationCLI extends YarnCLI { } /** - * Kills the application with the application id as appId - * - * @param applicationId - * @throws YarnException - * @throws IOException + * Moves the application with the given ID to the given queue. */ private void moveApplicationAcrossQueues(String applicationId, String queue) throws YarnException, IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java index 12bc6be7316..97721864968 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java @@ -675,7 +675,6 @@ public class TestYarnCLI { int result = spyCli.run(new String[] { "-help" }); Assert.assertTrue(result == 0); verify(spyCli).printUsage(any(Options.class)); - System.err.println(sysOutStream.toString()); //todo sandyt remove this hejfkdsl Assert.assertEquals(createApplicationCLIHelpMessage(), sysOutStream.toString()); From bfd158f3231de96cab3308b219cb5278a43d0fe9 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Mon, 10 Feb 2014 19:34:54 +0000 Subject: [PATCH 06/47] =?UTF-8?q?HADOOP-10333.=20Fix=20grammatical=20error?= =?UTF-8?q?=20in=20overview.html=20document.=20Contributed=20by=20Ren?= =?UTF-8?q?=C3=A9=20Nyffenegger.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566709 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../hadoop-common/src/main/java/overview.html | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7a7106197d9..9eb7fae0f53 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -312,6 +312,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10295. Allow distcp to automatically identify the checksum type of source files and use it for the target. (jing9 and Laurent Goujon) + HADOOP-10333. Fix grammatical error in overview.html document. 
+ (René Nyffenegger via suresh) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/overview.html b/hadoop-common-project/hadoop-common/src/main/java/overview.html index 759c093aa59..5868617709b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/overview.html +++ b/hadoop-common-project/hadoop-common/src/main/java/overview.html @@ -57,7 +57,7 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:
  • - Hadoop was been demonstrated on GNU/Linux clusters with 2000 nodes. + Hadoop has been demonstrated on GNU/Linux clusters with more than 4000 nodes.
  • Windows is also a supported platform. From e74e117ad3e0b6c0572913f602a28934f87bba70 Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Mon, 10 Feb 2014 21:31:34 +0000 Subject: [PATCH 07/47] YARN-1637. Implemented a client library for Java users to post timeline entities and events. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566752 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop-yarn/hadoop-yarn-client/pom.xml | 4 + .../yarn/client/api/TimelineClient.java | 70 +++++++++ .../client/api/impl/TimelineClientImpl.java | 106 ++++++++++++++ .../client/api/impl/TestTimelineClient.java | 137 ++++++++++++++++++ 5 files changed, 320 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3039c6f9a07..59afe849981 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -119,6 +119,9 @@ Release 2.4.0 - UNRELEASED YARN-1635. Implemented a Leveldb based ApplicationTimelineStore. (Billie Rinaldi via zjshen) + YARN-1637. Implemented a client library for Java users to post timeline + entities and events. (zjshen) + IMPROVEMENTS YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml index 54da659fee6..6091686a036 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml @@ -79,6 +79,10 @@ org.mortbay.jetty jetty-util + + com.sun.jersey + jersey-client + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java new file mode 100644 index 00000000000..8be00ac6ff6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.client.api; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntity; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors; +import org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl; +import org.apache.hadoop.yarn.exceptions.YarnException; + +/** + * A client library that can be used to post some information in terms of a + * number of conceptual entities. + * + * @See ATSEntity + */ +@Public +@Unstable +public abstract class TimelineClient extends AbstractService { + + @Public + public static TimelineClient createTimelineClient() { + TimelineClient client = new TimelineClientImpl(); + return client; + } + + @Private + protected TimelineClient(String name) { + super(name); + } + + /** + *
<p>
    + * Post the information of a number of conceptual entities of an application + * to the timeline server. It is a blocking API. The method will not return + * until it gets the response from the timeline server. + *
</p>
    + * + * @param entities + * the collection of {@link ATSEntity} + * @return the error information if the post entities are not correctly stored + * @throws IOException + * @throws YarnException + */ + @Public + public abstract ATSPutErrors postEntities( + ATSEntity... entities) throws IOException, YarnException; + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java new file mode 100644 index 00000000000..9fcc2bd6e3d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.client.api.impl; + +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; + +import javax.ws.rs.core.MediaType; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntities; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntity; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors; +import org.apache.hadoop.yarn.client.api.TimelineClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.webapp.YarnJacksonJaxbJsonProvider; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.sun.jersey.api.client.Client; +import com.sun.jersey.api.client.ClientResponse; +import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.api.client.config.ClientConfig; +import com.sun.jersey.api.client.config.DefaultClientConfig; + +@Private +@Unstable +public class TimelineClientImpl extends TimelineClient { + + private static final Log LOG = LogFactory.getLog(TimelineClientImpl.class); + private static final String RESOURCE_URI_STR = "/ws/v1/apptimeline/"; + private static final Joiner JOINER = Joiner.on(""); + + private Client client; + private URI resURI; + + public TimelineClientImpl() { + super(TimelineClientImpl.class.getName()); + ClientConfig cc = new DefaultClientConfig(); + cc.getClasses().add(YarnJacksonJaxbJsonProvider.class); + client = Client.create(cc); + } + + protected void serviceInit(Configuration conf) throws Exception { + resURI = new 
URI(JOINER.join(HttpConfig.getSchemePrefix(), + HttpConfig.isSecure() ? conf.get( + YarnConfiguration.AHS_WEBAPP_HTTPS_ADDRESS, + YarnConfiguration.DEFAULT_AHS_WEBAPP_HTTPS_ADDRESS) : conf.get( + YarnConfiguration.AHS_WEBAPP_ADDRESS, + YarnConfiguration.DEFAULT_AHS_WEBAPP_ADDRESS), RESOURCE_URI_STR)); + super.serviceInit(conf); + } + + @Override + public ATSPutErrors postEntities( + ATSEntity... entities) throws IOException, YarnException { + ATSEntities entitiesContainer = new ATSEntities(); + entitiesContainer.addEntities(Arrays.asList(entities)); + ClientResponse resp = doPostingEntities(entitiesContainer); + if (resp.getClientResponseStatus() != ClientResponse.Status.OK) { + String msg = + "Failed to get the response from the timeline server."; + LOG.error(msg); + if (LOG.isDebugEnabled()) { + String output = resp.getEntity(String.class); + LOG.debug("HTTP error code: " + resp.getStatus() + + " Server response : \n" + output); + } + throw new YarnException(msg); + } + return resp.getEntity(ATSPutErrors.class); + } + + @Private + @VisibleForTesting + public ClientResponse doPostingEntities(ATSEntities entities) { + WebResource webResource = client.resource(resURI); + return webResource.accept(MediaType.APPLICATION_JSON) + .type(MediaType.APPLICATION_JSON) + .post(ClientResponse.class, entities); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java new file mode 100644 index 00000000000..a3917a2da57 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.client.api.impl; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import junit.framework.Assert; + +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntities; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntity; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEvent; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors; +import org.apache.hadoop.yarn.client.api.TimelineClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.sun.jersey.api.client.ClientResponse; + +public class TestTimelineClient { + + private TimelineClientImpl client; + + @Before + public void setup() { + client = spy((TimelineClientImpl) TimelineClient.createTimelineClient()); + client.init(new YarnConfiguration()); + client.start(); + } + + @After + public void tearDown() { + client.stop(); + } + + @Test + public void testPostEntities() throws Exception { + mockClientResponse(ClientResponse.Status.OK, false); + try { + ATSPutErrors errors = client.postEntities(generateATSEntity()); + Assert.assertEquals(0, errors.getErrors().size()); + } catch (YarnException e) { + Assert.fail("Exception is not expected"); + } + } + + @Test + public void testPostEntitiesWithError() throws Exception { + mockClientResponse(ClientResponse.Status.OK, true); + try { + ATSPutErrors errors = client.postEntities(generateATSEntity()); + Assert.assertEquals(1, errors.getErrors().size()); + Assert.assertEquals("test entity id", errors.getErrors().get(0) + .getEntityId()); + Assert.assertEquals("test entity type", errors.getErrors().get(0) + .getEntityType()); + Assert.assertEquals(ATSPutErrors.ATSPutError.IO_EXCEPTION, + errors.getErrors().get(0).getErrorCode()); + } catch (YarnException e) { + Assert.fail("Exception is not expected"); + } + } + + @Test + public void testPostEntitiesNoResponse() throws Exception { + mockClientResponse(ClientResponse.Status.INTERNAL_SERVER_ERROR, false); + try { + client.postEntities(generateATSEntity()); + Assert.fail("Exception is expected"); + } catch (YarnException e) { + Assert.assertTrue(e.getMessage().contains( + "Failed to get the response from the timeline server.")); + } + } + + private ClientResponse mockClientResponse(ClientResponse.Status status, + boolean hasError) { + ClientResponse response = mock(ClientResponse.class); + doReturn(response).when(client) + .doPostingEntities(any(ATSEntities.class)); + when(response.getClientResponseStatus()).thenReturn(status); + ATSPutErrors.ATSPutError error = new ATSPutErrors.ATSPutError(); + error.setEntityId("test entity id"); + error.setEntityType("test entity type"); + error.setErrorCode(ATSPutErrors.ATSPutError.IO_EXCEPTION); + ATSPutErrors errors = new ATSPutErrors(); + if (hasError) { + errors.addError(error); + } + when(response.getEntity(ATSPutErrors.class)).thenReturn(errors); + return response; + } + + private static ATSEntity generateATSEntity() { + ATSEntity entity = new ATSEntity(); + entity.setEntityId("entity id"); + entity.setEntityType("entity type"); + entity.setStartTime(System.currentTimeMillis()); + for (int i = 0; i < 2; ++i) { + ATSEvent event = new ATSEvent(); + event.setTimestamp(System.currentTimeMillis()); + event.setEventType("test event type " 
+ i); + event.addEventInfo("key1", "val1"); + event.addEventInfo("key2", "val2"); + entity.addEvent(event); + } + entity.addRelatedEntity("test ref type 1", "test ref id 1"); + entity.addRelatedEntity("test ref type 2", "test ref id 2"); + entity.addPrimaryFilter("pkey1", "pval1"); + entity.addPrimaryFilter("pkey2", "pval2"); + entity.addOtherInfo("okey1", "oval1"); + entity.addOtherInfo("okey2", "oval2"); + return entity; + } + +} From 1fa6ab249b0fa63cab550e1b7703339c4d888c5d Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Mon, 10 Feb 2014 22:50:15 +0000 Subject: [PATCH 08/47] YARN-1459. Changed ResourceManager to depend its service initialization on the configuration-provider mechanism during startup too. Contributed by Xuan Gong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566791 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/ipc/Server.java | 6 +- .../ServiceAuthorizationManager.java | 6 +- hadoop-yarn-project/CHANGES.txt | 4 + .../dev-support/findbugs-exclude.xml | 6 + .../yarn/conf/ConfigurationProvider.java | 15 +- .../conf/ConfigurationProviderFactory.java | 12 +- .../FileSystemBasedConfigurationProvider.java | 17 +- .../yarn/LocalConfigurationProvider.java | 9 +- .../server/resourcemanager/AdminService.java | 46 +++-- .../ApplicationMasterService.java | 20 +-- .../resourcemanager/ClientRMService.java | 15 +- .../server/resourcemanager/RMContext.java | 2 + .../server/resourcemanager/RMContextImpl.java | 18 +- .../resourcemanager/ResourceManager.java | 23 ++- .../ResourceTrackerService.java | 15 +- .../scheduler/capacity/CapacityScheduler.java | 26 ++- .../security/authorize/RMPolicyProvider.java | 19 ++ .../resourcemanager/TestRMAdminService.java | 170 ++++++++++-------- .../capacity/TestCapacityScheduler.java | 13 +- 19 files changed, 266 insertions(+), 176 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 9c67146265b..9871a3d138a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -66,6 +66,7 @@ import javax.security.sasl.SaslServer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration.IntegerRanges; @@ -454,9 +455,10 @@ public abstract class Server { * Refresh the service authorization ACL for the service handled by this server * using the specified Configuration. 
*/ - public void refreshServiceAclWithConfigration(Configuration conf, + @Private + public void refreshServiceAclWithLoadedConfiguration(Configuration conf, PolicyProvider provider) { - serviceAuthorizationManager.refreshWithConfiguration(conf, provider); + serviceAuthorizationManager.refreshWithLoadedConfiguration(conf, provider); } /** * Returns a handle to the serviceAuthorizationManager (required in tests) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java index cf032ba0980..66ab50cc58e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -122,10 +123,11 @@ public class ServiceAuthorizationManager { // Make a copy of the original config, and load the policy file Configuration policyConf = new Configuration(conf); policyConf.addResource(policyFile); - refreshWithConfiguration(policyConf, provider); + refreshWithLoadedConfiguration(policyConf, provider); } - public synchronized void refreshWithConfiguration(Configuration conf, + @Private + public synchronized void refreshWithLoadedConfiguration(Configuration conf, PolicyProvider provider) { final Map, AccessControlList> newAcls = new IdentityHashMap, AccessControlList>(); diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 59afe849981..d31a3efe5b8 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -169,6 +169,10 @@ Release 2.4.0 - UNRELEASED YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize app-attempts separately from apps. (Jian He via vinodkv) + YARN-1459. Changed ResourceManager to depend its service initialization + on the configuration-provider mechanism during startup too. 
(Xuan Gong via + vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 74ca61b8578..0fac0b98f1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -309,4 +309,10 @@ + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java index 78c34d9de98..b31573d39eb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.conf; import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -34,8 +33,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException; */ public abstract class ConfigurationProvider { - public void init(Configuration conf) throws Exception { - initInternal(conf); + public void init(Configuration bootstrapConf) throws Exception { + initInternal(bootstrapConf); } public void close() throws Exception { @@ -43,19 +42,21 @@ public abstract class ConfigurationProvider { } /** - * Get the configuration. + * Get the configuration and combine with bootstrapConf + * @param bootstrapConf Configuration * @param name The configuration file name * @return configuration * @throws YarnException * @throws IOException */ - public abstract Configuration getConfiguration(String name) - throws YarnException, IOException; + public abstract Configuration getConfiguration(Configuration bootstrapConf, + String name) throws YarnException, IOException; /** * Derived classes initialize themselves using this method. */ - public abstract void initInternal(Configuration conf) throws Exception; + public abstract void initInternal(Configuration bootstrapConf) + throws Exception; /** * Derived classes close themselves using this method. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java index 4adc72e1f11..3562f173acb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java @@ -33,12 +33,12 @@ public class ConfigurationProviderFactory { /** * Creates an instance of {@link ConfigurationProvider} using given * configuration. 
- * @param conf + * @param bootstrapConf * @return configurationProvider */ @SuppressWarnings("unchecked") public static ConfigurationProvider - getConfigurationProvider(Configuration conf) { + getConfigurationProvider(Configuration bootstrapConf) { Class defaultProviderClass; try { defaultProviderClass = (Class) @@ -49,9 +49,11 @@ public class ConfigurationProviderFactory { "Invalid default configuration provider class" + YarnConfiguration.DEFAULT_RM_CONFIGURATION_PROVIDER_CLASS, e); } - ConfigurationProvider configurationProvider = ReflectionUtils.newInstance( - conf.getClass(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - defaultProviderClass, ConfigurationProvider.class), conf); + ConfigurationProvider configurationProvider = + ReflectionUtils.newInstance(bootstrapConf.getClass( + YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + defaultProviderClass, ConfigurationProvider.class), + bootstrapConf); return configurationProvider; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java index 709f54a3529..390aace7d21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn; import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -42,24 +41,24 @@ public class FileSystemBasedConfigurationProvider private Path configDir; @Override - public synchronized Configuration getConfiguration(String name) - throws IOException, YarnException { + public synchronized Configuration getConfiguration(Configuration bootstrapConf, + String name) throws IOException, YarnException { Path configPath = new Path(this.configDir, name); if (!fs.exists(configPath)) { throw new YarnException("Can not find Configuration: " + name + " in " + configDir); } - Configuration conf = new Configuration(false); - conf.addResource(fs.open(configPath)); - return conf; + bootstrapConf.addResource(fs.open(configPath)); + return bootstrapConf; } @Override - public synchronized void initInternal(Configuration conf) throws Exception { + public synchronized void initInternal(Configuration bootstrapConf) + throws Exception { configDir = - new Path(conf.get(YarnConfiguration.FS_BASED_RM_CONF_STORE, + new Path(bootstrapConf.get(YarnConfiguration.FS_BASED_RM_CONF_STORE, YarnConfiguration.DEFAULT_FS_BASED_RM_CONF_STORE)); - fs = configDir.getFileSystem(conf); + fs = configDir.getFileSystem(bootstrapConf); if (!fs.exists(configDir)) { fs.mkdirs(configDir); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java index d152c353f08..3e6996036f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java @@ -19,7 +19,6 @@ package 
org.apache.hadoop.yarn; import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -31,13 +30,13 @@ import org.apache.hadoop.yarn.exceptions.YarnException; public class LocalConfigurationProvider extends ConfigurationProvider { @Override - public Configuration getConfiguration(String name) - throws IOException, YarnException { - return new Configuration(); + public Configuration getConfiguration(Configuration bootstrapConf, + String name) throws IOException, YarnException { + return bootstrapConf; } @Override - public void initInternal(Configuration conf) throws Exception { + public void initInternal(Configuration bootstrapConf) throws Exception { // Do nothing } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index d9c239e220a..da479b47ee8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceStatus; @@ -45,11 +46,8 @@ import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.service.CompositeService; -import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.ResourceOption; -import org.apache.hadoop.yarn.conf.ConfigurationProvider; -import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -92,8 +90,6 @@ public class AdminService extends CompositeService implements private Server server; private InetSocketAddress masterServiceAddress; private AccessControlList adminAcl; - - private ConfigurationProvider configurationProvider = null; private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); @@ -115,10 +111,6 @@ public class AdminService extends CompositeService implements } } - this.configurationProvider = - ConfigurationProviderFactory.getConfigurationProvider(conf); - configurationProvider.init(conf); - masterServiceAddress = conf.getSocketAddr( YarnConfiguration.RM_ADMIN_ADDRESS, YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS, @@ -139,9 +131,6 @@ public class AdminService extends CompositeService implements @Override protected synchronized void serviceStop() throws Exception { stopServer(); - if (this.configurationProvider != null) { - configurationProvider.close(); - } 
super.serviceStop(); } @@ -158,7 +147,10 @@ public class AdminService extends CompositeService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } if (rmContext.isHAEnabled()) { @@ -321,8 +313,8 @@ public class AdminService extends CompositeService implements RefreshQueuesResponse response = recordFactory.newRecordInstance(RefreshQueuesResponse.class); try { - Configuration conf = - getConfiguration(YarnConfiguration.CS_CONFIGURATION_FILE); + Configuration conf = getConfiguration(getConfig(), + YarnConfiguration.CS_CONFIGURATION_FILE); rmContext.getScheduler().reinitialize(conf, this.rmContext); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); @@ -376,7 +368,8 @@ public class AdminService extends CompositeService implements } Configuration conf = - getConfiguration(YarnConfiguration.CORE_SITE_CONFIGURATION_FILE); + getConfiguration(getConfig(), + YarnConfiguration.CORE_SITE_CONFIGURATION_FILE); ProxyUsers.refreshSuperUserGroupsConfiguration(conf); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); @@ -421,7 +414,7 @@ public class AdminService extends CompositeService implements throwStandbyException(); } Configuration conf = - getConfiguration(YarnConfiguration.YARN_SITE_XML_FILE); + getConfiguration(getConfig(), YarnConfiguration.YARN_SITE_XML_FILE); adminAcl = new AccessControlList(conf.get( YarnConfiguration.YARN_ADMIN_ACL, YarnConfiguration.DEFAULT_YARN_ADMIN_ACL)); @@ -452,9 +445,10 @@ public class AdminService extends CompositeService implements throwStandbyException(); } - PolicyProvider policyProvider = new RMPolicyProvider(); + PolicyProvider policyProvider = RMPolicyProvider.getInstance(); Configuration conf = - getConfiguration(YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE); + getConfiguration(getConfig(), + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE); refreshServiceAcls(conf, policyProvider); rmContext.getClientRMService().refreshServiceAcls(conf, policyProvider); @@ -466,12 +460,13 @@ public class AdminService extends CompositeService implements return recordFactory.newRecordInstance(RefreshServiceAclsResponse.class); } - synchronized void refreshServiceAcls(Configuration configuration, + private synchronized void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.configurationProvider instanceof LocalConfigurationProvider) { + if (this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } @@ -521,9 +516,10 @@ public class AdminService extends CompositeService implements return response; } - private synchronized Configuration getConfiguration(String confFileName) - throws YarnException, IOException { - return this.configurationProvider.getConfiguration(confFileName); + private synchronized Configuration getConfiguration(Configuration conf, + String confFileName) throws YarnException, IOException { + return this.rmContext.getConfigurationProvider().getConfiguration(conf, + confFileName); } @VisibleForTesting diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 2c4be13ee92..0c56134b811 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -105,7 +105,6 @@ public class ApplicationMasterService extends AbstractService implements private final AllocateResponse resync = recordFactory.newRecordInstance(AllocateResponse.class); private final RMContext rmContext; - private boolean useLocalConfigurationProvider; public ApplicationMasterService(RMContext rmContext, YarnScheduler scheduler) { super(ApplicationMasterService.class.getName()); @@ -115,15 +114,6 @@ public class ApplicationMasterService extends AbstractService implements this.rmContext = rmContext; } - @Override - protected void serviceInit(Configuration conf) throws Exception { - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); - super.serviceInit(conf); - } - @Override protected void serviceStart() throws Exception { Configuration conf = getConfig(); @@ -150,7 +140,10 @@ public class ApplicationMasterService extends AbstractService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + this.rmContext.getConfigurationProvider().getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } this.server.start(); @@ -591,10 +584,11 @@ public class ApplicationMasterService extends AbstractService implements public void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.useLocalConfigurationProvider) { + if (this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 2f8526a7c71..43e94edd1a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -136,7 +136,6 @@ public class ClientRMService extends AbstractService implements private final ApplicationACLsManager 
applicationsACLsManager; private final QueueACLsManager queueACLsManager; - private boolean useLocalConfigurationProvider; public ClientRMService(RMContext rmContext, YarnScheduler scheduler, RMAppManager rmAppManager, ApplicationACLsManager applicationACLsManager, @@ -154,10 +153,6 @@ public class ClientRMService extends AbstractService implements @Override protected void serviceInit(Configuration conf) throws Exception { clientBindAddress = getBindAddress(conf); - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); super.serviceInit(conf); } @@ -176,7 +171,10 @@ public class ClientRMService extends AbstractService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + this.rmContext.getConfigurationProvider().getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } this.server.start(); @@ -809,10 +807,11 @@ public class ClientRMService extends AbstractService implements void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.useLocalConfigurationProvider) { + if (this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 64a4165feb4..79fb5dfa23e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; @@ -97,4 +98,5 @@ public interface RMContext { void setRMApplicationHistoryWriter( RMApplicationHistoryWriter rmApplicationHistoryWriter); + ConfigurationProvider getConfigurationProvider(); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 79e59831e9d..689a0914190 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -23,8 +23,10 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; @@ -78,7 +80,7 @@ public class RMContextImpl implements RMContext { private ResourceTrackerService resourceTrackerService; private ApplicationMasterService applicationMasterService; private RMApplicationHistoryWriter rmApplicationHistoryWriter; - + private ConfigurationProvider configurationProvider; /** * Default constructor. To be used in conjunction with setter methods for * individual fields. @@ -119,8 +121,11 @@ public class RMContextImpl implements RMContext { } catch (Exception e) { assert false; } + + ConfigurationProvider provider = new LocalConfigurationProvider(); + setConfigurationProvider(provider); } - + @Override public Dispatcher getDispatcher() { return this.rmDispatcher; @@ -334,4 +339,13 @@ public class RMContextImpl implements RMContext { this.rmApplicationHistoryWriter = rmApplicationHistoryWriter; } + @Override + public ConfigurationProvider getConfigurationProvider() { + return this.configurationProvider; + } + + public void setConfigurationProvider( + ConfigurationProvider configurationProvider) { + this.configurationProvider = configurationProvider; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 8575cd57d65..1040cc5c526 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -42,10 +42,13 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; +import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -154,7 
+157,7 @@ public class ResourceManager extends CompositeService implements Recoverable { private boolean recoveryEnabled; private String webAppAddress; - + private ConfigurationProvider configurationProvider = null; /** End of Active services */ private Configuration conf; @@ -182,6 +185,21 @@ public class ResourceManager extends CompositeService implements Recoverable { this.conf = conf; this.rmContext = new RMContextImpl(); + this.configurationProvider = + ConfigurationProviderFactory.getConfigurationProvider(conf); + this.configurationProvider.init(this.conf); + rmContext.setConfigurationProvider(configurationProvider); + if (!(this.configurationProvider instanceof LocalConfigurationProvider)) { + // load yarn-site.xml + this.conf = + this.configurationProvider.getConfiguration(this.conf, + YarnConfiguration.YARN_SITE_XML_FILE); + // load core-site.xml + this.conf = + this.configurationProvider.getConfiguration(this.conf, + YarnConfiguration.CORE_SITE_CONFIGURATION_FILE); + } + // register the handlers for all AlwaysOn services using setupDispatcher(). rmDispatcher = setupDispatcher(); addIfService(rmDispatcher); @@ -884,6 +902,9 @@ public class ResourceManager extends CompositeService implements Recoverable { if (fetcher != null) { fetcher.stop(); } + if (configurationProvider != null) { + configurationProvider.close(); + } super.serviceStop(); transitionToStandby(false); rmContext.setHAServiceState(HAServiceState.STOPPING); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 4f74179717f..8136c056129 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -95,7 +95,6 @@ public class ResourceTrackerService extends AbstractService implements private int minAllocMb; private int minAllocVcores; - private boolean useLocalConfigurationProvider; static { resync.setNodeAction(NodeAction.RESYNC); @@ -145,10 +144,6 @@ public class ResourceTrackerService extends AbstractService implements YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION, YarnConfiguration.DEFAULT_RM_NODEMANAGER_MINIMUM_VERSION); - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); super.serviceInit(conf); } @@ -169,7 +164,10 @@ public class ResourceTrackerService extends AbstractService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + this.rmContext.getConfigurationProvider().getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } this.server.start(); @@ -423,10 +421,11 @@ public class ResourceTrackerService extends AbstractService implements void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.useLocalConfigurationProvider) { + if 
(this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index b019a762515..eb4f814e1e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -196,7 +196,6 @@ public class CapacityScheduler extends AbstractYarnScheduler private ResourceCalculator calculator; private boolean usePortForNodeName; - private boolean useLocalConfigurationProvider; public CapacityScheduler() {} @@ -262,14 +261,21 @@ public class CapacityScheduler extends AbstractYarnScheduler @Override public synchronized void reinitialize(Configuration conf, RMContext rmContext) throws IOException { + Configuration configuration = new Configuration(conf); if (!initialized) { - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); - this.conf = - new CapacitySchedulerConfiguration(conf, - this.useLocalConfigurationProvider); + if (rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { + this.conf = new CapacitySchedulerConfiguration(configuration, true); + } else { + try { + this.conf = + new CapacitySchedulerConfiguration(rmContext + .getConfigurationProvider().getConfiguration(configuration, + YarnConfiguration.CS_CONFIGURATION_FILE), false); + } catch (Exception e) { + throw new IOException(e); + } + } validateConf(this.conf); this.minimumAllocation = this.conf.getMinimumAllocation(); this.maximumAllocation = this.conf.getMaximumAllocation(); @@ -290,7 +296,8 @@ public class CapacityScheduler extends AbstractYarnScheduler CapacitySchedulerConfiguration oldConf = this.conf; this.conf = new CapacitySchedulerConfiguration(conf, - this.useLocalConfigurationProvider); + rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider); validateConf(this.conf); try { LOG.info("Re-initializing queues..."); @@ -316,6 +323,7 @@ public class CapacityScheduler extends AbstractYarnScheduler @Lock(CapacityScheduler.class) private void initializeQueues(CapacitySchedulerConfiguration conf) throws IOException { + root = parseQueue(this, conf, null, CapacitySchedulerConfiguration.ROOT, queues, queues, noop); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java index 
bdab4f37715..8c5efa15e44 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java @@ -18,7 +18,9 @@ package org.apache.hadoop.yarn.server.resourcemanager.security.authorize; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.security.authorize.PolicyProvider; @@ -37,6 +39,23 @@ import org.apache.hadoop.yarn.server.api.ResourceTrackerPB; @InterfaceStability.Unstable public class RMPolicyProvider extends PolicyProvider { + private static RMPolicyProvider rmPolicyProvider = null; + + private RMPolicyProvider() {} + + @Private + @Unstable + public static RMPolicyProvider getInstance() { + if (rmPolicyProvider == null) { + synchronized(RMPolicyProvider.class) { + if (rmPolicyProvider == null) { + rmPolicyProvider = new RMPolicyProvider(); + } + } + } + return rmPolicyProvider; + } + private static final Service[] resourceManagerServices = new Service[] { new Service( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java index 5372c18832d..ee008e93b43 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java @@ -26,7 +26,6 @@ import java.io.FileOutputStream; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.authorize.AccessControlList; @@ -105,34 +104,34 @@ public class TestRMAdminService { throws IOException, YarnException { configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); - rm = new MockRM(configuration); - rm.init(configuration); - rm.start(); + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + fail("Should throw an exception"); + } catch(Exception ex) { + // Expect exception here + } - // clean the remoteDirectory - cleanRemoteDirectory(); + //upload default configurations + uploadDefaultConfiguration(); + + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + } catch(Exception ex) { + fail("Should not get any exceptions"); + } CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler(); int maxAppsBefore = cs.getConfiguration().getMaximumSystemApplications(); - 
try { - rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); - fail("FileSystemBasedConfigurationProvider is used." + - " Should get an exception here"); - } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: capacity-scheduler.xml")); - } - CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); csConf.set("yarn.scheduler.capacity.maximum-applications", "5000"); - String csConfFile = writeConfigurationXML(csConf, - "capacity-scheduler.xml"); - - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(csConfFile)); + uploadConfiguration(csConf, "capacity-scheduler.xml"); rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); @@ -159,20 +158,24 @@ public class TestRMAdminService { throws IOException, YarnException { configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); - rm = new MockRM(configuration); - rm.init(configuration); - rm.start(); + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + fail("Should throw an exception"); + } catch(Exception ex) { + // Expect exception here + } - // clean the remoteDirectory - cleanRemoteDirectory(); + //upload default configurations + uploadDefaultConfiguration(); try { - rm.adminService.refreshAdminAcls(RefreshAdminAclsRequest.newInstance()); - fail("FileSystemBasedConfigurationProvider is used." + - " Should get an exception here"); - } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: yarn-site.xml")); + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + } catch(Exception ex) { + fail("Should not get any exceptions"); } String aclStringBefore = @@ -180,10 +183,8 @@ public class TestRMAdminService { YarnConfiguration yarnConf = new YarnConfiguration(); yarnConf.set(YarnConfiguration.YARN_ADMIN_ACL, "world:anyone:rwcda"); - String yarnConfFile = writeConfigurationXML(yarnConf, "yarn-site.xml"); + uploadConfiguration(yarnConf, "yarn-site.xml"); - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(yarnConfFile)); rm.adminService.refreshAdminAcls(RefreshAdminAclsRequest.newInstance()); String aclStringAfter = @@ -214,7 +215,6 @@ public class TestRMAdminService { } } - @SuppressWarnings("resource") @Test public void testServiceAclsRefreshWithFileSystemBasedConfigurationProvider() throws IOException, YarnException { @@ -224,33 +224,33 @@ public class TestRMAdminService { "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); ResourceManager resourceManager = null; try { - resourceManager = new ResourceManager(); - resourceManager.init(configuration); - resourceManager.start(); - - // clean the remoteDirectory - cleanRemoteDirectory(); - try { - resourceManager.adminService - .refreshServiceAcls(RefreshServiceAclsRequest - .newInstance()); - fail("FileSystemBasedConfigurationProvider is used." 
+ - " Should get an exception here"); + resourceManager = new ResourceManager(); + resourceManager.init(configuration); + resourceManager.start(); + fail("Should throw an exception"); } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: hadoop-policy.xml")); + // expect to get an exception here } - String aclsString = "alice,bob users,wheel"; + //upload default configurations + uploadDefaultConfiguration(); Configuration conf = new Configuration(); conf.setBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, true); - conf.set("security.applicationclient.protocol.acl", aclsString); - String hadoopConfFile = writeConfigurationXML(conf, "hadoop-policy.xml"); + uploadConfiguration(conf, "core-site.xml"); + try { + resourceManager = new ResourceManager(); + resourceManager.init(configuration); + resourceManager.start(); + } catch (Exception ex) { + fail("Should not get any exceptions"); + } - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(hadoopConfFile)); + String aclsString = "alice,bob users,wheel"; + Configuration newConf = new Configuration(); + newConf.set("security.applicationclient.protocol.acl", aclsString); + uploadConfiguration(newConf, "hadoop-policy.xml"); resourceManager.adminService.refreshServiceAcls(RefreshServiceAclsRequest .newInstance()); @@ -328,31 +328,31 @@ public class TestRMAdminService { throws IOException, YarnException { configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); - rm = new MockRM(configuration); - rm.init(configuration); - rm.start(); + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + fail("Should throw an exception"); + } catch(Exception ex) { + // Expect exception here + } - // clean the remoteDirectory - cleanRemoteDirectory(); + //upload default configurations + uploadDefaultConfiguration(); try { - rm.adminService.refreshSuperUserGroupsConfiguration( - RefreshSuperUserGroupsConfigurationRequest.newInstance()); - fail("FileSystemBasedConfigurationProvider is used." 
+ - " Should get an exception here"); - } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: core-site.xml")); + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + } catch(Exception ex) { + fail("Should not get any exceptions"); } Configuration coreConf = new Configuration(false); coreConf.set("hadoop.proxyuser.test.groups", "test_groups"); coreConf.set("hadoop.proxyuser.test.hosts", "test_hosts"); - String coreConfFile = writeConfigurationXML(coreConf, - "core-site.xml"); + uploadConfiguration(coreConf, "core-site.xml"); - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(coreConfFile)); rm.adminService.refreshSuperUserGroupsConfiguration( RefreshSuperUserGroupsConfigurationRequest.newInstance()); Assert.assertTrue(ProxyUsers.getProxyGroups() @@ -393,11 +393,29 @@ public class TestRMAdminService { fs.copyFromLocalFile(filePath, workingPath); } - private void cleanRemoteDirectory() throws IOException { - if (fs.exists(workingPath)) { - for (FileStatus file : fs.listStatus(workingPath)) { - fs.delete(file.getPath(), true); - } - } + private void uploadConfiguration(Configuration conf, String confFileName) + throws IOException { + String csConfFile = writeConfigurationXML(conf, confFileName); + // upload the file into Remote File System + uploadToRemoteFileSystem(new Path(csConfFile)); + } + + private void uploadDefaultConfiguration() throws IOException { + Configuration conf = new Configuration(); + uploadConfiguration(conf, "core-site.xml"); + + YarnConfiguration yarnConf = new YarnConfiguration(); + yarnConf.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); + uploadConfiguration(yarnConf, "yarn-site.xml"); + + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + uploadConfiguration(csConf, "capacity-scheduler.xml"); + + Configuration hadoopPolicyConf = new Configuration(false); + hadoopPolicyConf + .addResource(YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE); + uploadConfiguration(hadoopPolicyConf, "hadoop-policy.xml"); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index ca60db3f04c..47ec5462350 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -40,6 +40,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -104,6 +105,7 @@ public class TestCapacityScheduler { private static float B3_CAPACITY = 20; private ResourceManager 
resourceManager = null; + private RMContext mockContext; @Before public void setUp() throws Exception { @@ -118,6 +120,9 @@ public class TestCapacityScheduler { resourceManager.getRMContainerTokenSecretManager().rollMasterKey(); resourceManager.getRMNMTokenSecretManager().rollMasterKey(); ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start(); + mockContext = mock(RMContext.class); + when(mockContext.getConfigurationProvider()).thenReturn( + new LocalConfigurationProvider()); } @After @@ -133,7 +138,7 @@ public class TestCapacityScheduler { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 2048); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 1024); try { - scheduler.reinitialize(conf, null); + scheduler.reinitialize(conf, mockContext); fail("Exception is expected because the min memory allocation is" + " larger than the max memory allocation."); } catch (YarnRuntimeException e) { @@ -147,7 +152,7 @@ public class TestCapacityScheduler { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 2); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 1); try { - scheduler.reinitialize(conf, null); + scheduler.reinitialize(conf, mockContext); fail("Exception is expected because the min vcores allocation is" + " larger than the max vcores allocation."); } catch (YarnRuntimeException e) { @@ -353,7 +358,7 @@ public class TestCapacityScheduler { conf.setCapacity(A, 80f); conf.setCapacity(B, 20f); - cs.reinitialize(conf,null); + cs.reinitialize(conf, mockContext); checkQueueCapacities(cs, 80f, 20f); } @@ -503,7 +508,7 @@ public class TestCapacityScheduler { conf.setCapacity(B2, B2_CAPACITY); conf.setCapacity(B3, B3_CAPACITY); conf.setCapacity(B4, B4_CAPACITY); - cs.reinitialize(conf,null); + cs.reinitialize(conf,mockContext); checkQueueCapacities(cs, 80f, 20f); // Verify parent for B4 From 5c978a43c3052cc1466b23653c354399186b4e10 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Mon, 10 Feb 2014 23:13:06 +0000 Subject: [PATCH 09/47] HDFS-5915. Refactor FSImageFormatProtobuf to simplify cross section reads. Contributed by Haohui Mai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566824 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/namenode/FSImageFormatPBINode.java | 29 ++--- .../namenode/FSImageFormatProtobuf.java | 102 ++++++++++-------- .../snapshot/FSImageFormatPBSnapshot.java | 16 +-- .../server/namenode/TestDeduplicationMap.java | 36 +++++++ .../namenode/TestFSImageStorageInspector.java | 5 - 6 files changed, 124 insertions(+), 67 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5a0e4216536..45d564259e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -335,6 +335,9 @@ Trunk (Unreleased) HDFS-5911. The id of a CacheDirective instance does not get serialized in the protobuf-fsimage. (Haohui Mai via jing9) + HDFS-5915. Refactor FSImageFormatProtobuf to simplify cross section reads. 
+ (Haohui Mai via cnauroth) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 5ade5cec6a3..43bbfdbc7ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -38,7 +38,7 @@ import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; -import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.StringMap; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SaverContext; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; @@ -208,7 +208,7 @@ public final class FSImageFormatPBINode { case FILE: return loadINodeFile(n); case DIRECTORY: - return loadINodeDirectory(n, parent.getStringTable()); + return loadINodeDirectory(n, parent.getLoaderContext().getStringTable()); case SYMLINK: return loadINodeSymlink(n); default: @@ -228,7 +228,7 @@ public final class FSImageFormatPBINode { blocks[i] = new BlockInfo(PBHelper.convert(bp.get(i)), replication); } final PermissionStatus permissions = loadPermission(f.getPermission(), - parent.getStringTable()); + parent.getLoaderContext().getStringTable()); final INodeFile file = new INodeFile(n.getId(), n.getName().toByteArray(), permissions, f.getModificationTime(), @@ -253,13 +253,14 @@ public final class FSImageFormatPBINode { assert n.getType() == INodeSection.INode.Type.SYMLINK; INodeSection.INodeSymlink s = n.getSymlink(); final PermissionStatus permissions = loadPermission(s.getPermission(), - parent.getStringTable()); + parent.getLoaderContext().getStringTable()); return new INodeSymlink(n.getId(), n.getName().toByteArray(), permissions, 0, 0, s.getTarget().toStringUtf8()); } private void loadRootINode(INodeSection.INode p) { - INodeDirectory root = loadINodeDirectory(p, parent.getStringTable()); + INodeDirectory root = loadINodeDirectory(p, parent.getLoaderContext() + .getStringTable()); final Quota.Counts q = root.getQuotaCounts(); final long nsQuota = q.get(Quota.NAMESPACE); final long dsQuota = q.get(Quota.DISKSPACE); @@ -273,16 +274,17 @@ public final class FSImageFormatPBINode { public final static class Saver { private static long buildPermissionStatus(INodeAttributes n, - final StringMap stringMap) { - long userId = stringMap.getStringId(n.getUserName()); - long groupId = stringMap.getStringId(n.getGroupName()); + final SaverContext.DeduplicationMap stringMap) { + long userId = stringMap.getId(n.getUserName()); + long groupId = stringMap.getId(n.getGroupName()); return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET) | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET) | n.getFsPermissionShort(); } public static INodeSection.INodeFile.Builder buildINodeFile( - INodeFileAttributes file, final StringMap stringMap) { + INodeFileAttributes file, + final 
SaverContext.DeduplicationMap<String> stringMap) { INodeSection.INodeFile.Builder b = INodeSection.INodeFile.newBuilder() .setAccessTime(file.getAccessTime()) .setModificationTime(file.getModificationTime()) @@ -293,7 +295,8 @@ public final class FSImageFormatPBINode { } public static INodeSection.INodeDirectory.Builder buildINodeDirectory( - INodeDirectoryAttributes dir, final StringMap stringMap) { + INodeDirectoryAttributes dir, + final SaverContext.DeduplicationMap<String> stringMap) { Quota.Counts quota = dir.getQuotaCounts(); INodeSection.INodeDirectory.Builder b = INodeSection.INodeDirectory .newBuilder().setModificationTime(dir.getModificationTime()) @@ -416,7 +419,7 @@ public final class FSImageFormatPBINode { private void save(OutputStream out, INodeDirectory n) throws IOException { INodeSection.INodeDirectory.Builder b = buildINodeDirectory(n, - parent.getStringMap()); + parent.getSaverContext().getStringMap()); INodeSection.INode r = buildINodeCommon(n) .setType(INodeSection.INode.Type.DIRECTORY).setDirectory(b).build(); r.writeDelimitedTo(out); @@ -424,7 +427,7 @@ public final class FSImageFormatPBINode { private void save(OutputStream out, INodeFile n) throws IOException { INodeSection.INodeFile.Builder b = buildINodeFile(n, - parent.getStringMap()); + parent.getSaverContext().getStringMap()); for (Block block : n.getBlocks()) { b.addBlocks(PBHelper.convert(block)); @@ -447,7 +450,7 @@ public final class FSImageFormatPBINode { private void save(OutputStream out, INodeSymlink n) throws IOException { INodeSection.INodeSymlink.Builder b = INodeSection.INodeSymlink .newBuilder() - .setPermission(buildPermissionStatus(n, parent.getStringMap())) + .setPermission(buildPermissionStatus(n, parent.getSaverContext().getStringMap())) .setTarget(ByteString.copyFrom(n.getSymlink())); INodeSection.INode r = buildINodeCommon(n) .setType(INodeSection.INode.Type.SYMLINK).setSymlink(b).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java index 2edc57b18d7..c03ba606410 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -73,12 +73,56 @@ import com.google.protobuf.CodedOutputStream; public final class FSImageFormatProtobuf { private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); + public static final class LoaderContext { + private String[] stringTable; + + public String[] getStringTable() { + return stringTable; + } + } + + public static final class SaverContext { + public static class DeduplicationMap<E> { + private final Map<E, Integer> map = Maps.newHashMap(); + private DeduplicationMap() {} + + static <T> DeduplicationMap<T> newMap() { + return new DeduplicationMap<T>(); + } + + int getId(E value) { + if (value == null) { + return 0; + } + Integer v = map.get(value); + if (v == null) { + int nv = map.size() + 1; + map.put(value, nv); + return nv; + } + return v; + } + + int size() { + return map.size(); + } + + Set<Entry<E, Integer>> entrySet() { + return map.entrySet(); + } + } + private final DeduplicationMap<String> stringMap = DeduplicationMap.newMap(); + + public DeduplicationMap<String> getStringMap() { + return stringMap; + } + } + + public static final class Loader implements FSImageFormat.AbstractLoader { static final int MINIMUM_FILE_LENGTH = 8;
private final Configuration conf; private final FSNamesystem fsn; - - private String[] stringTable; + private final LoaderContext ctx; /** The MD5 sum of the loaded file */ private MD5Hash imgDigest; @@ -88,6 +132,7 @@ public final class FSImageFormatProtobuf { Loader(Configuration conf, FSNamesystem fsn) { this.conf = conf; this.fsn = fsn; + this.ctx = new LoaderContext(); } @Override @@ -100,8 +145,8 @@ public final class FSImageFormatProtobuf { return imgTxId; } - public String[] getStringTable() { - return stringTable; + public LoaderContext getLoaderContext() { + return ctx; } void load(File file) throws IOException { @@ -226,11 +271,11 @@ public final class FSImageFormatProtobuf { private void loadStringTableSection(InputStream in) throws IOException { StringTableSection s = StringTableSection.parseDelimitedFrom(in); - stringTable = new String[s.getNumEntry() + 1]; + ctx.stringTable = new String[s.getNumEntry() + 1]; for (int i = 0; i < s.getNumEntry(); ++i) { StringTableSection.Entry e = StringTableSection.Entry .parseDelimitedFrom(in); - stringTable[e.getId()] = e.getStr(); + ctx.stringTable[e.getId()] = e.getStr(); } } @@ -269,9 +314,10 @@ public final class FSImageFormatProtobuf { public static final class Saver { private final SaveNamespaceContext context; + private final SaverContext saverContext; + private long currentOffset = FSImageUtil.MAGIC_HEADER.length; private MD5Hash savedDigest; - private StringMap stringMap = new StringMap(); private FileChannel fileChannel; // OutputStream for the section data @@ -282,6 +328,7 @@ public final class FSImageFormatProtobuf { Saver(SaveNamespaceContext context) { this.context = context; + this.saverContext = new SaverContext(); } public MD5Hash getSavedDigest() { @@ -292,6 +339,10 @@ public final class FSImageFormatProtobuf { return context; } + public SaverContext getSaverContext() { + return saverContext; + } + public void commitSection(FileSummary.Builder summary, SectionName name) throws IOException { long oldOffset = currentOffset; @@ -465,48 +516,15 @@ public final class FSImageFormatProtobuf { throws IOException { OutputStream out = sectionOutputStream; StringTableSection.Builder b = StringTableSection.newBuilder() - .setNumEntry(stringMap.size()); + .setNumEntry(saverContext.stringMap.size()); b.build().writeDelimitedTo(out); - for (Entry<String, Integer> e : stringMap.entrySet()) { + for (Entry<String, Integer> e : saverContext.stringMap.entrySet()) { StringTableSection.Entry.Builder eb = StringTableSection.Entry .newBuilder().setId(e.getValue()).setStr(e.getKey()); eb.build().writeDelimitedTo(out); } commitSection(summary, SectionName.STRING_TABLE); } - - public StringMap getStringMap() { - return stringMap; - } - } - - public static class StringMap { - private final Map<String, Integer> stringMap; - - public StringMap() { - stringMap = Maps.newHashMap(); - } - - int getStringId(String str) { - if (str == null) { - return 0; - } - Integer v = stringMap.get(str); - if (v == null) { - int nv = stringMap.size() + 1; - stringMap.put(str, nv); - return nv; - } - return v; - } - - int size() { - return stringMap.size(); - } - - Set<Entry<String, Integer>> entrySet() { - return stringMap.entrySet(); - } - } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java index 06cc1d0ac1f..b64a3db9325 100644 ---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -115,7 +115,7 @@ public class FSImageFormatPBSnapshot { SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot .parseDelimitedFrom(in); INodeDirectory root = loadINodeDirectory(pbs.getRoot(), - parent.getStringTable()); + parent.getLoaderContext().getStringTable()); int sid = pbs.getSnapshotId(); INodeDirectorySnapshottable parent = (INodeDirectorySnapshottable) fsDir .getInode(root.getId()).asDirectory(); @@ -162,7 +162,8 @@ public class FSImageFormatPBSnapshot { if (pbf.hasSnapshotCopy()) { INodeSection.INodeFile fileInPb = pbf.getSnapshotCopy(); PermissionStatus permission = loadPermission( - fileInPb.getPermission(), parent.getStringTable()); + fileInPb.getPermission(), parent.getLoaderContext() + .getStringTable()); copy = new INodeFileAttributes.SnapshotCopy(pbf.getName() .toByteArray(), permission, fileInPb.getModificationTime(), fileInPb.getAccessTime(), (short) fileInPb.getReplication(), @@ -249,8 +250,9 @@ public class FSImageFormatPBSnapshot { }else if (diffInPb.hasSnapshotCopy()) { INodeSection.INodeDirectory dirCopyInPb = diffInPb.getSnapshotCopy(); final byte[] name = diffInPb.getName().toByteArray(); - PermissionStatus permission = loadPermission(dirCopyInPb - .getPermission(), parent.getStringTable()); + PermissionStatus permission = loadPermission( + dirCopyInPb.getPermission(), parent.getLoaderContext() + .getStringTable()); long modTime = dirCopyInPb.getModificationTime(); boolean noQuota = dirCopyInPb.getNsQuota() == -1 && dirCopyInPb.getDsQuota() == -1; @@ -311,7 +313,7 @@ public class FSImageFormatPBSnapshot { SnapshotSection.Snapshot.Builder sb = SnapshotSection.Snapshot .newBuilder().setSnapshotId(s.getId()); INodeSection.INodeDirectory.Builder db = buildINodeDirectory(sroot, - parent.getStringMap()); + parent.getSaverContext().getStringMap()); INodeSection.INode r = INodeSection.INode.newBuilder() .setId(sroot.getId()) .setType(INodeSection.INode.Type.DIRECTORY) @@ -369,7 +371,7 @@ public class FSImageFormatPBSnapshot { INodeFileAttributes copy = diff.snapshotINode; if (copy != null) { fb.setName(ByteString.copyFrom(copy.getLocalNameBytes())) - .setSnapshotCopy(buildINodeFile(copy, parent.getStringMap())); + .setSnapshotCopy(buildINodeFile(copy, parent.getSaverContext().getStringMap())); } fb.build().writeDelimitedTo(out); } @@ -410,7 +412,7 @@ public class FSImageFormatPBSnapshot { if (!diff.isSnapshotRoot() && copy != null) { db.setName(ByteString.copyFrom(copy.getLocalNameBytes())) .setSnapshotCopy( - buildINodeDirectory(copy, parent.getStringMap())); + buildINodeDirectory(copy, parent.getSaverContext().getStringMap())); } // process created list and deleted list List created = diff.getChildrenDiff() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java new file mode 100644 index 00000000000..447c7ebd0e5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SaverContext.DeduplicationMap; +import org.junit.Assert; +import org.junit.Test; + +public class TestDeduplicationMap { + @Test + public void testDeduplicationMap() { + DeduplicationMap m = DeduplicationMap.newMap(); + Assert.assertEquals(1, m.getId("1")); + Assert.assertEquals(2, m.getId("2")); + Assert.assertEquals(3, m.getId("3")); + Assert.assertEquals(1, m.getId("1")); + Assert.assertEquals(2, m.getId("2")); + Assert.assertEquals(3, m.getId("3")); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java index 5e3ac4b7a2b..bb03b30c860 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java @@ -27,17 +27,12 @@ import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.junit.Test; public class TestFSImageStorageInspector { - private static final Log LOG = LogFactory.getLog( - TestFSImageStorageInspector.class); - /** * Simple test with image, edits, and inprogress edits */ From 666684eb90dc7ce8fc809cf371dfbe88c5956306 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 11 Feb 2014 00:46:45 +0000 Subject: [PATCH 10/47] HDFS-5921. Cannot browse file system via NN web UI if any directory has the sticky bit set. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566916 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop-hdfs/src/main/webapps/hdfs/explorer.js | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 45d564259e3..ec44aa253f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -951,6 +951,9 @@ Release 2.3.0 - UNRELEASED HDFS-5837. dfs.namenode.replication.considerLoad should consider decommissioned nodes. (Tao Luo via shv) + HDFS-5921. Cannot browse file system via NN web UI if any directory has + the sticky bit set. 
(atm) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. Add storage type to the protocol and expose it in block report diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js index 2e1af80c70e..1aa0c39079b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js @@ -35,8 +35,8 @@ } if (sticky) { - var exec = ((parms.perm % 10) & 1) == 1; - res[res.length - 1] = exec ? 't' : 'T'; + var otherExec = ((ctx.current().permission % 10) & 1) == 1; + res = res.substr(0, res.length - 1) + (otherExec ? 't' : 'T'); } chunk.write(dir + res); From 7fce641c49ac8a4683f8f1158b47ff9f49579ad3 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 11 Feb 2014 01:05:16 +0000 Subject: [PATCH 11/47] YARN-1698. Fixed default TimelineStore in code to match what is documented in yarn-default.xml. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566937 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../applicationhistoryservice/ApplicationHistoryServer.java | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d31a3efe5b8..0e763c81592 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -240,6 +240,9 @@ Release 2.4.0 - UNRELEASED YARN-1672. YarnConfiguration is missing a default for yarn.nodemanager.log.retain-seconds (Naren Koneru via kasha) + YARN-1698. Fixed default TimelineStore in code to match what is documented + in yarn-default.xml (Zhijie Shen via vinodkv) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java index 4ec986065b6..73a09417a01 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java @@ -34,7 +34,7 @@ import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.server.applicationhistoryservice.apptimeline.ApplicationTimelineStore; -import org.apache.hadoop.yarn.server.applicationhistoryservice.apptimeline.MemoryApplicationTimelineStore; +import org.apache.hadoop.yarn.server.applicationhistoryservice.apptimeline.LeveldbApplicationTimelineStore; import org.apache.hadoop.yarn.server.applicationhistoryservice.webapp.AHSWebApp; import org.apache.hadoop.yarn.webapp.WebApp; import org.apache.hadoop.yarn.webapp.WebApps; @@ -143,10 +143,8 @@ public class ApplicationHistoryServer extends CompositeService { protected ApplicationTimelineStore createApplicationTimelineStore( Configuration conf) { - // TODO: need to 
replace the MemoryApplicationTimelineStore.class with the - // LevelDB implementation return ReflectionUtils.newInstance(conf.getClass( - YarnConfiguration.ATS_STORE, MemoryApplicationTimelineStore.class, + YarnConfiguration.ATS_STORE, LeveldbApplicationTimelineStore.class, ApplicationTimelineStore.class), conf); } From 5c7b27bae0b52ed5be6d4f7616f99cbfc7bbf8ec Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 11 Feb 2014 02:47:05 +0000 Subject: [PATCH 12/47] HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. Contributed by bc Wong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566965 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/fs/s3/S3FileSystem.java | 6 ++++++ .../apache/hadoop/fs/s3native/NativeS3FileSystem.java | 6 ++++++ .../hadoop/fs/s3/S3FileSystemContractBaseTest.java | 7 ++++++- .../fs/s3native/NativeS3FileSystemContractBaseTest.java | 9 +++++++-- 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9eb7fae0f53..161ab457fb0 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -331,6 +331,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10330. TestFrameDecoder fails if it cannot bind port 12345. (Arpit Agarwal) + HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. (bc Wong + via atm) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java index e49eefa1115..9240d3704ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java @@ -443,6 +443,12 @@ public class S3FileSystem extends FileSystem { return getConf().getLong("fs.s3.block.size", 64 * 1024 * 1024); } + @Override + public String getCanonicalServiceName() { + // Does not support Token + return null; + } + // diagnostic methods void dump() throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java index 191baaff410..7847ec5cc6c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java @@ -733,4 +733,10 @@ public class NativeS3FileSystem extends FileSystem { public Path getWorkingDirectory() { return workingDir; } + + @Override + public String getCanonicalServiceName() { + // Does not support Token + return null; + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java index d1770d3b889..d704b006bef 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java @@ -54,5 +54,10 @@ public abstract class 
S3FileSystemContractBaseTest assertEquals("Double default block size", newBlockSize, fs.getFileStatus(file).getBlockSize()); } - + + public void testCanonicalName() throws Exception { + assertNull("s3 doesn't support security token and shouldn't have canonical name", + fs.getCanonicalServiceName()); + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java index 220e0bd4730..f6f9ae91122 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java @@ -48,7 +48,12 @@ public abstract class NativeS3FileSystemContractBaseTest store.purge("test"); super.tearDown(); } - + + public void testCanonicalName() throws Exception { + assertNull("s3n doesn't support security token and shouldn't have canonical name", + fs.getCanonicalServiceName()); + } + public void testListStatusForRoot() throws Exception { FileStatus[] paths = fs.listStatus(path("/")); assertEquals("Root directory is not empty; ", 0, paths.length); @@ -60,7 +65,7 @@ public abstract class NativeS3FileSystemContractBaseTest assertEquals(1, paths.length); assertEquals(path("/test"), paths[0].getPath()); } - + public void testNoTrailingBackslashOnBucket() throws Exception { assertTrue(fs.getFileStatus(new Path(fs.getUri().toString())).isDirectory()); } From 3587b6774c393e7f3f8b8777429d1716ce06ca91 Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Tue, 11 Feb 2014 04:39:37 +0000 Subject: [PATCH 13/47] YARN-1706. Created an utility method to dump timeline records to JSON strings. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566982 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/util/TimelineUtils.java | 86 +++++++++++++++++++ .../TestApplicationTimelineRecords.java | 21 ++++- 3 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0e763c81592..bfc84618646 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -173,6 +173,9 @@ Release 2.4.0 - UNRELEASED on the configuration-provider mechanism during startup too. (Xuan Gong via vinodkv) + YARN-1706. Created an utility method to dump timeline records to JSON + strings. (zjshen) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java new file mode 100644 index 00000000000..4ab557e33e1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.codehaus.jackson.JsonGenerationException; +import org.codehaus.jackson.map.AnnotationIntrospector; +import org.codehaus.jackson.map.JsonMappingException; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.annotate.JsonSerialize.Inclusion; +import org.codehaus.jackson.xc.JaxbAnnotationIntrospector; + +/** + * The helper class for the timeline module. + * + */ +@Public +@Evolving +public class TimelineUtils { + + private static ObjectMapper mapper; + + static { + mapper = new ObjectMapper(); + AnnotationIntrospector introspector = new JaxbAnnotationIntrospector(); + mapper.setAnnotationIntrospector(introspector); + mapper.getSerializationConfig() + .setSerializationInclusion(Inclusion.NON_NULL); + } + + /** + * Serialize a POJO object into a JSON string not in a pretty format + * + * @param o + * an object to serialize + * @return a JSON string + * @throws IOException + * @throws JsonMappingException + * @throws JsonGenerationException + */ + public static String dumpTimelineRecordtoJSON(Object o) + throws JsonGenerationException, JsonMappingException, IOException { + return dumpTimelineRecordtoJSON(o, false); + } + + /** + * Serialize a POJO object into a JSON string + * + * @param o + * an object to serialize + * @param pretty + * whether in a pretty format or not + * @return a JSON string + * @throws IOException + * @throws JsonMappingException + * @throws JsonGenerationException + */ + public static String dumpTimelineRecordtoJSON(Object o, boolean pretty) + throws JsonGenerationException, JsonMappingException, IOException { + if (pretty) { + return mapper.defaultPrettyPrintingWriter().writeValueAsString(o); + } else { + return mapper.writeValueAsString(o); + } + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java index 24d1ce91e62..330e099364e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java @@ -19,18 +19,23 @@ package org.apache.hadoop.yarn.api.records.apptimeline; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import junit.framework.Assert; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors.ATSPutError; 
+import org.apache.hadoop.yarn.util.TimelineUtils; import org.junit.Test; public class TestApplicationTimelineRecords { + private static final Log LOG = + LogFactory.getLog(TestApplicationTimelineRecords.class); + @Test - public void testATSEntities() { + public void testATSEntities() throws Exception { ATSEntities entities = new ATSEntities(); for (int j = 0; j < 2; ++j) { ATSEntity entity = new ATSEntity(); @@ -53,6 +58,9 @@ public class TestApplicationTimelineRecords { entity.addOtherInfo("okey2", "oval2"); entities.addEntity(entity); } + LOG.info("Entities in JSON:"); + LOG.info(TimelineUtils.dumpTimelineRecordtoJSON(entities, true)); + Assert.assertEquals(2, entities.getEntities().size()); ATSEntity entity1 = entities.getEntities().get(0); Assert.assertEquals("entity id 0", entity1.getEntityId()); @@ -71,7 +79,7 @@ public class TestApplicationTimelineRecords { } @Test - public void testATSEvents() { + public void testATSEvents() throws Exception { ATSEvents events = new ATSEvents(); for (int j = 0; j < 2; ++j) { ATSEvents.ATSEventsOfOneEntity partEvents = @@ -88,6 +96,9 @@ public class TestApplicationTimelineRecords { } events.addEvent(partEvents); } + LOG.info("Events in JSON:"); + LOG.info(TimelineUtils.dumpTimelineRecordtoJSON(events, true)); + Assert.assertEquals(2, events.getAllEvents().size()); ATSEvents.ATSEventsOfOneEntity partEvents1 = events.getAllEvents().get(0); Assert.assertEquals("entity id 0", partEvents1.getEntityId()); @@ -112,7 +123,7 @@ public class TestApplicationTimelineRecords { } @Test - public void testATSPutErrors() { + public void testATSPutErrors() throws Exception { ATSPutErrors atsPutErrors = new ATSPutErrors(); ATSPutError error1 = new ATSPutError(); error1.setEntityId("entity id 1"); @@ -127,6 +138,8 @@ public class TestApplicationTimelineRecords { error2.setErrorCode(ATSPutError.IO_EXCEPTION); errors.add(error2); atsPutErrors.addErrors(errors); + LOG.info("Errors in JSON:"); + LOG.info(TimelineUtils.dumpTimelineRecordtoJSON(atsPutErrors, true)); Assert.assertEquals(3, atsPutErrors.getErrors().size()); ATSPutError e = atsPutErrors.getErrors().get(0); From c43c9dd7b5bf24fadca7dcd805af6f11dc6175e6 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Tue, 11 Feb 2014 13:25:30 +0000 Subject: [PATCH 14/47] Preparing to release hadoop-2.3.0 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567111 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 18 +++++++++++++++--- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 14 +++++++++++++- hadoop-mapreduce-project/CHANGES.txt | 14 +++++++++++++- hadoop-yarn-project/CHANGES.txt | 14 +++++++++++++- 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 161ab457fb0..49a32c72342 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -334,15 +334,27 @@ Release 2.4.0 - UNRELEASED HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. (bc Wong via atm) -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES - HADOOP-8545. Filesystem Implementation for OpenStack Swift - (Dmitry Mezhensky, David Dobbins, Stevel via stevel) + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 + + INCOMPATIBLE CHANGES NEW FEATURES + HADOOP-8545. 
Filesystem Implementation for OpenStack Swift + (Dmitry Mezhensky, David Dobbins, Stevel via stevel) + IMPROVEMENTS HADOOP-10046. Print a log message when SSL is enabled. diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ec44aa253f0..31f790fde78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -410,7 +410,19 @@ Release 2.4.0 - UNRELEASED HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink() (brandonli) -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 3b72f402223..e6cc6c5c656 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -159,7 +159,19 @@ Release 2.4.0 - UNRELEASED BUG FIXES -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index bfc84618646..6660a49ca25 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -246,7 +246,19 @@ Release 2.4.0 - UNRELEASED YARN-1698. Fixed default TimelineStore in code to match what is documented in yarn-default.xml (Zhijie Shen via vinodkv) -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 INCOMPATIBLE CHANGES From 1b9cef0fdd5f0d221046d58cac632640afe5b553 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Tue, 11 Feb 2014 13:32:07 +0000 Subject: [PATCH 15/47] Release notes for hadoop-2.3.0. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567118 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/main/docs/releasenotes.html | 2950 +++++++++++++++++ 1 file changed, 2950 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html index efbaeae4b14..d2b6156573d 100644 --- a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html +++ b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html @@ -1,3 +1,2953 @@ + +Hadoop 2.3.0 Release Notes + + + +

    Hadoop 2.3.0 Release Notes

    +These release notes include new developer and user-facing incompatibilities, features, and major improvements. + +

    Changes since Hadoop 2.2.0

    +
      +
    • YARN-1642. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + RMDTRenewer#getRMClient should use ClientRMProxy
      +
      RMDTRenewer#getRMClient gets a proxy to the RM in the conf directly instead of going through ClientRMProxy. + +{code} + final YarnRPC rpc = YarnRPC.create(conf); + return (ApplicationClientProtocol)rpc.getProxy(ApplicationClientProtocol.class, addr, conf); +{code}
    • +
    • YARN-1630. + Major bug reported by Aditya Acharya and fixed by Aditya Acharya (client)
      + Introduce timeout for async polling operations in YarnClientImpl
      +
      I ran an MR2 application that would have been long running, and killed it programmatically using a YarnClient. The app was killed, but the client hung forever. The message that I saw, which spammed the logs, was "Watiting for application application_1389036507624_0018 to be killed." + +The RM log indicated that the app had indeed transitioned from RUNNING to KILLED, but for some reason future responses to the RPC to kill the application did not indicate that the app had been terminated. + +I tracked this down to YarnClientImpl.java, and though I was unable to reproduce the bug, I wrote a patch to introduce a bound on the number of times that YarnClientImpl retries the RPC before giving up.
    • +
    • YARN-1629. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + IndexOutOfBoundsException in Fair Scheduler MaxRunningAppsEnforcer
      +
      This can occur when the second-to-last app in a queue's pending app list is made runnable. The app is pulled out from under the iterator.
    • +
    • YARN-1628. + Major bug reported by Mit Desai and fixed by Vinod Kumar Vavilapalli
      + TestContainerManagerSecurity fails on trunk
      +
      The Test fails with the following error + +{noformat} +java.lang.IllegalArgumentException: java.net.UnknownHostException: InvalidHost + at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:377) + at org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager.newInstance(BaseNMTokenSecretManager.java:145) + at org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager.createNMToken(BaseNMTokenSecretManager.java:136) + at org.apache.hadoop.yarn.server.TestContainerManagerSecurity.testNMTokens(TestContainerManagerSecurity.java:253) + at org.apache.hadoop.yarn.server.TestContainerManagerSecurity.testContainerManager(TestContainerManagerSecurity.java:144) +{noformat}
    • +
    • YARN-1624. + Major bug reported by Aditya Acharya and fixed by Aditya Acharya (scheduler)
      + QueuePlacementPolicy format is not easily readable via a JAXB parser
      +
      The current format for specifying queue placement rules in the fair scheduler allocations file does not lend itself to easy parsing via a JAXB parser. In particular, relying on the tag name to encode information about which rule to use makes it very difficult for an xsd-based JAXB parser to preserve the order of the rules, which is essential.
    • +
    • YARN-1623. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Include queue name in RegisterApplicationMasterResponse
      +
      This provides the YARN change necessary to support MAPREDUCE-5732.
    • +
    • YARN-1618. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Fix invalid RMApp transition from NEW to FINAL_SAVING
      +
      YARN-891 augments the RMStateStore to store information on completed applications. In the process, it adds transitions from NEW to FINAL_SAVING. This leads to the RM trying to update entries in the state-store that do not exist. On ZKRMStateStore, this leads to the RM crashing. + +Previous description: +ZKRMStateStore fails to handle updates to znodes that don't exist. For instance, this can happen when an app transitions from NEW to FINAL_SAVING. In these cases, the store should create the missing znode and handle the update.
    • +
    • YARN-1616. + Trivial improvement reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + RMFatalEventDispatcher should log the cause of the event
      +
      RMFatalEventDispatcher#handle() logs the receipt of an event and its type, but leaves out the cause. The cause captures why the event was raised and would help debugging issues.
    • +
    • YARN-1608. + Trivial bug reported by Karthik Kambatla and fixed by Karthik Kambatla (nodemanager)
      + LinuxContainerExecutor has a few DEBUG messages at INFO level
      +
      LCE has a few INFO level log messages meant to be at debug level. In fact, they are logged both at INFO and DEBUG.
    • +
    • YARN-1607. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + TestRM expects the capacity scheduler
      +
      We should either explicitly set the Capacity Scheduler or make it scheduler-agnostic
    • +
    • YARN-1603. + Trivial bug reported by Zhijie Shen and fixed by Zhijie Shen
      + Remove two *.orig files which were unexpectedly committed
      +
      FairScheduler.java.orig and TestFifoScheduler.java.orig
    • +
    • YARN-1601. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur
      + 3rd party JARs are missing from hadoop-dist output
      +
      With the build changes of YARN-888 we are leaving out all 3rd party JARs used directly by YARN under /share/hadoop/yarn/lib/. + +We did not notice this when running minicluster because they all happen to be in the classpath from hadoop-common and hadoop-yarn. + +As 3rd party JARs are not 'public' interfaces we cannot rely on them being provided to yarn by common and hdfs. (i.e. if common and hdfs stop using a 3rd party dependency that yarn uses, this would break yarn if yarn does not pull that dependency explicitly). + +Also, this will break the bigtop hadoop build when they move to use branch-2, as they expect to find jars in /share/hadoop/yarn/lib/
    • +
    • YARN-1600. + Blocker bug reported by Jason Lowe and fixed by Haohui Mai (resourcemanager)
      + RM does not startup when security is enabled without spnego configured
      +
      We have a custom auth filter in front of our various UI pages that handles user authentication. However currently the RM assumes that if security is enabled then the user must have configured spnego as well for the RM web pages which is not true in our case.
    • +
    • YARN-1598. + Critical sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (client , resourcemanager)
      + HA-related rmadmin commands don't work on a secure cluster
      +
      The HA-related commands like -getServiceState -checkHealth etc. don't work in a secure cluster.
    • +
    • YARN-1579. + Trivial sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + ActiveRMInfoProto fields should be optional
      +
      Per discussion on YARN-1568, ActiveRMInfoProto should have optional fields instead of required fields.
    • +
    • YARN-1575. + Critical sub-task reported by Jason Lowe and fixed by Jason Lowe (nodemanager)
      + Public localizer crashes with "Localized unkown resource"
      +
      The public localizer can crash with the error: + +{noformat} +2014-01-08 14:11:43,212 [Thread-467] ERROR org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Localized unkonwn resource to java.util.concurrent.FutureTask@852e26 +2014-01-08 14:11:43,212 [Thread-467] INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Public cache exiting +{noformat}
    • +
    • YARN-1574. + Blocker sub-task reported by Xuan Gong and fixed by Xuan Gong
      + RMDispatcher should be reset on transition to standby
      +
      Currently, we move rmDispatcher out of ActiveService. But we still register the event dispatchers, such as schedulerDispatcher and RMAppEventDispatcher, when we initiate the ActiveService. + +Almost every time we transition the RM from Active to Standby, we need to initiate the ActiveService. That means we will register the same event dispatchers, which will cause the same event to be handled several times.
    • +
    • YARN-1573. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + ZK store should use a private password for root-node-acls
      +
      Currently, when HA is enabled, ZK store uses cluster-timestamp as the password for root node ACLs to give the Active RM exclusive access to the store. A more private value like a random number might be better.
    • +
    • YARN-1568. + Trivial task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Rename clusterid to clusterId in ActiveRMInfoProto
      +
      YARN-1029 introduces ActiveRMInfoProto - just realized it defines a field clusterid, which is inconsistent with other fields. Better to fix it immediately than leave the inconsistency.
    • +
    • YARN-1567. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + In Fair Scheduler, allow empty queues to change between leaf and parent on allocation file reload
      +
    • +
    • YARN-1560. + Major test reported by Ted Yu and fixed by Ted Yu
      + TestYarnClient#testAMMRTokens fails with null AMRM token
      +
      The following can be reproduced locally: +{code} +testAMMRTokens(org.apache.hadoop.yarn.client.api.impl.TestYarnClient) Time elapsed: 3.341 sec <<< FAILURE! +junit.framework.AssertionFailedError: null + at junit.framework.Assert.fail(Assert.java:48) + at junit.framework.Assert.assertTrue(Assert.java:20) + at junit.framework.Assert.assertNotNull(Assert.java:218) + at junit.framework.Assert.assertNotNull(Assert.java:211) + at org.apache.hadoop.yarn.client.api.impl.TestYarnClient.testAMMRTokens(TestYarnClient.java:382) +{code} +This test didn't appear in https://builds.apache.org/job/Hadoop-Yarn-trunk/442/consoleFull
    • +
    • YARN-1559. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Race between ServerRMProxy and ClientRMProxy setting RMProxy#INSTANCE
      +
      RMProxy#INSTANCE is a non-final static field and both ServerRMProxy and ClientRMProxy set it. This leads to races as witnessed on - YARN-1482. + +Sample trace: +{noformat} +java.lang.IllegalArgumentException: RM does not support this client protocol + at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88) + at org.apache.hadoop.yarn.client.ClientRMProxy.checkAllowedProtocols(ClientRMProxy.java:119) + at org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider.init(ConfiguredRMFailoverProxyProvider.java:58) + at org.apache.hadoop.yarn.client.RMProxy.createRMFailoverProxyProvider(RMProxy.java:158) + at org.apache.hadoop.yarn.client.RMProxy.createRMProxy(RMProxy.java:88) + at org.apache.hadoop.yarn.server.api.ServerRMProxy.createRMProxy(ServerRMProxy.java:56) +{noformat}
    • +
    • YARN-1549. + Major test reported by Ted Yu and fixed by haosdent
      + TestUnmanagedAMLauncher#testDSShell fails in trunk
      +
      The following error is reproducible: +{code} +testDSShell(org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher) Time elapsed: 14.911 sec <<< ERROR! +java.lang.RuntimeException: Failed to receive final expected state in ApplicationReport, CurrentState=RUNNING, ExpectedStates=FINISHED,FAILED,KILLED + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.monitorApplication(UnmanagedAMLauncher.java:447) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.run(UnmanagedAMLauncher.java:352) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher.testDSShell(TestUnmanagedAMLauncher.java:147) +{code} +See https://builds.apache.org/job/Hadoop-Yarn-trunk/435
    • +
    • YARN-1541. + Major bug reported by Jian He and fixed by Jian He
      + Invalidate AM Host/Port when app attempt is done so that in the mean-while client doesn’t get wrong information.
      +
    • +
    • YARN-1527. + Trivial bug reported by Jian He and fixed by Akira AJISAKA
      + yarn rmadmin command prints wrong usage info:
      +
      The usage should be: yarn rmadmin, instead of java RMAdmin, and the -refreshQueues should be in the second line. +{code} Usage: java RMAdmin -refreshQueues + -refreshNodes + -refreshSuperUserGroupsConfiguration + -refreshUserToGroupsMappings + -refreshAdminAcls + -refreshServiceAcl + -getGroups [username] + -help [cmd] + -transitionToActive <serviceId> + -transitionToStandby <serviceId> + -failover [--forcefence] [--forceactive] <serviceId> <serviceId> + -getServiceState <serviceId> + -checkHealth <serviceId> +{code}
    • +
    • YARN-1523. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Use StandbyException instead of RMNotYetReadyException
      +
    • +
    • YARN-1522. + Major bug reported by Liyin Liang and fixed by Liyin Liang
      + TestApplicationCleanup.testAppCleanup occasionally fails
      +
      TestApplicationCleanup is occasionally failing with the error: +{code} +------------------------------------------------------------------------------- +Test set: org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup +------------------------------------------------------------------------------- +Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 6.215 sec <<< FAILURE! - in org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup +testAppCleanup(org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup) Time elapsed: 5.555 sec <<< FAILURE! +junit.framework.AssertionFailedError: expected:<1> but was:<0> +at org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup.testAppCleanup(TestApplicationCleanup.java:119) +{code}
    • +
    • YARN-1505. + Blocker bug reported by Xuan Gong and fixed by Xuan Gong
      + WebAppProxyServer should not set localhost as YarnConfiguration.PROXY_ADDRESS by itself
      +
      At WebAppProxyServer::startServer(), it will set up YarnConfiguration.PROXY_ADDRESS to localhost:9099 by itself. So, no matter what value we set for YarnConfiguration.PROXY_ADDRESS in the configuration, the proxy server will bind to localhost:9099
    • +
    • YARN-1491. + Trivial bug reported by Jonathan Eagles and fixed by Chen He
      + Upgrade JUnit3 TestCase to JUnit 4
      +
      There are still four references to test classes that extend from junit.framework.TestCase + +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestYarnVersionInfo.java +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestWindowsResourceCalculatorPlugin.java +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestWindowsBasedProcessTree.java +
    • +
    • YARN-1485. + Major sub-task reported by Xuan Gong and fixed by Xuan Gong
      + Enabling HA should verify the RM service addresses configurations have been set for every RM Ids defined in RM_HA_IDs
      +
      After YARN-1325, the YarnConfiguration.RM_HA_IDS will contain multiple RM_Ids. We need to verify that the RM service addresses configurations have been set for all of RM_Ids.
    • +
    • YARN-1482. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Xuan Gong
      + WebApplicationProxy should be always-on w.r.t HA even if it is embedded in the RM
      +
      This way, even if an RM goes to standby mode, we can effect a redirect to the active. And more importantly, users will not suddenly see all their links stop working.
    • +
    • YARN-1481. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Vinod Kumar Vavilapalli
      + Move internal services logic from AdminService to ResourceManager
      +
      This is something I found while reviewing YARN-1318, but didn't halt that patch as many cycles went there already. Some top level issues + - Not easy to follow RM's service life cycle + -- RM adds only AdminService as its service directly. + -- Other services are added to RM when AdminService's init calls RM.activeServices.init() + - Overall, AdminService shouldn't encompass all of RM's HA state management. It was originally supposed to be the implementation of just the RPC server.
    • +
    • YARN-1463. + Major test reported by Ted Yu and fixed by Vinod Kumar Vavilapalli
      + Tests should avoid starting http-server where possible or creates spnego keytab/principals
      +
      Here is stack trace: +{code} +testContainerManager[1](org.apache.hadoop.yarn.server.TestContainerManagerSecurity) Time elapsed: 1.756 sec <<< ERROR! +org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.io.IOException: ResourceManager failed to start. Final state is STOPPED + at org.apache.hadoop.yarn.server.MiniYARNCluster$ResourceManagerWrapper.serviceStart(MiniYARNCluster.java:253) + at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) + at org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121) + at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) + at org.apache.hadoop.yarn.server.TestContainerManagerSecurity.testContainerManager(TestContainerManagerSecurity.java:110) +{code}
    • +
    • YARN-1454. + Critical bug reported by Jian He and fixed by Karthik Kambatla
      + TestRMRestart.testRMDelegationTokenRestoredOnRMRestart is failing intermittently
      +
    • +
    • YARN-1451. + Minor bug reported by Sandy Ryza and fixed by Sandy Ryza
      + TestResourceManager relies on the scheduler assigning multiple containers in a single node update
      +
      TestResourceManager relies on the capacity scheduler. + +It relies on a scheduler that assigns multiple containers in a single heartbeat, which not all schedulers do by default. It also relies on schedulers that don't consider CPU capacities. It would be simple to change the test to use multiple heartbeats and increase the vcore capacities of the nodes in the test.
    • +
    • YARN-1450. + Major bug reported by Akira AJISAKA and fixed by Binglin Chang (applications/distributed-shell)
      + TestUnmanagedAMLauncher#testDSShell fails on trunk
      +
      TestUnmanagedAMLauncher fails on trunk. The console output is +{code} +Running org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher +Tests run: 2, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 35.937 sec <<< FAILURE! - in org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher +testDSShell(org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher) Time elapsed: 14.558 sec <<< ERROR! +java.lang.RuntimeException: Failed to receive final expected state in ApplicationReport, CurrentState=ACCEPTED, ExpectedStates=FINISHED,FAILED,KILLED + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.monitorApplication(UnmanagedAMLauncher.java:447) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.run(UnmanagedAMLauncher.java:352) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher.testDSShell(TestUnmanagedAMLauncher.java:145) +{code}
    • +
    • YARN-1448. + Major sub-task reported by Wangda Tan and fixed by Wangda Tan (api , resourcemanager)
      + AM-RM protocol changes to support container resizing
      +
      As described in YARN-1197, we need to add an API in RM to support +1) Add increase request in AllocateRequest +2) Can get successfully increased/decreased containers from RM in AllocateResponse
    • +
    • YARN-1447. + Major sub-task reported by Wangda Tan and fixed by Wangda Tan (api)
      + Common PB type definitions for container resizing
      +
      As described in YARN-1197, we need to add some common PB types for container resource change, like ResourceChangeContext, etc. These types will be used by both RM and NM protocols
    • +
    • YARN-1446. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + Change killing application to wait until state store is done
      +
      When a user kills an application, the RM should wait until the state store is done saving the killed status of the application. Otherwise, if the RM crashes between the user killing the application and the status being written to the store, the RM will relaunch this application after it restarts.
    • +
    • YARN-1435. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Distributed Shell should not run other commands except "sh", and run the custom script at the same time.
      +
      Currently, if we want to run a custom script in DS, we can do it like this: +--shell_command sh --shell_script custom_script.sh +But it may be better to separate running shell_command and shell_script
    • +
    • YARN-1425. + Major bug reported by Omkar Vinit Joshi and fixed by Omkar Vinit Joshi
      + TestRMRestart fails because MockRM.waitForState(AttemptId) uses current attempt instead of the attempt passed as argument
      +
      TestRMRestart is failing on trunk. Fixing it.
    • +
    • YARN-1423. + Major improvement reported by Sandy Ryza and fixed by Ted Malaska (scheduler)
      + Support queue placement by secondary group in the Fair Scheduler
      +
    • +
    • YARN-1419. + Minor bug reported by Jonathan Eagles and fixed by Jonathan Eagles (scheduler)
      + TestFifoScheduler.testAppAttemptMetrics fails intermittently under jdk7
      +
      QueueMetrics holds its data in a static variable, causing metrics to bleed over from test to test. clearQueueMetrics is to be called for tests that need to measure metrics correctly for a single test. jdk7 comes into play since tests are run out of order, and in that case makes the metrics unreliable.
    • +
    • YARN-1416. + Major bug reported by Omkar Vinit Joshi and fixed by Jian He
      + InvalidStateTransitions getting reported in multiple test cases even though they pass
      +
      It might be worth checking why they are reporting this. +Testcases: TestRMAppTransitions, TestRM +There are a large number of such errors. +can't handle RMAppEventType.APP_UPDATE_SAVED at RMAppState.FAILED +
    • +
    • YARN-1411. + Critical sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla
      + HA config shouldn't affect NodeManager RPC addresses
      +
      When HA is turned on, {{YarnConfiguration#getSoketAddress()}} fetches rpc-addresses corresponding to the specified rm-id. This should only be for RM rpc-addresses. Other confs, like NM rpc-addresses shouldn't be affected by this. + +Currently, the NM address settings in yarn-site.xml aren't reflected in the actual ports.
    • +
    • YARN-1409. + Major bug reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA
      + NonAggregatingLogHandler can throw RejectedExecutionException
      +
      This problem is caused by handling APPLICATION_FINISHED events after calling sched.shutdown() in NonAggregatingLogHandler#serviceStop(). org.apache.hadoop.mapred.TestJobCleanup can fail because of RejectedExecutionException by NonAggregatingLogHandler. + +{code} +2013-11-13 10:53:06,970 FATAL [AsyncDispatcher event handler] event.AsyncDispatcher (AsyncDispatcher.java:dispatch(166)) - Error in dispatcher thread +java.util.concurrent.RejectedExecutionException: Task java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask@d51df63 rejected from java.util.concurrent.ScheduledThreadPoolExecutor@7a20e369[Shutting down, pool size = 4, active threads = 0, queued tasks = 7, completed tasks = 0] + at java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2048) + at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:821) + at java.util.concurrent.ScheduledThreadPoolExecutor.delayedExecute(ScheduledThreadPoolExecutor.java:325) + at java.util.concurrent.ScheduledThreadPoolExecutor.schedule(ScheduledThreadPoolExecutor.java:530) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.NonAggregatingLogHandler.handle(NonAggregatingLogHandler.java:121) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.NonAggregatingLogHandler.handle(NonAggregatingLogHandler.java:49) + at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:159) + at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:95) + at java.lang.Thread.run(Thread.java:724) +{code}
    • +
    • YARN-1407. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + RM Web UI and REST APIs should uniformly use YarnApplicationState
      +
      RMAppState isn't a public facing enum like YarnApplicationState, so we shouldn't return values or list filters that come from it. However, some Blocks and AppInfo are still using RMAppState. + +It is not 100% clear to me whether or not fixing this would be a backwards-incompatible change. The change would only reduce the set of possible strings that the API returns, so I think not. We have also been changing the contents of RMAppState since 2.2.0, e.g. in YARN-891. It would still be good to fix this ASAP (i.e. for 2.2.1).
    • +
    • YARN-1405. + Major sub-task reported by Yesha Vora and fixed by Jian He
      + RM hangs on shutdown if calling system.exit in serviceInit or serviceStart
      +
      Enable yarn.resourcemanager.recovery.enabled=true and Pass a local path to yarn.resourcemanager.fs.state-store.uri. such as "file:///tmp/MYTMP" + +if the directory /tmp/MYTMP is not readable or writable, RM should crash and should print "Permission denied Error" + +Currently, RM throws "java.io.FileNotFoundException: File file:/tmp/MYTMP/FSRMStateRoot/RMDTSecretManagerRoot does not exist" Error. RM returns Exiting status 1 but RM process does not shutdown. + +Snapshot of Resource manager log: + +2013-09-27 18:31:36,621 INFO security.NMTokenSecretManagerInRM (NMTokenSecretManagerInRM.java:rollMasterKey(97)) - Rolling master-key for nm-tokens +2013-09-27 18:31:36,694 ERROR resourcemanager.ResourceManager (ResourceManager.java:serviceStart(640)) - Failed to load/recover state +java.io.FileNotFoundException: File file:/tmp/MYTMP/FSRMStateRoot/RMDTSecretManagerRoot does not exist + at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:379) + at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1478) + at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1518) + at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:564) + at org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore.loadRMDTSecretManagerState(FileSystemRMStateStore.java:188) + at org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore.loadState(FileSystemRMStateStore.java:112) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceStart(ResourceManager.java:635) + at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:855) +2013-09-27 18:31:36,697 INFO util.ExitUtil (ExitUtil.java:terminate(124)) - Exiting with status 1
    • +
    • YARN-1403. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Separate out configuration loading from QueueManager in the Fair Scheduler
      +
    • +
    • YARN-1401. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (nodemanager)
      + With zero sleep-delay-before-sigkill.ms, no signal is ever sent
      +
      If you set in yarn-site.xml yarn.nodemanager.sleep-delay-before-sigkill.ms=0 then an unresponsive child JVM is never killed. In MRv1, TT used to immediately SIGKILL in this case.
    • +
    • YARN-1400. + Trivial bug reported by Raja Aluri and fixed by Raja Aluri (resourcemanager)
      + yarn.cmd uses HADOOP_RESOURCEMANAGER_OPTS. Should be YARN_RESOURCEMANAGER_OPTS.
      +
      yarn.cmd uses HADOOP_RESOURCEMANAGER_OPTS. Should be YARN_RESOURCEMANAGER_OPTS.
    • +
    • YARN-1395. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (applications/distributed-shell)
      + Distributed shell application master launched with debug flag can hang waiting for external ls process.
      +
      Distributed shell launched with the debug flag will run {{ApplicationMaster#dumpOutDebugInfo}}. This method launches an external process to run ls and print the contents of the current working directory. We've seen that this can cause the application master to hang on {{Process#waitFor}}.
    • +
    • YARN-1392. + Major new feature reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Allow sophisticated app-to-queue placement policies in the Fair Scheduler
      +
      Currently the Fair Scheduler supports app-to-queue placement by username. It would be beneficial to allow more sophisticated policies that rely on primary and secondary groups and fallbacks.
    • +
    • YARN-1388. + Trivial bug reported by Liyin Liang and fixed by Liyin Liang (resourcemanager)
      + Fair Scheduler page always displays blank fair share
      +
      YARN-1044 fixed the min/max/used resource display problem in the scheduler page. But the "Fair Share" display has the same problem and needs to be fixed.
    • +
    • YARN-1387. + Major improvement reported by Karthik Kambatla and fixed by Karthik Kambatla (api)
      + RMWebServices should use ClientRMService for filtering applications
      +
      YARN's REST API allows filtering applications; this should be moved to ClientRMService so that the Java API also supports the same functionality.
    • +
    • YARN-1386. + Critical bug reported by Jason Lowe and fixed by Jason Lowe (nodemanager)
      + NodeManager mistakenly loses resources and relocalizes them
      +
      When a local resource that should already be present is requested again, the nodemanager checks to see if it is still present. However, the method it uses to check for presence is via File.exists() as the user of the nodemanager process. If the resource was a private resource localized for another user, it will be localized to a location that is not accessible by the nodemanager user. Therefore File.exists() returns false, the nodemanager mistakenly believes the resource is no longer available, and it proceeds to localize it over and over.
    • +
    • YARN-1381. + Minor bug reported by Ted Yu and fixed by Ted Yu
      + Same relaxLocality appears twice in exception message of AMRMClientImpl#checkLocalityRelaxationConflict()
      +
      Here is related code: +{code} + throw new InvalidContainerRequestException("Cannot submit a " + + "ContainerRequest asking for location " + location + + " with locality relaxation " + relaxLocality + " when it has " + + "already been requested with locality relaxation " + relaxLocality); +{code} +The last relaxLocality should be reqs.values().iterator().next().remoteRequest.getRelaxLocality()
    • +
    • YARN-1378. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + Implement a RMStateStore cleaner for deleting application/attempt info
      +
      Now that we are storing the final state of application/attempt instead of removing application/attempt info on application/attempt completion(YARN-891), we need a separate RMStateStore cleaner for cleaning the application/attempt state.
    • +
    • YARN-1374. + Blocker bug reported by Devaraj K and fixed by Karthik Kambatla (resourcemanager)
      + Resource Manager fails to start due to ConcurrentModificationException
      +
      Resource Manager is failing to start with the below ConcurrentModificationException. + +{code:xml} +2013-10-30 20:22:42,371 INFO org.apache.hadoop.util.HostsFileReader: Refreshing hosts (include/exclude) list +2013-10-30 20:22:42,376 INFO org.apache.hadoop.service.AbstractService: Service ResourceManager failed in state INITED; cause: java.util.ConcurrentModificationException +java.util.ConcurrentModificationException + at java.util.AbstractList$Itr.checkForComodification(AbstractList.java:372) + at java.util.AbstractList$Itr.next(AbstractList.java:343) + at java.util.Collections$UnmodifiableCollection$1.next(Collections.java:1010) + at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:107) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:187) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:944) +2013-10-30 20:22:42,378 INFO org.apache.hadoop.yarn.server.resourcemanager.RMHAProtocolService: Transitioning to standby +2013-10-30 20:22:42,378 INFO org.apache.hadoop.yarn.server.resourcemanager.RMHAProtocolService: Transitioned to standby +2013-10-30 20:22:42,378 FATAL org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Error starting ResourceManager +java.util.ConcurrentModificationException + at java.util.AbstractList$Itr.checkForComodification(AbstractList.java:372) + at java.util.AbstractList$Itr.next(AbstractList.java:343) + at java.util.Collections$UnmodifiableCollection$1.next(Collections.java:1010) + at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:107) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:187) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:944) +2013-10-30 20:22:42,379 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: SHUTDOWN_MSG: +/************************************************************ +SHUTDOWN_MSG: Shutting down ResourceManager at HOST-10-18-40-24/10.18.40.24 +************************************************************/ +{code}
    • +
    • YARN-1358. + Minor test reported by Chuan Liu and fixed by Chuan Liu (client)
      + TestYarnCLI fails on Windows due to line endings
      +
      The unit test fails on Windows due to incorrect line endings was used for comparing the output from command line output. Error messages are as follows. +{noformat} +junit.framework.ComparisonFailure: expected:<...argument for options[] +usage: application +...> but was:<...argument for options[ +] +usage: application +...> + at junit.framework.Assert.assertEquals(Assert.java:85) + at junit.framework.Assert.assertEquals(Assert.java:91) + at org.apache.hadoop.yarn.client.cli.TestYarnCLI.testMissingArguments(TestYarnCLI.java:878) +{noformat}
    • +
    • YARN-1357. + Minor test reported by Chuan Liu and fixed by Chuan Liu (nodemanager)
      + TestContainerLaunch.testContainerEnvVariables fails on Windows
      +
      This test fails on Windows due to incorrect use of batch script command. Error messages are as follows. +{noformat} +junit.framework.AssertionFailedError: expected:<java.nio.HeapByteBuffer[pos=0 lim=19 cap=19]> but was:<java.nio.HeapByteBuffer[pos=0 lim=19 cap=19]> + at junit.framework.Assert.fail(Assert.java:50) + at junit.framework.Assert.failNotEquals(Assert.java:287) + at junit.framework.Assert.assertEquals(Assert.java:67) + at junit.framework.Assert.assertEquals(Assert.java:74) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch.testContainerEnvVariables(TestContainerLaunch.java:508) +{noformat}
    • +
    • YARN-1351. + Trivial bug reported by Konstantin Weitz and fixed by Konstantin Weitz (resourcemanager)
      + Invalid string format in Fair Scheduler log warn message
      +
      While trying to print a warning, two values of the wrong type (Resource instead of int) are passed into a String.format method call, leading to a runtime exception, in the file: + +_trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java_. + +The warning was intended to be printed whenever the resources don't fit into each other, either because the number of virtual cores or the memory is too small. I changed the %d's into %s; this way the warning will contain both the cores and the memory. +
    • +
    • YARN-1349. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (client)
      + yarn.cmd does not support passthrough to any arbitrary class.
      +
      The yarn shell script supports passthrough to calling any arbitrary class if the first argument is not one of the pre-defined sub-commands. The equivalent cmd script does not implement this and instead fails trying to do a labeled goto to the first argument.
    • +
    • YARN-1343. + Critical bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (resourcemanager)
      + NodeManagers additions/restarts are not reported as node updates in AllocateResponse responses to AMs
      +
      If a NodeManager joins the cluster or gets restarted, running AMs never receive the node update indicating the Node is running.
    • +
    • YARN-1335. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Move duplicate code from FSSchedulerApp and FiCaSchedulerApp into SchedulerApplication
      +
      FSSchedulerApp and FiCaSchedulerApp use duplicate code in a lot of places. They both extend SchedulerApplication. We can move a lot of this duplicate code into SchedulerApplication.
    • +
    • YARN-1333. + Major improvement reported by Sandy Ryza and fixed by Tsuyoshi OZAWA (scheduler)
      + Support blacklisting in the Fair Scheduler
      +
    • +
    • YARN-1332. + Minor improvement reported by Sandy Ryza and fixed by Sebastian Wong
      + In TestAMRMClient, replace assertTrue with assertEquals where possible
      +
      TestAMRMClient uses a lot of "assertTrue(amClient.ask.size() == 0)" where "assertEquals(0, amClient.ask.size())" would make it easier to see why it's failing at a glance.
    • +
    • YARN-1331. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (client)
      + yarn.cmd exits with NoClassDefFoundError trying to run rmadmin or logs
      +
      The yarn shell script was updated so that the rmadmin and logs sub-commands launch {{org.apache.hadoop.yarn.client.cli.RMAdminCLI}} and {{org.apache.hadoop.yarn.client.cli.LogsCLI}}. The yarn.cmd script also needs to be updated so that the commands work on Windows.
    • +
    • YARN-1325. + Major sub-task reported by Tsuyoshi OZAWA and fixed by Xuan Gong (resourcemanager)
      + Enabling HA should check Configuration contains multiple RMs
      +
      Currently, we can enable the RM HA configuration without multiple RM ids (YarnConfiguration.RM_HA_IDS). This behaviour can lead to incorrect operation. The ResourceManager should verify that more than 1 RM id is specified in RM_HA_IDS. + +One idea is to support a "strict mode" to enforce this check as configuration (e.g. yarn.resourcemanager.ha.strict-mode.enabled).
    • +
    • YARN-1323. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla
      + Set HTTPS webapp address along with other RPC addresses in HAUtil
      +
      YARN-1232 adds the ability to configure multiple RMs, but missed out the https web app address. Need to add that in.
    • +
    • YARN-1321. + Blocker bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (client)
      + NMTokenCache is a singleton, prevents multiple AMs running in a single JVM to work correctly
      +
      NMTokenCache is a singleton. Because of this, when running multiple AMs in a single JVM, NMTokens for the same node from different AMs step on each other and starting containers fails due to mismatched tokens. + +The error observed on the client side is something like: + +{code} +ERROR org.apache.hadoop.security.UserGroupInformation: PriviledgedActionException as:llama (auth:PROXY) via llama (auth:SIMPLE) cause:org.apache.hadoop.yarn.exceptions.YarnException: Unauthorized request to start container. +NMToken for application attempt : appattempt_1382038445650_0002_000001 was used for starting container with container token issued for application attempt : appattempt_1382038445650_0001_000001 +{code} +
    • +
    • YARN-1320. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Custom log4j properties in Distributed shell does not work properly.
      +
      Distributed shell cannot pick up custom log4j properties (specified with -log_properties). It always uses default log4j properties.
    • +
    • YARN-1318. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Promote AdminService to an Always-On service and merge in RMHAProtocolService
      +
      Per discussion in YARN-1068, we want AdminService to handle HA-admin operations in addition to the regular non-HA admin operations. To facilitate this, we need to move AdminService to an Always-On service.
    • +
    • YARN-1315. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (resourcemanager , scheduler)
      + TestQueueACLs should also test FairScheduler
      +
    • +
    • YARN-1314. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Cannot pass more than 1 argument to shell command
      +
      Distributed shell cannot accept more than 1 parameter in the argument part. + +All of these commands are treated as 1 parameter: + +/usr/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar <distributed shell jar> -shell_command echo -shell_args "'"My name" "is Teddy"'" +/usr/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar <distributed shell jar> -shell_command echo -shell_args "''My name' 'is Teddy''" +/usr/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar <distributed shell jar> -shell_command echo -shell_args "'My name' 'is Teddy'"
    • +
    • YARN-1311. + Trivial sub-task reported by Vinod Kumar Vavilapalli and fixed by Vinod Kumar Vavilapalli
      + Fix app specific scheduler-events' names to be app-attempt based
      +
      Today, APP_ADDED and APP_REMOVED are sent to the scheduler. They are misnomers as schedulers only deal with AppAttempts today. This JIRA is for fixing their names so that we can add App-level events in the near future, notably for work-preserving RM-restart.
    • +
    • YARN-1307. + Major sub-task reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA (resourcemanager)
      + Rethink znode structure for RM HA
      +
      Rethink for znode structure for RM HA is proposed in some JIRAs(YARN-659, YARN-1222). The motivation of this JIRA is quoted from Bikas' comment in YARN-1222: +{quote} +We should move to creating a node hierarchy for apps such that all znodes for an app are stored under an app znode instead of the app root znode. This will help in removeApplication and also in scaling better on ZK. The earlier code was written this way to ensure create/delete happens under a root znode for fencing. But given that we have moved to multi-operations globally, this isnt required anymore. +{quote}
    • +
    • YARN-1306. + Major bug reported by Wei Yan and fixed by Wei Yan
      + Clean up hadoop-sls sample-conf according to YARN-1228
      +
      Move the fair scheduler allocations configuration to fair-scheduler.xml, and move all scheduler stuff to yarn-site.xml
    • +
    • YARN-1305. + Major sub-task reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA (resourcemanager)
      + RMHAProtocolService#serviceInit should handle HAUtil's IllegalArgumentException
      +
      When yarn.resourcemanager.ha.enabled is true, RMHAProtocolService#serviceInit calls HAUtil.setAllRpcAddresses. If the configuration values are null, it just throws IllegalArgumentException. +It's messy to analyse which keys are null, so we should handle it and log the name of keys which are null. + +A current log dump is as follows: +{code} +2013-10-15 06:24:53,431 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: registered UNIX signal handlers for [TERM, HUP, INT] +2013-10-15 06:24:54,203 INFO org.apache.hadoop.service.AbstractService: Service RMHAProtocolService failed in state INITED; cause: java.lang.IllegalArgumentException: Property value must not be null +java.lang.IllegalArgumentException: Property value must not be null + at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88) + at org.apache.hadoop.conf.Configuration.set(Configuration.java:816) + at org.apache.hadoop.conf.Configuration.set(Configuration.java:798) + at org.apache.hadoop.yarn.conf.HAUtil.setConfValue(HAUtil.java:100) + at org.apache.hadoop.yarn.conf.HAUtil.setAllRpcAddresses(HAUtil.java:105) + at org.apache.hadoop.yarn.server.resourcemanager.RMHAProtocolService.serviceInit(RMHAProtocolService.java:60) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:108) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:187) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:940) +{code}
    • +
    • YARN-1303. + Major improvement reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Allow multiple commands separating with ";" in distributed-shell
      +
      In shell, we can do "ls; ls" to run 2 commands at once. + +In distributed shell, this is not working. We should improve to allow this to occur. There are practical use cases that I know of to run multiple commands or to set environment variables before a command.
    • +
    • YARN-1300. + Major bug reported by Ted Yu and fixed by Ted Yu
      + SLS tests fail because conf puts yarn properties in fair-scheduler.xml
      +
      I was looking at https://builds.apache.org/job/PreCommit-YARN-Build/2165//testReport/org.apache.hadoop.yarn.sls/TestSLSRunner/testSimulatorRunning/ +I am able to reproduce the failure locally. + +I found that FairSchedulerConfiguration.getAllocationFile() doesn't read the yarn.scheduler.fair.allocation.file config entry from fair-scheduler.xml + +This leads to the following: +{code} +Caused by: org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AllocationConfigurationException: Bad fair scheduler config file: top-level element not <allocations> + at org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager.reloadAllocs(QueueManager.java:302) + at org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager.initialize(QueueManager.java:108) + at org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.reinitialize(FairScheduler.java:1145) +{code}
    • +
    • YARN-1295. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (nodemanager)
      + In UnixLocalWrapperScriptBuilder, using bash -c can cause "Text file busy" errors
      +
      I missed this when working on YARN-1271.
    • +
    • YARN-1293. + Major bug reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA
      + TestContainerLaunch.testInvalidEnvSyntaxDiagnostics fails on trunk
      +
      {quote} +------------------------------------------------------------------------------- +Test set: org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch +------------------------------------------------------------------------------- +Tests run: 8, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 12.655 sec <<< FAILURE! - in org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch +testInvalidEnvSyntaxDiagnostics(org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch) Time elapsed: 0.114 sec <<< FAILURE! +junit.framework.AssertionFailedError: null + at junit.framework.Assert.fail(Assert.java:48) + at junit.framework.Assert.assertTrue(Assert.java:20) + at junit.framework.Assert.assertTrue(Assert.java:27) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch.testInvalidEnvSyntaxDiagnostics(TestContainerLaunch.java:273) +{quote}
    • +
    • YARN-1290. + Major improvement reported by Wei Yan and fixed by Wei Yan
      + Let continuous scheduling achieve more balanced task assignment
      +
      Currently, in continuous scheduling (YARN-1010), in each round, the thread iterates over pre-ordered nodes and assigns tasks. This mechanism may overload the first several nodes, while later nodes have no tasks. + +We should sort all nodes according to available resources. In each round, always assign tasks to nodes with larger capacity, which can balance the load distribution among all nodes.
    • +
    • YARN-1288. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Make Fair Scheduler ACLs more user friendly
      +
      The Fair Scheduler currently defaults the root queue's acl to empty and all other queues' acl to "*". Now that YARN-1258 enables configuring the root queue, we should reverse this. This will also bring the Fair Scheduler in line with the Capacity Scheduler. + +We should also not trim the acl strings, which makes it impossible to only specify groups in an acl.
    • +
    • YARN-1284. + Blocker bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (nodemanager)
      + LCE: Race condition leaves dangling cgroups entries for killed containers
      +
      When LCE & cgroups are enabled, when a container is is killed (in this case by its owning AM, an MRAM) it seems to be a race condition at OS level when doing a SIGTERM/SIGKILL and when the OS does all necessary cleanup. + +LCE code, after sending the SIGTERM/SIGKILL and getting the exitcode, immediately attempts to clean up the cgroups entry for the container. But this is failing with an error like: + +{code} +2013-10-07 15:21:24,359 WARN org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor: Exit code from container container_1381179532433_0016_01_000011 is : 143 +2013-10-07 15:21:24,359 DEBUG org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container: Processing container_1381179532433_0016_01_000011 of type UPDATE_DIAGNOSTICS_MSG +2013-10-07 15:21:24,359 DEBUG org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler: deleteCgroup: /run/cgroups/cpu/hadoop-yarn/container_1381179532433_0016_01_000011 +2013-10-07 15:21:24,359 WARN org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler: Unable to delete cgroup at: /run/cgroups/cpu/hadoop-yarn/container_1381179532433_0016_01_000011 +{code} + + +CgroupsLCEResourcesHandler.clearLimits() has logic to wait for 500 ms for AM containers to avoid this problem. it seems this should be done for all containers. + +Still, waiting for extra 500ms seems too expensive. + +We should look at a way of doing this in a more 'efficient way' from time perspective, may be spinning while the deleteCgroup() cannot be done with a minimal sleep and a timeout. +
    • +
    • YARN-1283. + Major sub-task reported by Yesha Vora and fixed by Omkar Vinit Joshi
      + Invalid 'url of job' mentioned in Job output with yarn.http.policy=HTTPS_ONLY
      +
      After setting yarn.http.policy=HTTPS_ONLY, the job output shows incorrect "The url to track the job". + +Currently, its printing http://RM:<httpsport>/proxy/application_1381162886563_0001/ instead https://RM:<httpsport>/proxy/application_1381162886563_0001/ + +http://hostname:8088/proxy/application_1381162886563_0001/ is invalid + +hadoop jar hadoop-mapreduce-client-jobclient-tests.jar sleep -m 1 -r 1 +13/10/07 18:39:39 INFO client.RMProxy: Connecting to ResourceManager at hostname/100.00.00.000:8032 +13/10/07 18:39:40 INFO mapreduce.JobSubmitter: number of splits:1 +13/10/07 18:39:40 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.partitioner.class is deprecated. Instead, use mapreduce.job.partitioner.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.mapoutput.value.class is deprecated. Instead, use mapreduce.map.output.value.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.reduce.class is deprecated. Instead, use mapreduce.job.reduce.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.outputformat.class is deprecated. Instead, use mapreduce.job.outputformat.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.mapoutput.key.class is deprecated. Instead, use mapreduce.map.output.key.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.working.dir is deprecated. 
Instead, use mapreduce.job.working.dir +13/10/07 18:39:40 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1381162886563_0001 +13/10/07 18:39:40 INFO impl.YarnClientImpl: Submitted application application_1381162886563_0001 to ResourceManager at hostname/100.00.00.000:8032 +13/10/07 18:39:40 INFO mapreduce.Job: The url to track the job: http://hostname:8088/proxy/application_1381162886563_0001/ +13/10/07 18:39:40 INFO mapreduce.Job: Running job: job_1381162886563_0001 +13/10/07 18:39:46 INFO mapreduce.Job: Job job_1381162886563_0001 running in uber mode : false +13/10/07 18:39:46 INFO mapreduce.Job: map 0% reduce 0% +13/10/07 18:39:53 INFO mapreduce.Job: map 100% reduce 0% +13/10/07 18:39:58 INFO mapreduce.Job: map 100% reduce 100% +13/10/07 18:39:58 INFO mapreduce.Job: Job job_1381162886563_0001 completed successfully +13/10/07 18:39:58 INFO mapreduce.Job: Counters: 43 + File System Counters + FILE: Number of bytes read=26 + FILE: Number of bytes written=177279 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + HDFS: Number of bytes read=48 + HDFS: Number of bytes written=0 + HDFS: Number of read operations=1 + HDFS: Number of large read operations=0 + HDFS: Number of write operations=0 + Job Counters + Launched map tasks=1 + Launched reduce tasks=1 + Other local map tasks=1 + Total time spent by all maps in occupied slots (ms)=7136 + Total time spent by all reduces in occupied slots (ms)=6062 + Map-Reduce Framework + Map input records=1 + Map output records=1 + Map output bytes=4 + Map output materialized bytes=22 + Input split bytes=48 + Combine input records=0 + Combine output records=0 + Reduce input groups=1 + Reduce shuffle bytes=22 + Reduce input records=1 + Reduce output records=0 + Spilled Records=2 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=60 + CPU time spent (ms)=1700 + Physical memory (bytes) snapshot=567582720 + Virtual memory (bytes) snapshot=4292997120 + Total committed heap usage (bytes)=846594048 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=0 + File Output Format Counters + Bytes Written=0 + +
    • +
    • YARN-1268. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + TestFairScheduler.testContinuousScheduling is flaky
      +
      It looks like there's a timeout in it that's causing it to be flaky.
    • +
    • YARN-1265. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (resourcemanager , scheduler)
      + Fair Scheduler chokes on unhealthy node reconnect
      +
      Only nodes in the RUNNING state are tracked by schedulers. When a node reconnects, RMNodeImpl.ReconnectNodeTransition tries to remove it from the scheduler even if it's not in the RUNNING state, and the FairScheduler doesn't guard against this. + +I think the best way to fix this is to check whether a node is RUNNING before telling the scheduler to remove it (see the sketch below).
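      A minimal sketch of that guard, using hypothetical TrackedNode/Scheduler interfaces instead of the real RMNodeImpl and scheduler event plumbing:
{code}
import org.apache.hadoop.yarn.api.records.NodeState;

public class ReconnectGuard {

  /** Illustrative stand-ins for the RM's internal node and scheduler types. */
  public interface TrackedNode {
    NodeState getState();
  }
  public interface Scheduler {
    void removeNode(TrackedNode node);
  }

  /**
   * Only asks the scheduler to forget a reconnecting node if the scheduler
   * could actually be tracking it, i.e. if the node was RUNNING. Nodes in
   * other states were never handed to the scheduler, so there is nothing
   * to remove.
   */
  public static void onReconnect(TrackedNode node, Scheduler scheduler) {
    if (node.getState() == NodeState.RUNNING) {
      scheduler.removeNode(node);
    }
  }
}
{code}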
    • +
    • YARN-1259. + Trivial bug reported by Sandy Ryza and fixed by Robert Kanter (scheduler)
      + In Fair Scheduler web UI, queue num pending and num active apps switched
      +
      The values returned in FairSchedulerLeafQueueInfo by numPendingApplications and numActiveApplications should be switched.
    • +
    • YARN-1258. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Allow configuring the Fair Scheduler root queue
      +
      This would be useful for acls, maxRunningApps, scheduling modes, etc. + +The allocation file should be able to accept both: +* An implicit root queue +* A root queue at the top of the hierarchy with all queues under/inside of it
    • +
    • YARN-1253. + Blocker new feature reported by Alejandro Abdelnur and fixed by Roman Shaposhnik (nodemanager)
      + Changes to LinuxContainerExecutor to run containers as a single dedicated user in non-secure mode
      +
      When using cgroups we require LCE to be configured in the cluster to start containers. + +LCE starts containers as the user that submitted the job. While this works correctly in a secure setup, in an unsecure setup this presents a couple of issues: + +* LCE requires all Hadoop users submitting jobs to be Unix users on all nodes +* Because users can impersonate other users, any user would have access to any local file of other users + +In particular, the second issue is not desirable, as a user could get access to the ssh keys of other users on the nodes or, if there are NFS mounts, get to other users' data outside of the cluster.
    • +
    • YARN-1241. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + In Fair Scheduler, maxRunningApps does not work for non-leaf queues
      +
      Setting the maxRunningApps property on a parent queue should make it so that the sum of running apps in all of its subqueues can't exceed it.
    • +
    • YARN-1239. + Major sub-task reported by Bikas Saha and fixed by Jian He (resourcemanager)
      + Save version information in the state store
      +
      When creating root dir for the first time we should write version 1. If root dir exists then we should check that the version in the state store matches the version from config.
    • +
    • YARN-1232. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Configuration to support multiple RMs
      +
      We should augment the configuration to allow users to specify two RMs and the individual RPC addresses for them (an illustrative configuration sketch follows below).
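      An illustrative sketch of what such a configuration might look like from client code; the property names below are assumptions about the eventual shape (a list of RM ids plus per-id RPC addresses), so check yarn-default.xml of your release for the exact keys:
{code}
import org.apache.hadoop.conf.Configuration;

public class TwoResourceManagerConf {

  /** Builds a Configuration naming two RMs and their RPC addresses. */
  public static Configuration buildHaConf() {
    Configuration conf = new Configuration();
    // Hypothetical key names and hosts, for illustration only.
    conf.set("yarn.resourcemanager.ha.rm-ids", "rm1,rm2");
    conf.set("yarn.resourcemanager.address.rm1", "rm1.example.com:8032");
    conf.set("yarn.resourcemanager.address.rm2", "rm2.example.com:8032");
    return conf;
  }
}
{code}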
    • +
    • YARN-1222. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Make improvements in ZKRMStateStore for fencing
      +
      Use multi-operations for every ZK interaction. +In every operation, automatically create/delete a lock znode that is a child of the root znode. This achieves fencing by modifying the create/delete permissions on the root znode (a sketch of such a fenced write follows below).
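      A simplified sketch of such a fenced write using the ZooKeeper multi API; the lock znode name and ACL handling are illustrative, not the actual ZKRMStateStore implementation:
{code}
import java.util.Arrays;
import java.util.List;

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Op;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

public class FencedZkWrite {

  /**
   * Wraps a state-store update in one multi-operation that creates and then
   * deletes a lock child under the root znode. If another RM has restricted
   * create/delete permissions on the root znode (the fencing step), the
   * create fails and the whole transaction is rejected atomically.
   */
  public static void fencedSetData(ZooKeeper zk, String rootPath,
      String nodePath, byte[] data)
      throws KeeperException, InterruptedException {
    String lockPath = rootPath + "/RM_FENCING_LOCK";  // illustrative name
    List<Op> ops = Arrays.asList(
        Op.create(lockPath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT),
        Op.setData(nodePath, data, -1),
        Op.delete(lockPath, -1));
    zk.multi(ops);
  }
}
{code}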
    • +
    • YARN-1210. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Omkar Vinit Joshi
      + During RM restart, RM should start a new attempt only when previous attempt exits for real
      +
      When the RM recovers, it can wait for existing AMs to contact the RM back and then kill them forcefully before even starting a new AM. Worst case, the RM will start a new AppAttempt after waiting for 10 mins (the expiry interval). This way we'll minimize multiple AMs racing with each other. This can help issues with downstream components like Pig, Hive and Oozie during RM restart. + +In the meanwhile, new apps will proceed as usual while existing apps wait for recovery. + +This can continue to be useful after work-preserving restart, so that AMs which can properly sync back up with the RM can continue to run and those that don't are guaranteed to be killed before starting a new attempt.
    • +
    • YARN-1199. + Major improvement reported by Mit Desai and fixed by Mit Desai
      + Make NM/RM Versions Available
      +
      Now that we have the NM and RM versions available, we can display the YARN version of nodes running in the cluster. + +
    • +
    • YARN-1188. + Trivial bug reported by Akira AJISAKA and fixed by Tsuyoshi OZAWA
      + The context of QueueMetrics becomes 'default' when using FairScheduler
      +
      I found that the context of QueueMetrics changed to 'default' from 'yarn' when I was using FairScheduler. +The context should always be 'yarn'; this can be fixed by adding an annotation to FSQueueMetrics as below: + +{code} ++ @Metrics(context="yarn") +public class FSQueueMetrics extends QueueMetrics { +{code}
    • +
    • YARN-1185. + Major sub-task reported by Jason Lowe and fixed by Omkar Vinit Joshi (resourcemanager)
      + FileSystemRMStateStore can leave partial files that prevent subsequent recovery
      +
      FileSystemRMStateStore writes directly to the destination file when storing state. However, if the RM were to crash in the middle of the write, the recovery method could encounter a partially-written file and either outright crash during recovery or silently load incomplete state. + +To avoid this, the data should be written to a temporary file and renamed to the destination file afterwards (a sketch of this pattern follows below).
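      A minimal sketch of the write-then-rename pattern against the Hadoop FileSystem API (class and method names here are illustrative, not the actual FileSystemRMStateStore change):
{code}
import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AtomicStateWrite {

  /**
   * Writes state to a ".tmp" sibling first and only renames it into place
   * once the data is fully flushed, so a crash mid-write never leaves a
   * partially written destination file behind.
   */
  public static void writeAtomically(FileSystem fs, Path dest, byte[] data)
      throws IOException {
    Path tmp = new Path(dest.getParent(), dest.getName() + ".tmp");
    FSDataOutputStream out = fs.create(tmp, true);
    try {
      out.write(data);
      out.hflush();
    } finally {
      out.close();
    }
    // Remove any stale destination, then move the temp file into place.
    fs.delete(dest, false);
    if (!fs.rename(tmp, dest)) {
      throw new IOException("Failed to rename " + tmp + " to " + dest);
    }
  }
}
{code}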
    • +
    • YARN-1183. + Major bug reported by Andrey Klochkov and fixed by Andrey Klochkov
      + MiniYARNCluster shutdown takes several minutes intermittently
      +
      As described in MAPREDUCE-5501, sometimes M/R tests leave MRAppMaster java processes living for several minutes after successful completion of the corresponding test. There is a concurrency issue in the MiniYARNCluster shutdown logic which leads to this. Sometimes the RM stops before an app master sends its last report, and then the app master keeps retrying for >6 minutes. In some cases this leads to failures in subsequent tests, and it affects the performance of tests as app masters eat resources.
    • +
    • YARN-1182. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla
      + MiniYARNCluster creates and inits the RM/NM only on start()
      +
      MiniYARNCluster creates and inits the RM/NM only on start(). It should create and init() during init() itself.
    • +
    • YARN-1181. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla
      + Augment MiniYARNCluster to support HA mode
      +
      MiniYARNHACluster, along the lines of MiniYARNCluster, is needed for end-to-end HA tests.
    • +
    • YARN-1180. + Trivial bug reported by Thomas Graves and fixed by Chen He (capacityscheduler)
      + Update capacity scheduler docs to include types on the configs
      +
      The capacity scheduler docs (http://hadoop.apache.org/docs/r2.1.0-beta/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html) don't include types for all the configs. For instance, minimum-user-limit-percent doesn't say it's an Int. It is also the only setting for the Resource Allocation configs that is an Int rather than a float.
    • +
    • YARN-1176. + Critical bug reported by Thomas Graves and fixed by Jonathan Eagles (resourcemanager)
      + RM web services ClusterMetricsInfo total nodes doesn't include unhealthy nodes
      +
      In the web services api for the cluster/metrics, the totalNodes reported doesn't include the unhealthy nodes. + +this.totalNodes = activeNodes + lostNodes + decommissionedNodes + + rebootedNodes;
    • +
    • YARN-1172. + Major sub-task reported by Karthik Kambatla and fixed by Tsuyoshi OZAWA (resourcemanager)
      + Convert *SecretManagers in the RM to services
      +
    • +
    • YARN-1145. + Major bug reported by Rohith and fixed by Rohith
      + Potential file handle leak in aggregated logs web ui
      +
      If there is any problem getting aggregated logs for rendering on the web UI, the LogReader is not closed. + +Because the reader is not closed, many connections are left in CLOSE_WAIT state (a sketch of the close-in-finally pattern follows below). + +hadoopuser@hadoopuser:> jps +*27909* JobHistoryServer + +DataNode port is 50010. When grepped for the DataNode port, many connections are in CLOSE_WAIT from JHS. +hadoopuser@hadoopuser:> netstat -tanlp |grep 50010 +tcp 0 0 10.18.40.48:50010 0.0.0.0:* LISTEN 21453/java +tcp 1 0 10.18.40.48:20596 10.18.40.48:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:19667 10.18.40.152:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:20593 10.18.40.48:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:12290 10.18.40.48:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:19662 10.18.40.152:50010 CLOSE_WAIT *27909*/java
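      A sketch of the close-in-finally pattern (a generic Closeable stands in for the actual aggregated-log reader):
{code}
import java.io.Closeable;
import java.io.IOException;

public class AggregatedLogRendering {

  /**
   * Renders aggregated logs but always closes the reader, even when
   * rendering fails, so the underlying connections do not pile up in
   * CLOSE_WAIT.
   */
  public static void renderWithCleanup(Closeable logReader, Runnable render) {
    try {
      render.run();
    } finally {
      try {
        logReader.close();
      } catch (IOException e) {
        // Best-effort close; real code would log the failure and move on.
      }
    }
  }
}
{code}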
    • +
    • YARN-1138. + Major bug reported by Yingda Chen and fixed by Chuan Liu (api)
      + yarn.application.classpath is set to point to $HADOOP_CONF_DIR etc., which does not work on Windows
      +
      yarn-default.xml has "yarn.application.classpath" entry set to $HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/,$HADOOP_COMMON_HOME/share/hadoop/common/lib/,$HADOOP_HDFS_HOME/share/hadoop/hdfs/,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib. It does not work on Windows which needs to be fixed.
    • +
    • YARN-1121. + Major sub-task reported by Bikas Saha and fixed by Jian He (resourcemanager)
      + RMStateStore should flush all pending store events before closing
      +
      on serviceStop it should wait for all internal pending events to drain before stopping.
    • +
    • YARN-1119. + Major test reported by Robert Parker and fixed by Mit Desai (resourcemanager)
      + Add ClusterMetrics checks to the TestRMNodeTransitions tests
      +
      YARN-1101 identified an issue where UNHEALTHY nodes could double decrement the active nodes. We should add checks for RUNNING node transitions.
    • +
    • YARN-1109. + Major improvement reported by Sandy Ryza and fixed by haosdent (nodemanager)
      + Demote NodeManager "Sending out status for container" logs to debug
      +
      Diagnosing NodeManager and container launch problems is made more difficult by the enormous number of logs like +{code} +Sending out status for container: container_id {, app_attempt_id {, application_id {, id: 18, cluster_timestamp: 1377559361179, }, attemptId: 1, }, id: 1337, }, state: C_RUNNING, diagnostics: "Container killed by the ApplicationMaster.\n", exit_status: -1000 +{code} + +On an NM with a few containers I am seeing tens of these per second.
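      The usual fix for this kind of chatty per-container logging is to demote the statement to DEBUG and guard it, along these lines (a generic sketch, not the exact NodeManager code):
{code}
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class ContainerStatusLogging {

  private static final Log LOG = LogFactory.getLog(ContainerStatusLogging.class);

  /**
   * Per-container status lines are only interesting when debugging, so log
   * them at DEBUG and skip the string concatenation entirely otherwise.
   */
  public static void logContainerStatus(Object status) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Sending out status for container: " + status);
    }
  }
}
{code}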
    • +
    • YARN-1101. + Major bug reported by Robert Parker and fixed by Robert Parker (resourcemanager)
      + Active nodes can be decremented below 0
      +
      The issue is in RMNodeImpl, where both the RUNNING and UNHEALTHY states transition to a deactivated state (LOST, DECOMMISSIONED, REBOOTED) using the same DeactivateNodeTransition class. The DeactivateNodeTransition class naturally decrements the active node count; however, in cases where the node has transitioned to UNHEALTHY, the active count has already been decremented.
    • +
    • YARN-1098. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Separate out RM services into "Always On" and "Active"
      +
      From discussion on YARN-1027, it makes sense to separate out services that are stateful and stateless. The stateless services can run perennially irrespective of whether the RM is in Active/Standby state, while the stateful services need to be started on transitionToActive() and completely shutdown on transitionToStandby(). + +The external-facing stateless services should respond to the client/AM/NM requests depending on whether the RM is Active/Standby. +
    • +
    • YARN-1068. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Add admin support for HA operations
      +
      Support HA admin operations to facilitate transitioning the RM to Active and Standby states.
    • +
    • YARN-1060. + Major bug reported by Sandy Ryza and fixed by Niranjan Singh (scheduler)
      + Two tests in TestFairScheduler are missing @Test annotation
      +
      Amazingly, these tests appear to pass with the annotations added.
    • +
    • YARN-1053. + Blocker bug reported by Omkar Vinit Joshi and fixed by Omkar Vinit Joshi
      + Diagnostic message from ContainerExitEvent is ignored in ContainerImpl
      +
      If the container launch fails then we send ContainerExitEvent. This event contains exitCode and diagnostic message. Today we are ignoring diagnostic message while handling this event inside ContainerImpl. Fixing it as it is useful in diagnosing the failure.
    • +
    • YARN-1044. + Critical bug reported by Sangjin Lee and fixed by Sangjin Lee (resourcemanager , scheduler)
      + used/min/max resources do not display info in the scheduler page
      +
      Go to the scheduler page in the RM, and click any queue to display the detailed info. You'll find that none of the resource entries (used, min, or max) display values. + +This is because the values contain brackets ("<" and ">") and are not properly html-escaped.
    • +
    • YARN-1033. + Major sub-task reported by Nemon Lou and fixed by Karthik Kambatla
      + Expose RM active/standby state to Web UI and REST API
      +
      Both the active and standby RM shall expose their web server and show their current state (active or standby) on the web page. Users should be able to access this information through the REST API as well.
    • +
    • YARN-1029. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Allow embedding leader election into the RM
      +
      It should be possible to embed the common ActiveStandbyElector into the RM such that ZooKeeper-based leader election and notification is built in. In conjunction with a ZK state store, this configuration will be a simple deployment option.
    • +
    • YARN-1028. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Add FailoverProxyProvider like capability to RMProxy
      +
      RMProxy layer currently abstracts RM discovery and implements it by looking up service information from configuration. Motivated by HDFS and using existing classes from Common, we can add failover proxy providers that may provide RM discovery in extensible ways.
    • +
    • YARN-1027. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Implement RMHAProtocolService
      +
      Implement existing HAServiceProtocol from Hadoop common. This protocol is the single point of interaction between the RM and HA clients/services.
    • +
    • YARN-1022. + Trivial bug reported by Bikas Saha and fixed by haosdent
      + Unnecessary INFO logs in AMRMClientAsync
      +
      Logs like the following should be debug or else every legitimate stop causes unnecessary exception traces in the logs. + +464 2013-08-03 20:01:34,459 INFO [AMRM Heartbeater thread] org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl: Heartbeater interrupted +465 java.lang.InterruptedException: sleep interrupted +466 at java.lang.Thread.sleep(Native Method) +467 at org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl$HeartbeatThread.run(AMRMClientAsyncImpl.java:249) +468 2013-08-03 20:01:34,460 INFO [AMRM Callback Handler Thread] org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl: Interrupted while waiting for queue +469 java.lang.InterruptedException +470 at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer. java:1961) +471 at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1996) +472 at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399) +473 at org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl$CallbackHandlerThread.run(AMRMClientAsyncImpl.java:275)
    • +
    • YARN-1021. + Major new feature reported by Wei Yan and fixed by Wei Yan (scheduler)
      + Yarn Scheduler Load Simulator
      +
      The Yarn Scheduler is a fertile area of interest with different implementations, e.g., Fifo, Capacity and Fair schedulers. Meanwhile, several optimizations are also made to improve scheduler performance for different scenarios and workloads. Each scheduler algorithm has its own set of features, and drives scheduling decisions by many factors, such as fairness, capacity guarantee, resource availability, etc. It is very important to evaluate a scheduler algorithm very well before we deploy it in a production cluster. Unfortunately, it is currently non-trivial to evaluate a scheduling algorithm. Evaluating in a real cluster is always time and cost consuming, and it is also very hard to find a large-enough cluster. Hence, a simulator which can predict how well a scheduler algorithm works for some specific workload would be quite useful. + +We want to build a Scheduler Load Simulator to simulate large-scale Yarn clusters and application loads on a single machine. This would be invaluable in furthering Yarn by providing a tool for researchers and developers to prototype new scheduler features and predict their behavior and performance with a reasonable amount of confidence, thereby aiding rapid innovation. + +The simulator will exercise the real Yarn ResourceManager, removing the network factor by simulating NodeManagers and ApplicationMasters via handling and dispatching NM/AM heartbeat events from within the same JVM. + +To keep track of scheduler behavior and performance, a scheduler wrapper will wrap the real scheduler. + +The simulator will produce real-time metrics while executing, including: + +* Resource usage for the whole cluster and each queue, which can be utilized to configure the cluster and each queue's capacity. +* The detailed application execution trace (recorded in relation to simulated time), which can be analyzed to understand/validate the scheduler behavior (individual jobs' turnaround time, throughput, fairness, capacity guarantee, etc). +* Several key metrics of the scheduler algorithm, such as the time cost of each scheduler operation (allocate, handle, etc), which can be utilized by Hadoop developers to find code hot spots and scalability limits. + +The simulator will provide real-time charts showing the behavior of the scheduler and its performance. + +A short demo is available at http://www.youtube.com/watch?v=6thLi8q0qLE, showing how to use the simulator to simulate the Fair Scheduler and the Capacity Scheduler.
    • +
    • YARN-1010. + Critical improvement reported by Alejandro Abdelnur and fixed by Wei Yan (scheduler)
      + FairScheduler: decouple container scheduling from nodemanager heartbeats
      +
      Currently, scheduling for a node is done when that node heartbeats. + +For large clusters where the heartbeat interval is set to several seconds, this delays the scheduling of incoming allocations significantly. + +We could have a continuous loop scanning all nodes and doing scheduling. If there is availability, AMs will get the allocation in the next heartbeat after the one that placed the request.
    • +
    • YARN-985. + Major improvement reported by Ravi Prakash and fixed by Ravi Prakash (nodemanager)
      + Nodemanager should log where a resource was localized
      +
      When a resource is localized, we should log WHERE on the local disk it was localized. This helps in debugging afterwards (e.g. if the disk was to go bad).
    • +
    • YARN-976. + Major sub-task reported by Sandy Ryza and fixed by Sandy Ryza (documentation)
      + Document the meaning of a virtual core
      +
      As virtual cores are a somewhat novel concept, it would be helpful to have thorough documentation that clarifies their meaning.
    • +
    • YARN-895. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + RM crashes if it restarts while the state-store is down
      +
    • +
    • YARN-891. + Major sub-task reported by Bikas Saha and fixed by Jian He (resourcemanager)
      + Store completed application information in RM state store
      +
      Store completed application/attempt info in the RMStateStore when an application/attempt completes. This solves some problems, like finished applications getting lost after RM restart, and some other races like YARN-1195.
    • +
    • YARN-888. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur
      + clean up POM dependencies
      +
      Intermediate 'pom' modules define dependencies inherited by leaf modules. + +This is causing issues in the IntelliJ IDE. + +We should normalize the leaf modules as in common, hdfs and tools, where all dependencies are defined in each leaf module and the intermediate 'pom' modules do not define any dependencies.
    • +
    • YARN-879. + Major bug reported by Junping Du and fixed by Junping Du
      + Fix tests w.r.t o.a.h.y.server.resourcemanager.Application
      +
      getResources() should return a list of containers allocated by the RM. However, it now returns null directly. The worse thing is: if LOG.debug is enabled, it will definitely cause an NPE.
    • +
    • YARN-819. + Major sub-task reported by Robert Parker and fixed by Robert Parker (nodemanager , resourcemanager)
      + ResourceManager and NodeManager should check for a minimum allowed version
      +
      Our use case is that during an upgrade on a large cluster, several NodeManagers may not restart with the new version. Once the RM comes back up, the NodeManager will re-register without issue to the RM. + +The NM should report its version to the RM. The RM should have a configuration to disallow the check (default), require the version to be equal to the RM (to prevent config changes for each release), equal to or greater than the RM (to allow NM upgrades), and finally an explicit version or version range. + +The RM should also have a configuration on how to treat a mismatch: REJECT, or REBOOT the NM.
    • +
    • YARN-807. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + When querying apps by queue, iterating over all apps is inefficient and limiting
      +
      The question "which apps are in queue x" can be asked via the RM REST APIs, through the ClientRMService, and through the command line. In all these cases, the question is answered by scanning through every RMApp and filtering by the app's queue name. + +All schedulers maintain a mapping of queues to applications. I think it would make more sense to ask the schedulers which applications are in a given queue. This is what was done in MR1. This would also have the advantage of allowing a parent queue to return all the applications on leaf queues under it, and allow queue name aliases, as in the way that "root.default" and "default" refer to the same queue in the fair scheduler. + +
    • +
    • YARN-786. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Expose application resource usage in RM REST API
      +
      It might be good to require users to explicitly ask for this information, as it's a little more expensive to collect than the other fields in AppInfo.
    • +
    • YARN-764. + Major bug reported by Nemon Lou and fixed by Nemon Lou (resourcemanager)
      + blank Used Resources on Capacity Scheduler page
      +
      Even when there are jobs running, used resources is empty on the Capacity Scheduler page for leaf queues. (I use google-chrome on windows 7.) +After changing Resource.java's toString() method by replacing "<>" with "{}", this bug gets fixed.
    • +
    • YARN-709. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + verify that new jobs submitted with old RM delegation tokens after RM restart are accepted
      +
      More elaborate test for restoring RM delegation tokens on RM restart. +New jobs with old RM delegation tokens should be accepted by new RM as long as the token is still valid
    • +
    • YARN-674. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Omkar Vinit Joshi (resourcemanager)
      + Slow or failing DelegationToken renewals on submission itself make RM unavailable
      +
      This was caused by YARN-280. A slow or down NameNode will make it look like the RM is unavailable, as it may run out of RPC handlers due to blocked client submissions.
    • +
    • YARN-649. + Major sub-task reported by Sandy Ryza and fixed by Sandy Ryza (nodemanager)
      + Make container logs available over HTTP in plain text
      +
      It would be good to make container logs available over the REST API for MAPREDUCE-4362 and so that they can be accessed programmatically in general.
    • +
    • YARN-584. + Major bug reported by Sandy Ryza and fixed by Harshit Daga (scheduler)
      + In scheduler web UIs, queues unexpand on refresh
      +
      In the fair scheduler web UI, you can expand queue information. Refreshing the page causes the expansions to go away, which is annoying for someone who wants to monitor the scheduler page and needs to reopen all the queues they care about each time.
    • +
    • YARN-546. + Major bug reported by Lohit Vijayarenu and fixed by Sandy Ryza (scheduler)
      + Allow disabling the Fair Scheduler event log
      +
      Hadoop 1.0 supported an option to turn on/off FairScheduler event logging using mapred.fairscheduler.eventlog.enabled. In Hadoop 2.0, it looks like this option has been removed (or not ported?) which causes event logging to be enabled by default and there is no way to turn it off.
    • +
    • YARN-478. + Major sub-task reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + fix coverage org.apache.hadoop.yarn.webapp.log
      +
      fix coverage org.apache.hadoop.yarn.webapp.log +one patch for trunk, branch-2, branch-0.23
    • +
    • YARN-465. + Major sub-task reported by Aleksey Gorshkov and fixed by Andrey Klochkov
      + fix coverage org.apache.hadoop.yarn.server.webproxy
      +
      fix coverage org.apache.hadoop.yarn.server.webproxy +patch YARN-465-trunk.patch for trunk +patch YARN-465-branch-2.patch for branch-2 +patch YARN-465-branch-0.23.patch for branch-0.23 + +There is an issue in branch-0.23: the patch does not create the .keep file. +To fix it, run these commands: + +mkdir yhadoop-common/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/proxy +touch yhadoop-common/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/proxy/.keep +
    • +
    • YARN-461. + Major bug reported by Sandy Ryza and fixed by Wei Yan (resourcemanager)
      + Fair scheduler should not accept apps with empty string queue name
      +
      When an app is submitted with "" for the queue, the RMAppManager passes it on like it does with any other string. +
    • +
    • YARN-427. + Major sub-task reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + Coverage fix for org.apache.hadoop.yarn.server.api.*
      +
      Coverage fix for org.apache.hadoop.yarn.server.api.* + +patch YARN-427-trunk.patch for trunk +patch YARN-427-branch-2.patch for branch-2 and branch-0.23
    • +
    • YARN-425. + Major sub-task reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + coverage fix for yarn api
      +
      coverage fix for yarn api +patch YARN-425-trunk-a.patch for trunk +patch YARN-425-branch-2.patch for branch-2 +patch YARN-425-branch-0.23.patch for branch-0.23
    • +
    • YARN-408. + Minor bug reported by Mayank Bansal and fixed by Mayank Bansal (scheduler)
      + Capacity Scheduler delay scheduling should not be disabled by default
      +
      Capacity Scheduler delay scheduling should not be disabled by default. +Enabling it to number of nodes in one rack. + +Thanks, +Mayank
    • +
    • YARN-353. + Major sub-task reported by Hitesh Shah and fixed by Karthik Kambatla (resourcemanager)
      + Add Zookeeper-based store implementation for RMStateStore
      +
      Add a store that writes RM state data to ZK. +
    • +
    • YARN-312. + Major sub-task reported by Junping Du and fixed by Junping Du (api)
      + Add updateNodeResource in ResourceManagerAdministrationProtocol
      +
      Add fundamental RPC (ResourceManagerAdministrationProtocol) to support node's resource change. For design detail, please refer parent JIRA: YARN-291.
    • +
    • YARN-311. + Major sub-task reported by Junping Du and fixed by Junping Du (resourcemanager , scheduler)
      + Dynamic node resource configuration: core scheduler changes
      +
      As the first step, we go for resource change on the RM side and expose admin APIs (admin protocol, CLI, REST and JMX API) later. This jira will only contain changes in the scheduler. +The flow to update a node's resource and the awareness in resource scheduling is: +1. Resource update is through the admin API to the RM and takes effect on RMNodeImpl. +2. When the next NM heartbeat for updating status comes, the RMNode's resource change will be noticed and the delta resource is added to the schedulerNode's availableResource before actual scheduling happens. +3. The scheduler does resource allocation according to the new availableResource in SchedulerNode. +For more design details, please refer to the proposal and discussions in the parent JIRA: YARN-291.
    • +
    • YARN-305. + Critical bug reported by Lohit Vijayarenu and fixed by Lohit Vijayarenu (resourcemanager)
      + Fair scheduler logs too many "Node offered to app:..." messages
      +
      Running fair scheduler YARN shows that RM has lots of messages like the below. +{noformat} +INFO org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AppSchedulable: Node offered to app: application_1357147147433_0002 reserved: false +{noformat} + +They dont seem to tell much and same line is dumped many times in RM log. It would be good to have it improved with node information or moved to some other logging level with enough debug information
    • +
    • YARN-7. + Major sub-task reported by Arun C Murthy and fixed by Junping Du
      + Add support for DistributedShell to ask for CPUs along with memory
      +
    • +
    • MAPREDUCE-5744. + Blocker bug reported by Sangjin Lee and fixed by Gera Shegalov
      + Job hangs because RMContainerAllocator$AssignedRequests.preemptReduce() violates the comparator contract
      +
    • +
    • MAPREDUCE-5743. + Major bug reported by Ted Yu and fixed by Ted Yu
      + TestRMContainerAllocator is failing
      +
    • +
    • MAPREDUCE-5729. + Critical bug reported by Karthik Kambatla and fixed by Karthik Kambatla (mrv2)
      + mapred job -list throws NPE
      +
    • +
    • MAPREDUCE-5725. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + TestNetworkedJob relies on the Capacity Scheduler
      +
    • +
    • MAPREDUCE-5724. + Critical bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (jobhistoryserver)
      + JobHistoryServer does not start if HDFS is not running
      +
    • +
    • MAPREDUCE-5723. + Blocker bug reported by Mohammad Kamrul Islam and fixed by Mohammad Kamrul Islam (applicationmaster)
      + MR AM container log can be truncated or empty
      +
    • +
    • MAPREDUCE-5694. + Major bug reported by Mohammad Kamrul Islam and fixed by Mohammad Kamrul Islam
      + MR AM container syslog is empty
      +
    • +
    • MAPREDUCE-5693. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (mrv2)
      + Restore MRv1 behavior for log flush
      +
    • +
    • MAPREDUCE-5692. + Major improvement reported by Gera Shegalov and fixed by Gera Shegalov (mrv2)
      + Add explicit diagnostics when a task attempt is killed due to speculative execution
      +
    • +
    • MAPREDUCE-5689. + Critical bug reported by Lohit Vijayarenu and fixed by Lohit Vijayarenu
      + MRAppMaster does not preempt reducers when scheduled maps cannot be fulfilled
      +
    • +
    • MAPREDUCE-5687. + Major test reported by Ted Yu and fixed by Jian He
      + TestYARNRunner#testResourceMgrDelegate fails with NPE after YARN-1446
      +
    • +
    • MAPREDUCE-5685. + Blocker bug reported by Yi Song and fixed by Yi Song (client)
      + getCacheFiles() api doesn't work in WrappedReducer.java due to typo
      +
    • +
    • MAPREDUCE-5679. + Major bug reported by Liyin Liang and fixed by Liyin Liang
      + TestJobHistoryParsing has race condition
      +
    • +
    • MAPREDUCE-5674. + Major bug reported by Chuan Liu and fixed by Chuan Liu (client)
      + Missing start and finish time in mapred.JobStatus
      +
    • +
    • MAPREDUCE-5672. + Major improvement reported by Gera Shegalov and fixed by Gera Shegalov (mr-am , mrv2)
      + Provide optional RollingFileAppender for container log4j (syslog)
      +
    • +
    • MAPREDUCE-5656. + Critical bug reported by Jason Lowe and fixed by Jason Lowe
      + bzip2 codec can drop records when reading data in splits
      +
    • +
    • MAPREDUCE-5650. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (mrv2)
      + Job fails when hprof mapreduce.task.profile.map/reduce.params is specified
      +
    • +
    • MAPREDUCE-5645. + Major bug reported by Jonathan Eagles and fixed by Mit Desai
      + TestFixedLengthInputFormat fails with native libs
      +
    • +
    • MAPREDUCE-5640. + Trivial improvement reported by Jason Lowe and fixed by Jason Lowe (test)
      + Rename TestLineRecordReader in jobclient module
      +
    • +
    • MAPREDUCE-5632. + Major test reported by Ted Yu and fixed by Jonathan Eagles
      + TestRMContainerAllocator#testUpdatedNodes fails
      +
    • +
    • MAPREDUCE-5631. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestJobEndNotifier.testNotifyRetries fails with Should have taken more than 5 seconds in jdk7
      +
    • +
    • MAPREDUCE-5625. + Major test reported by Jonathan Eagles and fixed by Mariappan Asokan
      + TestFixedLengthInputFormat fails in jdk7 environment
      +
    • +
    • MAPREDUCE-5623. + Major bug reported by Tsuyoshi OZAWA and fixed by Jason Lowe
      + TestJobCleanup fails because of RejectedExecutionException and NPE.
      +
    • +
    • MAPREDUCE-5616. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (client)
      + MR Client-AppMaster RPC max retries on socket timeout is too high.
      +
    • +
    • MAPREDUCE-5613. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (applicationmaster)
      + DefaultSpeculator holds and checks hashmap that is always empty
      +
    • +
    • MAPREDUCE-5610. + Major test reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestSleepJob fails in jdk7
      +
    • +
    • MAPREDUCE-5604. + Minor bug reported by Chris Nauroth and fixed by Chris Nauroth (test)
      + TestMRAMWithNonNormalizedCapabilities fails on Windows due to exceeding max path length
      +
    • +
    • MAPREDUCE-5601. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + ShuffleHandler fadvises file regions as DONTNEED even when fetch fails
      +
    • +
    • MAPREDUCE-5598. + Major bug reported by Robert Kanter and fixed by Robert Kanter (test)
      + TestUserDefinedCounters.testMapReduceJob is flakey
      +
    • +
    • MAPREDUCE-5596. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Allow configuring the number of threads used to serve shuffle connections
      +
    • +
    • MAPREDUCE-5587. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestTextOutputFormat fails on JDK7
      +
    • +
    • MAPREDUCE-5586. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestCopyMapper#testCopyFailOnBlockSizeDifference fails when run from hadoop-tools/hadoop-distcp directory
      +
    • +
    • MAPREDUCE-5585. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestCopyCommitter#testNoCommitAction Fails on JDK7
      +
    • +
    • MAPREDUCE-5569. + Major bug reported by Nathan Roberts and fixed by Nathan Roberts
      + FloatSplitter is not generating correct splits
      +
    • +
    • MAPREDUCE-5561. + Critical bug reported by Cindy Li and fixed by Karthik Kambatla
      + org.apache.hadoop.mapreduce.v2.app.job.impl.TestJobImpl testcase failing on trunk
      +
    • +
    • MAPREDUCE-5550. + Major bug reported by Vrushali C and fixed by Gera Shegalov
      + Task Status message (reporter.setStatus) not shown in UI with Hadoop 2.0
      +
    • +
    • MAPREDUCE-5546. + Major bug reported by Chuan Liu and fixed by Chuan Liu
      + mapred.cmd on Windows set HADOOP_OPTS incorrectly
      +
    • +
    • MAPREDUCE-5522. + Minor bug reported by Jinghui Wang and fixed by Jinghui Wang (test)
      + Incorrectly expect the array of JobQueueInfo returned by o.a.h.mapred.QueueManager#getJobQueueInfos to have a specific order.
      +
    • +
    • MAPREDUCE-5518. + Trivial bug reported by Albert Chu and fixed by Albert Chu (examples)
      + Fix typo "can't read paritions file"
      +
    • +
    • MAPREDUCE-5514. + Blocker bug reported by Zhijie Shen and fixed by Zhijie Shen
      + TestRMContainerAllocator fails on trunk
      +
    • +
    • MAPREDUCE-5504. + Major bug reported by Thomas Graves and fixed by Kousuke Saruta (client)
      + mapred queue -info inconsistent with types
      +
    • +
    • MAPREDUCE-5487. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (performance , task)
      + In task processes, JobConf is unnecessarily loaded again in Limits
      +
    • +
    • MAPREDUCE-5484. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (task)
      + YarnChild unnecessarily loads job conf twice
      +
    • +
    • MAPREDUCE-5481. + Blocker bug reported by Jason Lowe and fixed by Sandy Ryza (mrv2 , test)
      + Enable uber jobs to have multiple reducers
      +
    • +
    • MAPREDUCE-5464. + Major task reported by Sandy Ryza and fixed by Sandy Ryza
      + Add analogs of the SLOTS_MILLIS counters that jive with the YARN resource model
      +
    • +
    • MAPREDUCE-5463. + Major task reported by Sandy Ryza and fixed by Tsuyoshi OZAWA
      + Deprecate SLOTS_MILLIS counters
      +
    • +
    • MAPREDUCE-5457. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Add a KeyOnlyTextOutputReader to enable streaming to write out text files without separators
      +
    • +
    • MAPREDUCE-5451. + Major bug reported by Mostafa Elhemali and fixed by Yingda Chen
      + MR uses LD_LIBRARY_PATH which doesn't mean anything in Windows
      +
    • +
    • MAPREDUCE-5431. + Major bug reported by Timothy St. Clair and fixed by Timothy St. Clair (build)
      + Missing pom dependency in MR-client
      +
    • +
    • MAPREDUCE-5411. + Major sub-task reported by Ashwin Shankar and fixed by Ashwin Shankar (jobhistoryserver)
      + Refresh size of loaded job cache on history server
      +
    • +
    • MAPREDUCE-5409. + Major sub-task reported by Devaraj K and fixed by Gera Shegalov
      + MRAppMaster throws InvalidStateTransitonException: Invalid event: TA_TOO_MANY_FETCH_FAILURE at KILLED for TaskAttemptImpl
      +
    • +
    • MAPREDUCE-5404. + Major bug reported by Ted Yu and fixed by Ted Yu (jobhistoryserver)
      + HSAdminServer does not use ephemeral ports in minicluster mode
      +
    • +
    • MAPREDUCE-5386. + Major sub-task reported by Ashwin Shankar and fixed by Ashwin Shankar (jobhistoryserver)
      + Ability to refresh history server job retention and job cleaner settings
      +
    • +
    • MAPREDUCE-5380. + Major bug reported by Stephen Chu and fixed by Stephen Chu
      + Invalid mapred command should return non-zero exit code
      +
    • +
    • MAPREDUCE-5373. + Major bug reported by Chuan Liu and fixed by Jonathan Eagles
      + TestFetchFailure.testFetchFailureMultipleReduces could fail intermittently
      +
    • +
    • MAPREDUCE-5356. + Major sub-task reported by Ashwin Shankar and fixed by Ashwin Shankar (jobhistoryserver)
      + Ability to refresh aggregated log retention period and check interval
      +
    • +
    • MAPREDUCE-5332. + Major new feature reported by Jason Lowe and fixed by Jason Lowe (jobhistoryserver)
      + Support token-preserving restart of history server
      +
    • +
    • MAPREDUCE-5329. + Major bug reported by Avner BenHanoch and fixed by Avner BenHanoch (mr-am)
      + APPLICATION_INIT is never sent to AuxServices other than the builtin ShuffleHandler
      +
    • +
    • MAPREDUCE-5316. + Major bug reported by Ashwin Shankar and fixed by Ashwin Shankar (client)
      + job -list-attempt-ids command does not handle illegal task-state
      +
    • +
    • MAPREDUCE-5266. + Major new feature reported by Jason Lowe and fixed by Ashwin Shankar (jobhistoryserver)
      + Ability to refresh retention settings on history server
      +
    • +
    • MAPREDUCE-5265. + Major new feature reported by Jason Lowe and fixed by Ashwin Shankar (jobhistoryserver)
      + History server admin service to refresh user and superuser group mappings
      +
    • +
    • MAPREDUCE-5186. + Critical bug reported by Sangjin Lee and fixed by Robert Parker (job submission)
      + mapreduce.job.max.split.locations causes some splits created by CombineFileInputFormat to fail
      +
    • +
    • MAPREDUCE-5102. + Major test reported by Aleksey Gorshkov and fixed by Andrey Klochkov
      + fix coverage org.apache.hadoop.mapreduce.lib.db and org.apache.hadoop.mapred.lib.db
      +
    • +
    • MAPREDUCE-5084. + Major test reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + fix coverage org.apache.hadoop.mapreduce.v2.app.webapp and org.apache.hadoop.mapreduce.v2.hs.webapp
      +
    • +
    • MAPREDUCE-5052. + Critical bug reported by Kendall Thrapp and fixed by Chen He (jobhistoryserver , webapps)
      + Job History UI and web services confusing job start time and job submit time
      +
    • +
    • MAPREDUCE-5020. + Major bug reported by Trevor Robinson and fixed by Trevor Robinson (client)
      + Compile failure with JDK8
      +
    • +
    • MAPREDUCE-4680. + Major bug reported by Sandy Ryza and fixed by Robert Kanter (jobhistoryserver)
      + Job history cleaner should only check timestamps of files in old enough directories
      +
    • +
    • MAPREDUCE-4421. + Major improvement reported by Arun C Murthy and fixed by Jason Lowe
      + Run MapReduce framework via the distributed cache
      +
    • +
    • MAPREDUCE-3310. + Major improvement reported by Mathias Herberts and fixed by Alejandro Abdelnur (client)
      + Custom grouping comparator cannot be set for Combiners
      +
    • +
    • MAPREDUCE-1176. + Major new feature reported by BitsOfInfo and fixed by Mariappan Asokan
      + FixedLengthInputFormat and FixedLengthRecordReader
      +
      Addition of FixedLengthInputFormat and FixedLengthRecordReader in the org.apache.hadoop.mapreduce.lib.input package. These two classes can be used when you need to read data from files containing fixed length (fixed width) records. Such files have no CR/LF (or any combination thereof), no delimiters, etc., but each record is a fixed length, and extra data is padded with spaces. The data is one gigantic line within a file. When creating a job that specifies this input format, the job must have the "mapreduce.input.fixedlengthinputformat.record.length" property set, as follows: myJobConf.setInt("mapreduce.input.fixedlengthinputformat.record.length", [myFixedRecordLength]); (a fuller job-setup sketch follows below). + +Please see the javadoc for more details.
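      A short job-setup sketch under the stated contract; the input path and the 100-byte record length are example values, and the record reader hands each record to the mapper as a LongWritable byte offset and a BytesWritable of the raw record bytes:
{code}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthJobSetup {

  public static Job createJob(Configuration conf) throws Exception {
    // The record length must be set before the job runs.
    conf.setInt("mapreduce.input.fixedlengthinputformat.record.length", 100);

    Job job = Job.getInstance(conf, "fixed-length-example");
    job.setInputFormatClass(FixedLengthInputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/data/fixed-width-input"));
    // Mapper, reducer and output settings go here as for any other job.
    return job;
  }
}
{code}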
    • +
    • MAPREDUCE-434. + Minor improvement reported by Yoram Arnon and fixed by Aaron Kimball
      + LocalJobRunner limited to single reducer
      +
    • +
    • HDFS-5921. + Critical bug reported by Aaron T. Myers and fixed by Aaron T. Myers (namenode)
      + Cannot browse file system via NN web UI if any directory has the sticky bit set
      +
    • +
    • HDFS-5876. + Major bug reported by Haohui Mai and fixed by Haohui Mai (datanode)
      + SecureDataNodeStarter does not pick up configuration in hdfs-site.xml
      +
    • +
    • HDFS-5873. + Major bug reported by Yesha Vora and fixed by Haohui Mai
      + dfs.http.policy should have higher precedence over dfs.https.enable
      +
    • +
    • HDFS-5845. + Blocker bug reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + SecondaryNameNode dies when checkpointing with cache pools
      +
    • +
    • HDFS-5844. + Minor bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix broken link in WebHDFS.apt.vm
      +
    • +
    • HDFS-5842. + Major bug reported by Arpit Gupta and fixed by Jing Zhao (security)
      + Cannot create hftp filesystem when using a proxy user ugi and a doAs on a secure cluster
      +
    • +
    • HDFS-5841. + Major improvement reported by Andrew Wang and fixed by Andrew Wang
      + Update HDFS caching documentation with new changes
      +
    • +
    • HDFS-5837. + Major bug reported by Bryan Beaudreault and fixed by Tao Luo (namenode)
      + dfs.namenode.replication.considerLoad does not consider decommissioned nodes
      +
    • +
    • HDFS-5833. + Trivial improvement reported by Bangtao Zhou and fixed by (namenode)
      + SecondaryNameNode have an incorrect java doc
      +
    • +
    • HDFS-5830. + Blocker bug reported by Yongjun Zhang and fixed by Yongjun Zhang (caching , hdfs-client)
      + WebHdfsFileSystem.getFileBlockLocations throws IllegalArgumentException when accessing another cluster.
      +
    • +
    • HDFS-5825. + Minor improvement reported by Haohui Mai and fixed by Haohui Mai
      + Use FileUtils.copyFile() to implement DFSTestUtils.copyFile()
      +
    • +
    • HDFS-5806. + Major bug reported by Nathan Roberts and fixed by Nathan Roberts (balancer)
      + balancer should set SoTimeout to avoid indefinite hangs
      +
    • +
    • HDFS-5800. + Trivial bug reported by Kousuke Saruta and fixed by Kousuke Saruta (hdfs-client)
      + Typo: soft-limit for hard-limit in DFSClient
      +
    • +
    • HDFS-5789. + Major bug reported by Uma Maheswara Rao G and fixed by Uma Maheswara Rao G (namenode)
      + Some of snapshot APIs missing checkOperation double check in fsn
      +
    • +
    • HDFS-5788. + Major improvement reported by Nathan Roberts and fixed by Nathan Roberts (namenode)
      + listLocatedStatus response can be very large
      +
    • +
    • HDFS-5784. + Major sub-task reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (namenode)
      + reserve space in edit log header and fsimage header for feature flag section
      +
    • +
    • HDFS-5777. + Major bug reported by Jing Zhao and fixed by Jing Zhao (namenode)
      + Update LayoutVersion for the new editlog op OP_ADD_BLOCK
      +
    • +
    • HDFS-5766. + Major bug reported by Liang Xie and fixed by Liang Xie (hdfs-client)
      + In DFSInputStream, do not add datanode to deadNodes after InvalidEncryptionKeyException in fetchBlockByteRange
      +
    • +
    • HDFS-5762. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + BlockReaderLocal doesn't return -1 on EOF when doing zero-length reads
      +
    • +
    • HDFS-5756. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (libhdfs)
      + hadoopRzOptionsSetByteBufferPool does not accept NULL argument, contrary to docs
      +
    • +
    • HDFS-5748. + Major improvement reported by Kihwal Lee and fixed by Haohui Mai
      + Too much information shown in the dfs health page.
      +
    • +
    • HDFS-5747. + Minor bug reported by Tsz Wo (Nicholas), SZE and fixed by Arpit Agarwal (namenode)
      + BlocksMap.getStoredBlock(..) and BlockInfoUnderConstruction.addReplicaIfNotPresent(..) may throw NullPointerException
      +
    • +
    • HDFS-5728. + Critical bug reported by Vinayakumar B and fixed by Vinayakumar B (datanode)
      + [Diskfull] Block recovery will fail if the metafile does not have crc for all chunks of the block
      +
    • +
    • HDFS-5721. + Minor improvement reported by Ted Yu and fixed by Ted Yu
      + sharedEditsImage in Namenode#initializeSharedEdits() should be closed before method returns
      +
    • +
    • HDFS-5719. + Minor bug reported by Ted Yu and fixed by Ted Yu (namenode)
      + FSImage#doRollback() should close prevState before return
      +
    • +
    • HDFS-5710. + Major bug reported by Ted Yu and fixed by Uma Maheswara Rao G
      + FSDirectory#getFullPathName should check inodes against null
      +
    • +
    • HDFS-5704. + Major bug reported by Suresh Srinivas and fixed by Jing Zhao (namenode)
      + Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK
      +
      Add a new editlog record (OP_ADD_BLOCK) that only records allocation of the new block instead of the entire block list, on every block allocation.
    • +
    • HDFS-5703. + Major new feature reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (webhdfs)
      + Add support for HTTPS and swebhdfs to HttpFS
      +
    • +
    • HDFS-5695. + Major improvement reported by Haohui Mai and fixed by Haohui Mai (test)
      + Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper
      +
    • +
    • HDFS-5691. + Minor bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix typo in ShortCircuitLocalRead document
      +
    • +
    • HDFS-5690. + Blocker bug reported by Haohui Mai and fixed by Haohui Mai
      + DataNode fails to start in secure mode when dfs.http.policy equals to HTTP_ONLY
      +
    • +
    • HDFS-5681. + Major bug reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + renewLease should not hold fsn write lock
      +
    • +
    • HDFS-5677. + Minor improvement reported by Vincent Sheffer and fixed by Vincent Sheffer (datanode , ha)
      + Need error checking for HA cluster configuration
      +
    • +
    • HDFS-5676. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (hdfs-client)
      + fix inconsistent synchronization of CachingStrategy
      +
    • +
    • HDFS-5675. + Minor bug reported by Plamen Jeliazkov and fixed by Plamen Jeliazkov (benchmarks)
      + Add Mkdirs operation to NNThroughputBenchmark
      +
    • +
    • HDFS-5674. + Minor improvement reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (namenode)
      + Editlog code cleanup
      +
    • +
    • HDFS-5671. + Critical bug reported by JamesLi and fixed by JamesLi (hdfs-client)
      + Fix socket leak in DFSInputStream#getBlockReader
      +
    • +
    • HDFS-5667. + Major sub-task reported by Eric Sirianni and fixed by Arpit Agarwal (datanode)
      + Include DatanodeStorage in StorageReport
      +
    • +
    • HDFS-5666. + Minor bug reported by Colin Patrick McCabe and fixed by Jimmy Xiang (namenode)
      + Fix inconsistent synchronization in BPOfferService
      +
    • +
    • HDFS-5663. + Major improvement reported by Liang Xie and fixed by Liang Xie (hdfs-client)
      + make the retry time and interval value configurable in openInfo()
      +
      Makes the number of retries and the time between retries when getting the length of the last block on a file configurable. Below are the new configurations (a usage sketch follows below). + +dfs.client.retry.times.get-last-block-length +dfs.client.retry.interval-ms.get-last-block-length + +They default to 3 and 4000 respectively, these being the values that were previously hardcoded. + +
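      A small usage sketch; the values 5 and 6000 are arbitrary examples, with the defaults (3 and 4000) applying when the keys are left unset:
{code}
import org.apache.hadoop.conf.Configuration;

public class LastBlockLengthRetryConfig {

  /** Raises the retry count and interval for slow-to-settle last blocks. */
  public static Configuration tune(Configuration conf) {
    conf.setInt("dfs.client.retry.times.get-last-block-length", 5);
    conf.setInt("dfs.client.retry.interval-ms.get-last-block-length", 6000);
    return conf;
  }
}
{code}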
    • +
    • HDFS-5662. + Major improvement reported by Brandon Li and fixed by Brandon Li (namenode)
      + Can't decommission a DataNode due to file's replication factor larger than the rest of the cluster size
      +
    • +
    • HDFS-5661. + Major bug reported by Benoy Antony and fixed by Benoy Antony
      + Browsing FileSystem via web ui, should use datanode's fqdn instead of ip address
      +
    • +
    • HDFS-5657. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + race condition causes writeback state error in NFS gateway
      +
    • +
    • HDFS-5652. + Minor improvement reported by Liang Xie and fixed by Liang Xie (hdfs-client)
      + refactoring/uniforming invalid block token exception handling in DFSInputStream
      +
    • +
    • HDFS-5649. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + Unregister NFS and Mount service when NFS gateway is shutting down
      +
    • +
    • HDFS-5637. + Major improvement reported by Liang Xie and fixed by Liang Xie (hdfs-client , security)
      + try to refetchToken while local read InvalidToken occurred
      +
    • +
    • HDFS-5634. + Major sub-task reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (hdfs-client)
      + allow BlockReaderLocal to switch between checksumming and not
      +
    • +
    • HDFS-5633. + Minor improvement reported by Jing Zhao and fixed by Jing Zhao
      + Improve OfflineImageViewer to use less memory
      +
    • +
    • HDFS-5629. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Support HTTPS in JournalNode and SecondaryNameNode
      +
    • +
    • HDFS-5592. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B
      + "DIR* completeFile: /file is closed by DFSClient_" should be logged only for successful closure of the file.
      +
    • +
    • HDFS-5590. + Major bug reported by Jing Zhao and fixed by Jing Zhao
      + Block ID and generation stamp may be reused when persistBlocks is set to false
      +
    • +
    • HDFS-5587. + Minor improvement reported by Brandon Li and fixed by Brandon Li (nfs)
      + add debug information when NFS fails to start with duplicate user or group names
      +
    • +
    • HDFS-5582. + Minor bug reported by Henry Hung and fixed by sathish
      + hdfs getconf -excludeFile or -includeFile always failed
      +
    • +
    • HDFS-5581. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (namenode)
      + NameNodeFsck should use only one instance of BlockPlacementPolicy
      +
    • +
    • HDFS-5580. + Major bug reported by Binglin Chang and fixed by Binglin Chang
      + Infinite loop in Balancer.waitForMoveCompletion
      +
    • +
    • HDFS-5579. + Major bug reported by zhaoyunjiong and fixed by zhaoyunjiong (namenode)
      + Under construction files make DataNode decommission take very long hours
      +
    • +
    • HDFS-5577. + Trivial improvement reported by Brandon Li and fixed by Brandon Li (documentation)
      + NFS user guide update
      +
    • +
    • HDFS-5568. + Major improvement reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + Support inclusion of snapshot paths in Namenode fsck
      +
    • +
    • HDFS-5563. + Major improvement reported by Brandon Li and fixed by Brandon Li (nfs)
      + NFS gateway should commit the buffered data when read request comes after write to the same file
      +
    • +
    • HDFS-5561. + Minor improvement reported by Fengdong Yu and fixed by Haohui Mai (namenode)
      + FSNameSystem#getNameJournalStatus() in JMX should return plain text instead of HTML
      +
    • +
    • HDFS-5560. + Major bug reported by Josh Elser and fixed by Josh Elser
      + Trash configuration log statements prints incorrect units
      +
    • +
    • HDFS-5558. + Major bug reported by Kihwal Lee and fixed by Kihwal Lee
      + LeaseManager monitor thread can crash if the last block is complete but another block is not.
      +
    • +
    • HDFS-5557. + Critical bug reported by Kihwal Lee and fixed by Kihwal Lee
      + Write pipeline recovery for the last packet in the block may cause rejection of valid replicas
      +
    • +
    • HDFS-5552. + Major bug reported by Shinichi Yamashita and fixed by Haohui Mai (namenode)
      + Fix wrong information of "Cluster summay" in dfshealth.html
      +
    • +
    • HDFS-5548. + Major improvement reported by Haohui Mai and fixed by Haohui Mai (nfs)
      + Use ConcurrentHashMap in portmap
      +
    • +
    • HDFS-5545. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Allow specifying endpoints for listeners in HttpServer
      +
    • +
    • HDFS-5544. + Minor bug reported by sathish and fixed by sathish (hdfs-client)
      + Adding Test case For Checking dfs.checksum type as NULL value
      +
    • +
    • HDFS-5540. + Minor bug reported by Binglin Chang and fixed by Binglin Chang
      + Fix intermittent failure in TestBlocksWithNotEnoughRacks
      +
    • +
    • HDFS-5538. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + URLConnectionFactory should pick up the SSL related configuration by default
      +
    • +
    • HDFS-5536. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Implement HTTP policy for Namenode and DataNode
      +
      Adds a new HTTP policy configuration. Users can use "dfs.http.policy" to control the HTTP endpoints of the NameNode and DataNode. The following values are supported:
      - HTTP_ONLY : service is provided only over HTTP
      - HTTPS_ONLY : service is provided only over HTTPS
      - HTTP_AND_HTTPS : service is provided over both HTTP and HTTPS

      hadoop.ssl.enabled and dfs.https.enable are deprecated. When the deprecated properties are still configured, the HTTP policy is decided by the following rules:
      1. If dfs.http.policy is set to HTTPS_ONLY or HTTP_AND_HTTPS, that policy is used; otherwise proceed to rules 2-4.
      2. HTTPS_ONLY is picked if hadoop.ssl.enabled is true.
      3. HTTP_AND_HTTPS is picked if dfs.https.enable is true.
      4. HTTP_ONLY is picked otherwise.
      A sketch of this precedence follows this entry.
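      As a minimal sketch, the documented precedence can be restated in client-side code. This is a toy re-statement of the four rules quoted above using plain Configuration lookups, not the actual NameNode/DataNode code path.

      import org.apache.hadoop.conf.Configuration;

      public class HttpPolicyResolutionSketch {

        static String resolvePolicy(Configuration conf) {
          String policy = conf.get("dfs.http.policy");
          // Rule 1: an explicit HTTPS_ONLY / HTTP_AND_HTTPS wins outright.
          if ("HTTPS_ONLY".equals(policy) || "HTTP_AND_HTTPS".equals(policy)) {
            return policy;
          }
          // Rule 2: the deprecated hadoop.ssl.enabled implies HTTPS_ONLY.
          if (conf.getBoolean("hadoop.ssl.enabled", false)) {
            return "HTTPS_ONLY";
          }
          // Rule 3: the deprecated dfs.https.enable implies HTTP_AND_HTTPS.
          if (conf.getBoolean("dfs.https.enable", false)) {
            return "HTTP_AND_HTTPS";
          }
          // Rule 4: everything else falls back to HTTP_ONLY.
          return "HTTP_ONLY";
        }

        public static void main(String[] args) {
          Configuration conf = new Configuration();
          conf.setBoolean("dfs.https.enable", true); // deprecated knob, set only for the demo
          System.out.println(resolvePolicy(conf));   // prints HTTP_AND_HTTPS
        }
      }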
    • +
    • HDFS-5533. + Minor bug reported by Binglin Chang and fixed by Binglin Chang (snapshots)
      + Symlink delete/create should be treated as DELETE/CREATE in snapshot diff report
      +
    • +
    • HDFS-5532. + Major improvement reported by Vinayakumar B and fixed by Vinayakumar B (webhdfs)
      + Enable the webhdfs by default to support new HDFS web UI
      +
    • +
    • HDFS-5526. + Blocker bug reported by Tsz Wo (Nicholas), SZE and fixed by Kihwal Lee (datanode)
      + Datanode cannot roll back to previous layout version
      +
    • +
    • HDFS-5525. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Inline dust templates
      +
    • +
    • HDFS-5519. + Minor sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + COMMIT handler should update the commit status after sync
      +
    • +
    • HDFS-5514. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + FSNamesystem's fsLock should allow custom implementation
      +
    • +
    • HDFS-5506. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Use URLConnectionFactory in DelegationTokenFetcher
      +
    • +
    • HDFS-5504. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, leads to NN safemode.
      +
    • +
    • HDFS-5502. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Fix HTTPS support in HsftpFileSystem
      +
      Fixes the HTTPS support in HsftpFileSystem. With this change, the client now verifies the server certificate; in particular, it verifies the certificate's Common Name using the strategy specified by the configuration property "hadoop.ssl.hostname.verifier".
    • +
    • HDFS-5495. + Major improvement reported by Andrew Wang and fixed by Jarek Jarcec Cecho
      + Remove further JUnit3 usages from HDFS
      +
    • +
    • HDFS-5489. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Use TokenAspect in WebHDFSFileSystem
      +
    • +
    • HDFS-5488. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Clean up TestHftpURLTimeout
      +
    • +
    • HDFS-5487. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Introduce unit test for TokenAspect
      +
    • +
    • HDFS-5476. + Major bug reported by Jing Zhao and fixed by Jing Zhao
      + Snapshot: clean the blocks/files/directories under a renamed file/directory while deletion
      +
    • +
    • HDFS-5474. + Major bug reported by Uma Maheswara Rao G and fixed by sathish (snapshots)
      + Deletesnapshot can make Namenode in safemode on NN restarts.
      +
    • +
    • HDFS-5469. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Add configuration property for the sub-directory export path
      +
    • +
    • HDFS-5467. + Trivial improvement reported by Andrew Wang and fixed by Shinichi Yamashita
      + Remove tab characters in hdfs-default.xml
      +
    • +
    • HDFS-5458. + Major bug reported by Andrew Wang and fixed by Mike Mellenthin (datanode)
      + Datanode failed volume threshold ignored if exception is thrown in getDataDirsFromURIs
      +
    • +
    • HDFS-5456. + Critical bug reported by Chris Nauroth and fixed by Chris Nauroth (namenode)
      + NameNode startup progress creates new steps if caller attempts to create a counter for a step that doesn't already exist.
      +
    • +
    • HDFS-5454. + Minor sub-task reported by Eric Sirianni and fixed by Arpit Agarwal (datanode)
      + DataNode UUID should be assigned prior to FsDataset initialization
      +
    • +
    • HDFS-5449. + Blocker bug reported by Kihwal Lee and fixed by Kihwal Lee
      + WebHdfs compatibility broken between 2.2 and 1.x / 23.x
      +
    • +
    • HDFS-5444. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Choose default web UI based on browser capabilities
      +
    • +
    • HDFS-5443. + Major bug reported by Uma Maheswara Rao G and fixed by Jing Zhao (snapshots)
      + Delete 0-sized block when deleting an under-construction file that is included in snapshot
      +
    • +
    • HDFS-5440. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Extract the logic of handling delegation tokens in HftpFileSystem to the TokenAspect class
      +
    • +
    • HDFS-5438. + Critical bug reported by Kihwal Lee and fixed by Kihwal Lee (namenode)
      + Flaws in block report processing can cause data loss
      +
    • +
    • HDFS-5436. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Move HsFtpFileSystem and HFtpFileSystem into org.apache.hdfs.web
      +
    • +
    • HDFS-5434. + Minor bug reported by Buddy and fixed by (namenode)
      + Write resiliency for replica count 1
      +
    • +
    • HDFS-5433. + Critical bug reported by Aaron T. Myers and fixed by Aaron T. Myers (snapshots)
      + When reloading fsimage during checkpointing, we should clear existing snapshottable directories
      +
    • +
    • HDFS-5432. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (datanode , test)
      + TestDatanodeJsp fails on Windows due to assumption that loopback address resolves to host name localhost.
      +
    • +
    • HDFS-5428. + Major bug reported by Vinayakumar B and fixed by Jing Zhao (snapshots)
      + under construction files deletion after snapshot+checkpoint+nn restart leads nn safemode
      +
    • +
    • HDFS-5427. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + not able to read deleted files from snapshot directly under snapshottable dir after checkpoint and NN restart
      +
    • +
    • HDFS-5425. + Major bug reported by sathish and fixed by Jing Zhao (namenode , snapshots)
      + Renaming underconstruction file with snapshots can make NN failure on restart
      +
    • +
    • HDFS-5413. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (scripts)
      + hdfs.cmd does not support passthrough to any arbitrary class.
      +
    • +
    • HDFS-5407. + Trivial bug reported by Haohui Mai and fixed by Haohui Mai
      + Fix typos in DFSClientCache
      +
    • +
    • HDFS-5406. + Major sub-task reported by Arpit Agarwal and fixed by Arpit Agarwal (datanode)
      + Send incremental block reports for all storages in a single call
      +
    • +
    • HDFS-5403. + Major bug reported by Aaron T. Myers and fixed by Aaron T. Myers (webhdfs)
      + WebHdfs client cannot communicate with older WebHdfs servers post HDFS-5306
      +
    • +
    • HDFS-5400. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT constant is set to the wrong value
      +
    • +
    • HDFS-5399. + Major improvement reported by Jing Zhao and fixed by Jing Zhao
      + Revisit SafeModeException and corresponding retry policies
      +
    • +
    • HDFS-5393. + Minor sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Serve bootstrap and jQuery locally
      +
    • +
    • HDFS-5382. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Implement the UI of browsing filesystems in HTML 5 page
      +
    • +
    • HDFS-5379. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Update links to datanode information in dfshealth.html
      +
    • +
    • HDFS-5375. + Minor bug reported by Chris Nauroth and fixed by Chris Nauroth (tools)
      + hdfs.cmd does not expose several snapshot commands.
      +
    • +
    • HDFS-5374. + Trivial bug reported by Suresh Srinivas and fixed by Suresh Srinivas
      + Remove deadcode in DFSOutputStream
      +
    • +
    • HDFS-5372. + Major bug reported by Tsz Wo (Nicholas), SZE and fixed by Vinayakumar B (namenode)
      + In FSNamesystem, hasReadLock() returns false if the current thread holds the write lock
      +
    • +
    • HDFS-5371. + Minor improvement reported by Jing Zhao and fixed by Jing Zhao (ha , test)
      + Let client retry the same NN when "dfs.client.test.drop.namenode.response.number" is enabled
      +
    • +
    • HDFS-5370. + Trivial bug reported by Kousuke Saruta and fixed by Kousuke Saruta (hdfs-client)
      + Typo in Error Message: different between range in condition and range in error message
      +
    • +
    • HDFS-5365. + Major bug reported by Radim Kolar and fixed by Radim Kolar (build , libhdfs)
      + Fix libhdfs compile error on FreeBSD9
      +
    • +
    • HDFS-5364. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Add OpenFileCtx cache
      +
    • +
    • HDFS-5363. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Refactor WebHdfsFileSystem: move SPNEGO-authenticated connection creation to URLConnectionFactory
      +
    • +
    • HDFS-5360. + Minor improvement reported by Shinichi Yamashita and fixed by Shinichi Yamashita (snapshots)
      + Improvement of usage message of renameSnapshot and deleteSnapshot
      +
    • +
    • HDFS-5353. + Blocker bug reported by Haohui Mai and fixed by Colin Patrick McCabe
      + Short circuit reads fail when dfs.encrypt.data.transfer is enabled
      +
    • +
    • HDFS-5352. + Minor bug reported by Ted Yu and fixed by Ted Yu
      + Server#initLog() doesn't close InputStream in httpfs
      +
    • +
    • HDFS-5350. + Minor improvement reported by Rob Weltman and fixed by Jimmy Xiang (namenode)
      + Name Node should report fsimage transfer time as a metric
      +
    • +
    • HDFS-5347. + Major sub-task reported by Brandon Li and fixed by Brandon Li (documentation)
      + add HDFS NFS user guide
      +
    • +
    • HDFS-5346. + Major bug reported by Kihwal Lee and fixed by Ravi Prakash (namenode , performance)
      + Avoid unnecessary call to getNumLiveDataNodes() for each block during IBR processing
      +
    • +
    • HDFS-5344. + Minor improvement reported by sathish and fixed by sathish (snapshots , tools)
      + Make LsSnapshottableDir as Tool interface implementation
      +
    • +
    • HDFS-5343. + Major bug reported by sathish and fixed by sathish (hdfs-client)
      + When cat command is issued on snapshot files getting unexpected result
      +
    • +
    • HDFS-5342. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Provide more information in the FSNamesystem JMX interfaces
      +
    • +
    • HDFS-5341. + Major bug reported by qus-jiawei and fixed by qus-jiawei (datanode)
      + Reduce fsdataset lock duration during directory scanning.
      +
    • +
    • HDFS-5338. + Major improvement reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (namenode)
      + Add a conf to disable hostname check in DN registration
      +
    • +
    • HDFS-5337. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + should do hsync for a commit request even there is no pending writes
      +
    • +
    • HDFS-5336. + Minor bug reported by Akira AJISAKA and fixed by Akira AJISAKA (namenode)
      + DataNode should not output 'StartupProgress' metrics
      +
    • +
    • HDFS-5335. + Major bug reported by Arpit Gupta and fixed by Haohui Mai
      + DFSOutputStream#close() keeps throwing exceptions when it is called multiple times
      +
    • +
    • HDFS-5334. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Implement dfshealth.jsp in HTML pages
      +
    • +
    • HDFS-5331. + Major improvement reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + make SnapshotDiff.java to a o.a.h.util.Tool interface implementation
      +
    • +
    • HDFS-5330. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + fix readdir and readdirplus for large directories
      +
    • +
    • HDFS-5329. + Major bug reported by Brandon Li and fixed by Brandon Li (namenode , nfs)
      + Update FSNamesystem#getListing() to handle inode path in startAfter token
      +
    • +
    • HDFS-5325. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Remove WebHdfsFileSystem#ConnRunner
      +
    • +
    • HDFS-5323. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (namenode)
      + Remove some deadcode in BlockManager
      +
    • +
    • HDFS-5322. + Major bug reported by Arpit Gupta and fixed by Jing Zhao (ha)
      + HDFS delegation token not found in cache errors seen on secure HA clusters
      +
    • +
    • HDFS-5317. + Critical sub-task reported by Suresh Srinivas and fixed by Haohui Mai
      + Go back to DFS Home link does not work on datanode webUI
      +
    • +
    • HDFS-5316. + Critical sub-task reported by Suresh Srinivas and fixed by Haohui Mai
      + Namenode ignores the default https port
      +
    • +
    • HDFS-5312. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Generate HTTP / HTTPS URL in DFSUtil#getInfoServer() based on the configured http policy
      +
    • +
    • HDFS-5307. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Support both HTTP and HTTPS in jsp pages
      +
    • +
    • HDFS-5305. + Major bug reported by Suresh Srinivas and fixed by Suresh Srinivas
      + Add https support in HDFS
      +
    • +
    • HDFS-5297. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix dead links in HDFS site documents
      +
    • +
    • HDFS-5291. + Critical bug reported by Arpit Gupta and fixed by Jing Zhao (ha)
      + Clients need to retry when Active NN is in SafeMode
      +
    • +
    • HDFS-5288. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai (nfs)
      + Close idle connections in portmap
      +
    • +
    • HDFS-5283. + Critical bug reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + NN not coming out of startup safemode due to under construction blocks only inside snapshots also counted in safemode threshold
      +
    • +
    • HDFS-5281. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + COMMIT request should not block
      +
    • +
    • HDFS-5276. + Major bug reported by Chengxiang Li and fixed by Colin Patrick McCabe
      + FileSystem.Statistics got performance issue on multi-thread read/write.
      +
    • +
    • HDFS-5267. + Minor improvement reported by Junping Du and fixed by Junping Du
      + Remove volatile from LightWeightHashSet
      +
    • +
    • HDFS-5260. + Major new feature reported by Chris Nauroth and fixed by Chris Nauroth (hdfs-client , libhdfs)
      + Merge zero-copy memory-mapped HDFS client reads to trunk and branch-2.
      +
    • +
    • HDFS-5257. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (hdfs-client , namenode)
      + addBlock() retry should return LocatedBlock with locations else client will get AIOBE
      +
    • +
    • HDFS-5252. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Stable write is not handled correctly in someplace
      +
    • +
    • HDFS-5240. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + Separate formatting from logging in the audit logger API
      +
    • +
    • HDFS-5239. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + Allow FSNamesystem lock fairness to be configurable
      +
    • +
    • HDFS-5220. + Major improvement reported by Rob Weltman and fixed by Jimmy Xiang (namenode)
      + Expose group resolution time as metric
      +
    • +
    • HDFS-5207. + Major improvement reported by Junping Du and fixed by Junping Du (namenode)
      + In BlockPlacementPolicy, update 2 parameters of chooseTarget()
      +
    • +
    • HDFS-5188. + Major improvement reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (namenode)
      + Clean up BlockPlacementPolicy and its implementations
      +
    • +
    • HDFS-5171. + Major sub-task reported by Brandon Li and fixed by Haohui Mai (nfs)
      + NFS should create input stream for a file and try to share it with multiple read requests
      +
    • +
    • HDFS-5170. + Trivial bug reported by Andrew Wang and fixed by Andrew Wang
      + BlockPlacementPolicyDefault uses the wrong classname when alerting to enable debug logging
      +
    • +
    • HDFS-5164. + Minor bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (namenode)
      + deleteSnapshot should check if OperationCategory.WRITE is possible before taking write lock
      +
    • +
    • HDFS-5144. + Minor improvement reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Document time unit to NameNodeMetrics.java
      +
    • +
    • HDFS-5136. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + MNT EXPORT should give the full group list which can mount the exports
      +
    • +
    • HDFS-5130. + Minor test reported by Binglin Chang and fixed by Binglin Chang (test)
      + Add test for snapshot related FsShell and DFSAdmin commands
      +
    • +
    • HDFS-5122. + Major bug reported by Arpit Gupta and fixed by Haohui Mai (ha , webhdfs)
      + Support failover and retry in WebHdfsFileSystem for NN HA
      +
    • +
    • HDFS-5110. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Change FSDataOutputStream to HdfsDataOutputStream for opened streams to fix type cast error
      +
    • +
    • HDFS-5107. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Fix array copy error in Readdir and Readdirplus responses
      +
    • +
    • HDFS-5104. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Support dotdot name in NFS LOOKUP operation
      +
    • +
    • HDFS-5093. + Minor bug reported by Chuan Liu and fixed by Chuan Liu (test)
      + TestGlobPaths should re-use the MiniDFSCluster to avoid failure on Windows
      +
    • +
    • HDFS-5078. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Support file append in NFSv3 gateway to enable data streaming to HDFS
      +
    • +
    • HDFS-5075. + Major bug reported by Timothy St. Clair and fixed by Timothy St. Clair
      + httpfs-config.sh calls out incorrect env script name
      +
    • +
    • HDFS-5074. + Major bug reported by Todd Lipcon and fixed by Todd Lipcon (ha , namenode)
      + Allow starting up from an fsimage checkpoint in the middle of a segment
      +
    • +
    • HDFS-5073. + Minor bug reported by Kihwal Lee and fixed by Arpit Agarwal (test)
      + TestListCorruptFileBlocks fails intermittently
      +
    • +
    • HDFS-5071. + Major sub-task reported by Kihwal Lee and fixed by Brandon Li (nfs)
      + Change hdfs-nfs parent project to hadoop-project
      +
    • +
    • HDFS-5069. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Include hadoop-nfs and hadoop-hdfs-nfs into hadoop dist for NFS deployment
      +
    • +
    • HDFS-5068. + Major improvement reported by Konstantin Shvachko and fixed by Konstantin Shvachko (benchmarks)
      + Convert NNThroughputBenchmark to a Tool to allow generic options.
      +
    • +
    • HDFS-5065. + Major bug reported by Ivan Mitic and fixed by Ivan Mitic (hdfs-client , test)
      + TestSymlinkHdfsDisable fails on Windows
      +
    • +
    • HDFS-5043. + Major bug reported by Brandon Li and fixed by Brandon Li
      + For HdfsFileStatus, set default value of childrenNum to -1 instead of 0 to avoid confusing applications
      +
    • +
    • HDFS-5037. + Critical improvement reported by Todd Lipcon and fixed by Andrew Wang (ha , namenode)
      + Active NN should trigger its own edit log rolls
      +
    • +
    • HDFS-5035. + Major bug reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + getFileLinkStatus and rename do not correctly check permissions of symlinks
      +
    • +
    • HDFS-5034. + Trivial improvement reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + Remove debug prints from getFileLinkInfo
      +
    • +
    • HDFS-5023. + Major bug reported by Ravi Prakash and fixed by Mit Desai (snapshots , test)
      + TestSnapshotPathINodes.testAllowSnapshot is failing with jdk7
      +
    • +
    • HDFS-5014. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (datanode , ha)
      + BPOfferService#processCommandFromActor() synchronization on namenode RPC call delays IBR to Active NN, if Standby NN is unstable
      +
    • +
    • HDFS-5004. + Major improvement reported by Trevor Lorimer and fixed by Trevor Lorimer (namenode)
      + Add additional JMX bean for NameNode status data
      +
    • +
    • HDFS-4997. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (libhdfs)
      + libhdfs doesn't return correct error codes in most cases
      +
      libhdfs now returns correct codes in errno. Previously, due to a bug, many functions set errno to 255 instead of the more specific error code.
    • +
    • HDFS-4995. + Major bug reported by Kihwal Lee and fixed by Kihwal Lee (namenode)
      + Make getContentSummary() less expensive
      +
    • +
    • HDFS-4994. + Minor bug reported by Kihwal Lee and fixed by Robert Parker (namenode)
      + Audit log getContentSummary() calls
      +
    • +
    • HDFS-4983. + Major improvement reported by Harsh J and fixed by Yongjun Zhang (webhdfs)
      + Numeric usernames do not work with WebHDFS FS
      +
      Add a new configuration property "dfs.webhdfs.user.provider.user.pattern" for specifying user name filters for WebHDFS.
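      A minimal sketch of using the new property. The property name is taken from this note; the regex shown is only an illustrative pattern that also admits usernames beginning with a digit, not the shipped default.

      import org.apache.hadoop.conf.Configuration;

      public class WebHdfsUserPatternSketch {
        public static void main(String[] args) {
          Configuration conf = new Configuration();
          // Illustrative pattern: allow user names that start with a digit as well.
          conf.set("dfs.webhdfs.user.provider.user.pattern",
                   "^[A-Za-z0-9_][A-Za-z0-9._-]*[$]?$");
          System.out.println(conf.get("dfs.webhdfs.user.provider.user.pattern"));
        }
      }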
    • +
    • HDFS-4962. + Minor sub-task reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (nfs)
      + Use enum for nfs constants
      +
    • +
    • HDFS-4949. + Major new feature reported by Andrew Wang and fixed by Andrew Wang (datanode , namenode)
      + Centralized cache management in HDFS
      +
    • +
    • HDFS-4948. + Major bug reported by Robert Joseph Evans and fixed by Brandon Li
      + mvn site for hadoop-hdfs-nfs fails
      +
    • +
    • HDFS-4947. + Major sub-task reported by Brandon Li and fixed by Jing Zhao (nfs)
      + Add NFS server export table to control export by hostname or IP range
      +
    • +
    • HDFS-4885. + Major sub-task reported by Junping Du and fixed by Junping Du
      + Update verifyBlockPlacement() API in BlockPlacementPolicy
      +
    • +
    • HDFS-4879. + Major improvement reported by Todd Lipcon and fixed by Todd Lipcon (namenode)
      + Add "blocked ArrayList" collection to avoid CMS full GCs
      +
    • +
    • HDFS-4860. + Major improvement reported by Trevor Lorimer and fixed by Trevor Lorimer (namenode)
      + Add additional attributes to JMX beans
      +
    • +
    • HDFS-4816. + Major bug reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + transitionToActive blocks if the SBN is doing checkpoint image transfer
      +
    • +
    • HDFS-4772. + Minor improvement reported by Brandon Li and fixed by Brandon Li (namenode)
      + Add number of children in HdfsFileStatus
      +
    • +
    • HDFS-4763. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Add script changes/utility for starting NFS gateway
      +
    • +
    • HDFS-4762. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Provide HDFS based NFSv3 and Mountd implementation
      +
    • +
    • HDFS-4657. + Major bug reported by Aaron T. Myers and fixed by Aaron T. Myers (namenode)
      + Limit the number of blocks logged by the NN after a block report to a configurable value.
      +
    • +
    • HDFS-4633. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (hdfs-client , test)
      + TestDFSClientExcludedNodes fails sporadically if excluded nodes cache expires too quickly
      +
    • +
    • HDFS-4517. + Major test reported by Vadim Bondarev and fixed by Ivan A. Veselovsky
      + Cover class RemoteBlockReader with unit tests
      +
    • +
    • HDFS-4516. + Critical bug reported by Uma Maheswara Rao G and fixed by Vinayakumar B (namenode)
      + Client crash after block allocation and NN switch before lease recovery for the same file can cause readers to fail forever
      +
    • +
    • HDFS-4512. + Major test reported by Vadim Bondarev and fixed by Vadim Bondarev
      + Cover package org.apache.hadoop.hdfs.server.common with tests
      +
    • +
    • HDFS-4511. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover package org.apache.hadoop.hdfs.tools with unit test
      +
    • +
    • HDFS-4510. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover classes ClusterJspHelper/NamenodeJspHelper with unit tests
      +
    • +
    • HDFS-4491. + Major test reported by Tsuyoshi OZAWA and fixed by Andrey Klochkov (test)
      + Parallel testing HDFS
      +
    • +
    • HDFS-4376. + Major bug reported by Aaron T. Myers and fixed by Junping Du (balancer)
      + Fix several race conditions in Balancer and resolve intermittent timeout of TestBalancerWithNodeGroup
      +
    • +
    • HDFS-4329. + Major bug reported by Andy Isaacson and fixed by Cristina L. Abad (hdfs-client)
      + DFSShell issues with directories with spaces in name
      +
    • +
    • HDFS-4278. + Major improvement reported by Harsh J and fixed by Kousuke Saruta (datanode , namenode)
      + Log an ERROR when DFS_BLOCK_ACCESS_TOKEN_ENABLE config is disabled but security is turned on.
      +
    • +
    • HDFS-4201. + Critical bug reported by Eli Collins and fixed by Jimmy Xiang (namenode)
      + NPE in BPServiceActor#sendHeartBeat
      +
    • +
    • HDFS-4096. + Major sub-task reported by Jing Zhao and fixed by Haohui Mai (datanode , namenode)
      + Add snapshot information to namenode WebUI
      +
    • +
    • HDFS-3987. + Major sub-task reported by Alejandro Abdelnur and fixed by Haohui Mai
      + Support webhdfs over HTTPS
      +
    • +
    • HDFS-3981. + Major bug reported by Xiaobo Peng and fixed by Xiaobo Peng (namenode)
      + access time is set without holding FSNamesystem write lock
      +
    • +
    • HDFS-3934. + Minor bug reported by Andy Isaacson and fixed by Colin Patrick McCabe
      + duplicative dfs_hosts entries handled wrong
      +
    • +
    • HDFS-2933. + Major improvement reported by Philip Zeyliger and fixed by Vivek Ganesan (datanode)
      + Improve DataNode Web UI Index Page
      +
    • +
    • HADOOP-10317. + Major bug reported by Andrew Wang and fixed by Andrew Wang
      + Rename branch-2.3 release version from 2.4.0-SNAPSHOT to 2.3.0-SNAPSHOT
      +
    • +
    • HADOOP-10313. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (build)
      + Script and jenkins job to produce Hadoop release artifacts
      +
    • +
    • HADOOP-10311. + Blocker bug reported by Suresh Srinivas and fixed by Alejandro Abdelnur
      + Cleanup vendor names from the code base
      +
    • +
    • HADOOP-10310. + Blocker bug reported by Aaron T. Myers and fixed by Aaron T. Myers (security)
      + SaslRpcServer should be initialized even when no secret manager present
      +
    • +
    • HADOOP-10305. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (metrics)
      + Add "rpc.metrics.quantile.enable" and "rpc.metrics.percentiles.intervals" to core-default.xml
      +
    • +
    • HADOOP-10292. + Major bug reported by Haohui Mai and fixed by Haohui Mai
      + Restore HttpServer from branch-2.2 in branch-2
      +
    • +
    • HADOOP-10291. + Major bug reported by Mit Desai and fixed by Mit Desai
      + TestSecurityUtil#testSocketAddrWithIP fails
      +
    • +
    • HADOOP-10288. + Major bug reported by Todd Lipcon and fixed by Todd Lipcon (util)
      + Explicit reference to Log4JLogger breaks non-log4j users
      +
    • +
    • HADOOP-10274. + Minor improvement reported by takeshi.miao and fixed by takeshi.miao (security)
      + Lower the logging level from ERROR to WARN for UGI.doAs method
      +
    • +
    • HADOOP-10273. + Major bug reported by Arpit Agarwal and fixed by Arpit Agarwal (build)
      + Fix 'mvn site'
      +
    • +
    • HADOOP-10255. + Blocker bug reported by Haohui Mai and fixed by Haohui Mai
      + Rename HttpServer to HttpServer2 to retain older HttpServer in branch-2 for compatibility
      +
    • +
    • HADOOP-10252. + Major bug reported by Jimmy Xiang and fixed by Jimmy Xiang
      + HttpServer can't start if hostname is not specified
      +
    • +
    • HADOOP-10250. + Major bug reported by Yongjun Zhang and fixed by Yongjun Zhang
      + VersionUtil returns wrong value when comparing two versions
      +
    • +
    • HADOOP-10248. + Major improvement reported by Ted Yu and fixed by Akira AJISAKA
      + Property name should be included in the exception where property value is null
      +
    • +
    • HADOOP-10240. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (documentation)
      + Windows build instructions incorrectly state requirement of protoc 2.4.1 instead of 2.5.0
      +
    • +
    • HADOOP-10236. + Trivial bug reported by Akira AJISAKA and fixed by Akira AJISAKA
      + Fix typo in o.a.h.ipc.Client#checkResponse
      +
    • +
    • HADOOP-10235. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (build)
      + Hadoop tarball has 2 versions of stax-api JARs
      +
    • +
    • HADOOP-10234. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (scripts)
      + "hadoop.cmd jar" does not propagate exit code.
      +
    • +
    • HADOOP-10228. + Minor improvement reported by Haohui Mai and fixed by Haohui Mai (fs)
      + FsPermission#fromShort() should cache FsAction.values()
      +
    • +
    • HADOOP-10223. + Minor bug reported by Ted Yu and fixed by Ted Yu
      + MiniKdc#main() should close the FileReader it creates
      +
    • +
    • HADOOP-10214. + Major bug reported by Liang Xie and fixed by Liang Xie (ha)
      + Fix multithreaded correctness warnings in ActiveStandbyElector
      +
    • +
    • HADOOP-10212. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Incorrect compile command in Native Library document
      +
    • +
    • HADOOP-10208. + Trivial improvement reported by Benoy Antony and fixed by Benoy Antony
      + Remove duplicate initialization in StringUtils.getStringCollection
      +
    • +
    • HADOOP-10207. + Minor test reported by Jimmy Xiang and fixed by Jimmy Xiang
      + TestUserGroupInformation#testLogin is flaky
      +
    • +
    • HADOOP-10203. + Major bug reported by Andrei Savu and fixed by Andrei Savu (fs/s3)
      + Connection leak in Jets3tNativeFileSystemStore#retrieveMetadata
      +
    • +
    • HADOOP-10198. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (native)
      + DomainSocket: add support for socketpair
      +
    • +
    • HADOOP-10193. + Minor bug reported by Gregory Chanan and fixed by Gregory Chanan (security)
      + hadoop-auth's PseudoAuthenticationHandler can consume getInputStream
      +
    • +
    • HADOOP-10178. + Major bug reported by shanyu zhao and fixed by shanyu zhao (conf)
      + Configuration deprecation always emit "deprecated" warnings when a new key is used
      +
    • +
    • HADOOP-10175. + Major bug reported by Chuan Liu and fixed by Chuan Liu (fs)
      + Har files system authority should preserve userinfo
      +
    • +
    • HADOOP-10173. + Critical improvement reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + Remove UGI from DIGEST-MD5 SASL server creation
      +
    • +
    • HADOOP-10172. + Critical improvement reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + Cache SASL server factories
      +
    • +
    • HADOOP-10171. + Major bug reported by Mit Desai and fixed by Mit Desai
      + TestRPC fails intermittently on jdk7
      +
    • +
    • HADOOP-10169. + Minor improvement reported by Liang Xie and fixed by Liang Xie (metrics)
      + remove the unnecessary synchronized in JvmMetrics class
      +
    • +
    • HADOOP-10168. + Major bug reported by Thejas M Nair and fixed by Thejas M Nair
      + fix javadoc of ReflectionUtils.copy
      +
    • +
    • HADOOP-10167. + Major improvement reported by Mikhail Antonov and fixed by (build)
      + Mark hadoop-common source as UTF-8 in Maven pom files / refactoring
      +
    • +
    • HADOOP-10164. + Major improvement reported by Robert Joseph Evans and fixed by Robert Joseph Evans
      + Allow UGI to login with a known Subject
      +
    • +
    • HADOOP-10162. + Major bug reported by Mit Desai and fixed by Mit Desai
      + Fix symlink-related test failures in TestFileContextResolveAfs and TestStat in branch-2
      +
    • +
    • HADOOP-10147. + Minor bug reported by Eric Sirianni and fixed by Steve Loughran (build)
      + Upgrade to commons-logging 1.1.3 to avoid potential deadlock in MiniDFSCluster
      +
    • +
    • HADOOP-10146. + Critical bug reported by Daryn Sharp and fixed by Daryn Sharp (util)
      + Workaround JDK7 Process fd close bug
      +
    • +
    • HADOOP-10143. + Major improvement reported by Liang Xie and fixed by Liang Xie (io)
      + replace WritableFactories's hashmap with ConcurrentHashMap
      +
    • +
    • HADOOP-10142. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B
      + Avoid groups lookup for unprivileged users such as "dr.who"
      +
    • +
    • HADOOP-10135. + Major bug reported by David Dobbins and fixed by David Dobbins (fs)
      + writes to swift fs over partition size leave temp files and empty output file
      +
    • +
    • HADOOP-10132. + Minor improvement reported by Ted Yu and fixed by Ted Yu
      + RPC#stopProxy() should log the class of proxy when IllegalArgumentException is encountered
      +
    • +
    • HADOOP-10130. + Minor bug reported by Binglin Chang and fixed by Binglin Chang
      + RawLocalFS::LocalFSFileInputStream.pread does not track FS::Statistics
      +
    • +
    • HADOOP-10129. + Critical bug reported by Daryn Sharp and fixed by Daryn Sharp (tools/distcp)
      + Distcp may succeed when it fails
      +
    • +
    • HADOOP-10127. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla (ipc)
      + Add ipc.client.connect.retry.interval to control the frequency of connection retries
      +
    • +
    • HADOOP-10126. + Minor bug reported by Vinayakumar B and fixed by Vinayakumar B (util)
      + LightWeightGSet log message is confusing : "2.0% max memory = 2.0 GB"
      +
    • +
    • HADOOP-10125. + Major bug reported by Ming Ma and fixed by Ming Ma (ipc)
      + no need to process RPC request if the client connection has been dropped
      +
    • +
    • HADOOP-10112. + Major bug reported by Brandon Li and fixed by Brandon Li (tools)
      + har file listing doesn't work with wild card
      +
    • +
    • HADOOP-10111. + Major improvement reported by Kihwal Lee and fixed by Kihwal Lee
      + Allow DU to be initialized with an initial value
      +
    • +
    • HADOOP-10110. + Blocker bug reported by Chuan Liu and fixed by Chuan Liu (build)
      + hadoop-auth has a build break due to missing dependency
      +
    • +
    • HADOOP-10109. + Major sub-task reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (test)
      + Fix test failure in TestOfflineEditsViewer introduced by HADOOP-10052
      +
    • +
    • HADOOP-10107. + Major sub-task reported by Tsz Wo (Nicholas), SZE and fixed by Kihwal Lee (ipc)
      + Server.getNumOpenConnections may throw NPE
      +
    • +
    • HADOOP-10106. + Minor bug reported by Ming Ma and fixed by Ming Ma
      + Incorrect thread name in RPC log messages
      +
    • +
    • HADOOP-10103. + Minor sub-task reported by Steve Loughran and fixed by Akira AJISAKA (build)
      + update commons-lang to 2.6
      +
    • +
    • HADOOP-10102. + Minor sub-task reported by Steve Loughran and fixed by Akira AJISAKA (build)
      + update commons IO from 2.1 to 2.4
      +
    • +
    • HADOOP-10100. + Major bug reported by Robert Kanter and fixed by Robert Kanter
      + MiniKDC shouldn't use apacheds-all artifact
      +
    • +
    • HADOOP-10095. + Minor improvement reported by Nicolas Liochon and fixed by Nicolas Liochon (io)
      + Performance improvement in CodecPool
      +
    • +
    • HADOOP-10094. + Trivial bug reported by Enis Soztutar and fixed by Enis Soztutar (util)
      + NPE in GenericOptionsParser#preProcessForWindows()
      +
    • +
    • HADOOP-10093. + Major bug reported by shanyu zhao and fixed by shanyu zhao (conf)
      + hadoop-env.cmd sets HADOOP_CLIENT_OPTS with a max heap size that is too small.
      +
    • +
    • HADOOP-10090. + Major bug reported by Ivan Mitic and fixed by Ivan Mitic (metrics)
      + Jobtracker metrics not updated properly after execution of a mapreduce job
      +
    • +
    • HADOOP-10088. + Major bug reported by Raja Aluri and fixed by Raja Aluri (build)
      + copy-nativedistlibs.sh needs to quote snappy lib dir
      +
    • +
    • HADOOP-10087. + Major bug reported by Yu Gao and fixed by Colin Patrick McCabe (security)
      + UserGroupInformation.getGroupNames() fails to return primary group first when JniBasedUnixGroupsMappingWithFallback is used
      +
    • +
    • HADOOP-10086. + Minor improvement reported by Masatake Iwasaki and fixed by Masatake Iwasaki (documentation)
      + User document for authentication in secure cluster
      +
    • +
    • HADOOP-10081. + Critical bug reported by Jason Lowe and fixed by Tsuyoshi OZAWA (ipc)
      + Client.setupIOStreams can leak socket resources on exception or error
      +
    • +
    • HADOOP-10079. + Major improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + log a warning message if group resolution takes too long.
      +
    • +
    • HADOOP-10078. + Minor bug reported by Robert Kanter and fixed by Robert Kanter (security)
      + KerberosAuthenticator always does SPNEGO
      +
    • +
    • HADOOP-10072. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (nfs , test)
      + TestNfsExports#testMultiMatchers fails due to non-deterministic timing around cache expiry check.
      +
    • +
    • HADOOP-10067. + Minor improvement reported by Robert Rati and fixed by Robert Rati
      + Missing POM dependency on jsr305
      +
    • +
    • HADOOP-10064. + Major improvement reported by Arpit Agarwal and fixed by Arpit Agarwal (build)
      + Upgrade to maven antrun plugin version 1.7
      +
    • +
    • HADOOP-10058. + Minor bug reported by Akira AJISAKA and fixed by Chen He (metrics)
      + TestMetricsSystemImpl#testInitFirstVerifyStopInvokedImmediately fails on trunk
      +
    • +
    • HADOOP-10055. + Trivial bug reported by Eli Collins and fixed by Akira AJISAKA (documentation)
      + FileSystemShell.apt.vm doc has typo "numRepicas"
      +
    • +
    • HADOOP-10052. + Major sub-task reported by Andrew Wang and fixed by Andrew Wang (fs)
      + Temporarily disable client-side symlink resolution
      +
    • +
    • HADOOP-10047. + Major new feature reported by Gopal V and fixed by Gopal V (io)
      + Add a directbuffer Decompressor API to hadoop
      +
      Direct Bytebuffer decompressors for Zlib (Deflate & Gzip) and Snappy
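      A hedged sketch of how the direct-buffer path might be exercised, assuming the DirectDecompressionCodec / DirectDecompressor types and the createDirectDecompressor() / decompress(ByteBuffer, ByteBuffer) calls associated with this API; treat these names as assumptions, and note that the native codec libraries must be loadable for the codec to work.

      import java.io.IOException;
      import java.nio.ByteBuffer;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.io.compress.CompressionCodec;
      import org.apache.hadoop.io.compress.DirectDecompressionCodec;
      import org.apache.hadoop.io.compress.DirectDecompressor;
      import org.apache.hadoop.io.compress.SnappyCodec;
      import org.apache.hadoop.util.ReflectionUtils;

      public class DirectDecompressSketch {
        public static void main(String[] args) throws IOException {
          Configuration conf = new Configuration();
          CompressionCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);

          // Codecs supporting the zero-copy path expose a decompressor that works
          // on ByteBuffers instead of byte[]-based streams.
          if (codec instanceof DirectDecompressionCodec) {
            DirectDecompressor dd =
                ((DirectDecompressionCodec) codec).createDirectDecompressor();

            ByteBuffer compressed = ByteBuffer.allocateDirect(64 * 1024);
            ByteBuffer uncompressed = ByteBuffer.allocateDirect(256 * 1024);

            // ... fill 'compressed' with snappy-compressed bytes, then flip() ...
            compressed.flip();
            if (compressed.hasRemaining()) {
              dd.decompress(compressed, uncompressed);
            }
          }
        }
      }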
    • +
    • HADOOP-10046. + Trivial improvement reported by David S. Wang and fixed by David S. Wang
      + Print a log message when SSL is enabled
      +
    • +
    • HADOOP-10040. + Major bug reported by Yingda Chen and fixed by Chris Nauroth
      + hadoop.cmd in UNIX format and would not run by default on Windows
      +
    • +
    • HADOOP-10039. + Major bug reported by Suresh Srinivas and fixed by Haohui Mai (security)
      + Add Hive to the list of projects using AbstractDelegationTokenSecretManager
      +
    • +
    • HADOOP-10031. + Major bug reported by Chuan Liu and fixed by Chuan Liu (fs)
      + FsShell -get/copyToLocal/moveFromLocal should support Windows local path
      +
    • +
    • HADOOP-10030. + Major bug reported by Chuan Liu and fixed by Chuan Liu
      + FsShell -put/copyFromLocal should support Windows local path
      +
    • +
    • HADOOP-10029. + Major bug reported by Suresh Srinivas and fixed by Suresh Srinivas (fs)
      + Specifying har file to MR job fails in secure cluster
      +
    • +
    • HADOOP-10028. + Minor bug reported by Jing Zhao and fixed by Haohui Mai
      + Malformed ssl-server.xml.example
      +
    • +
    • HADOOP-10006. + Blocker bug reported by Junping Du and fixed by Junping Du (fs , util)
      + Compilation failure in trunk for o.a.h.fs.swift.util.JSONUtil
      +
    • +
    • HADOOP-10005. + Trivial improvement reported by Jackie Chang and fixed by Jackie Chang
      + No need to check INFO severity level is enabled or not
      +
    • +
    • HADOOP-9998. + Major improvement reported by Junping Du and fixed by Junping Du (net)
      + Provide methods to clear only part of the DNSToSwitchMapping
      +
    • +
    • HADOOP-9982. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix dead links in hadoop site docs
      +
    • +
    • HADOOP-9981. + Critical bug reported by Kihwal Lee and fixed by Colin Patrick McCabe
      + globStatus should minimize its listStatus and getFileStatus calls
      +
    • +
    • HADOOP-9964. + Major bug reported by Junping Du and fixed by Junping Du (util)
      + O.A.H.U.ReflectionUtils.printThreadInfo() is not thread-safe which cause TestHttpServer pending 10 minutes or longer.
      +
    • +
    • HADOOP-9956. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + RPC listener inefficiently assigns connections to readers
      +
    • +
    • HADOOP-9955. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + RPC idle connection closing is extremely inefficient
      +
    • +
    • HADOOP-9929. + Major bug reported by Jason Lowe and fixed by Colin Patrick McCabe (fs)
      + Insufficient permissions for a path reported as file not found
      +
    • +
    • HADOOP-9915. + Trivial improvement reported by Binglin Chang and fixed by Binglin Chang
      + o.a.h.fs.Stat support on Macosx
      +
    • +
    • HADOOP-9909. + Major improvement reported by Shinichi Yamashita and fixed by (fs)
      + org.apache.hadoop.fs.Stat should permit other LANG
      +
    • +
    • HADOOP-9908. + Major bug reported by Todd Lipcon and fixed by Todd Lipcon (util)
      + Fix NPE when versioninfo properties file is missing
      +
    • +
    • HADOOP-9898. + Minor bug reported by Todd Lipcon and fixed by Todd Lipcon (ipc , net)
      + Set SO_KEEPALIVE on all our sockets
      +
    • +
    • HADOOP-9897. + Trivial improvement reported by Binglin Chang and fixed by Binglin Chang (fs)
      + Add method to get path start position without drive specifier in o.a.h.fs.Path
      +
    • +
    • HADOOP-9889. + Major bug reported by Wei Yan and fixed by Wei Yan
      + Refresh the Krb5 configuration when creating a new kdc in Hadoop-MiniKDC
      +
    • +
    • HADOOP-9887. + Major bug reported by Chris Nauroth and fixed by Chuan Liu (fs)
      + globStatus does not correctly handle paths starting with a drive spec on Windows
      +
    • +
    • HADOOP-9875. + Minor bug reported by Aaron T. Myers and fixed by Aaron T. Myers (test)
      + TestDoAsEffectiveUser can fail on JDK 7
      +
    • +
    • HADOOP-9871. + Minor bug reported by Luke Lu and fixed by Junping Du
      + Fix intermittent findbug warnings in DefaultMetricsSystem
      +
    • +
    • HADOOP-9866. + Major test reported by Alejandro Abdelnur and fixed by Wei Yan (test)
      + convert hadoop-auth testcases requiring kerberos to use minikdc
      +
    • +
    • HADOOP-9865. + Major bug reported by Chuan Liu and fixed by Chuan Liu
      + FileContext.globStatus() has a regression with respect to relative path
      +
    • +
    • HADOOP-9860. + Major improvement reported by Wei Yan and fixed by Wei Yan
      + Remove class HackedKeytab and HackedKeytabEncoder from hadoop-minikdc once jira DIRSERVER-1882 solved
      +
    • +
    • HADOOP-9848. + Major new feature reported by Wei Yan and fixed by Wei Yan (security , test)
      + Create a MiniKDC for use with security testing
      +
    • +
    • HADOOP-9847. + Minor bug reported by Andrew Wang and fixed by Colin Patrick McCabe
      + TestGlobPath symlink tests fail to cleanup properly
      +
    • +
    • HADOOP-9833. + Minor improvement reported by Steve Loughran and fixed by Kousuke Saruta (build)
      + move slf4j to version 1.7.5
      +
    • +
    • HADOOP-9830. + Trivial bug reported by Dmitry Lysnichenko and fixed by Kousuke Saruta (documentation)
      + Typo at http://hadoop.apache.org/docs/current/
      +
    • +
    • HADOOP-9820. + Blocker bug reported by Daryn Sharp and fixed by Daryn Sharp (ipc , security)
      + RPCv9 wire protocol is insufficient to support multiplexing
      +
    • +
    • HADOOP-9817. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + FileSystem#globStatus and FileContext#globStatus need to work with symlinks
      +
    • +
    • HADOOP-9806. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + PortmapInterface should check if the procedure is out-of-range
      +
    • +
    • HADOOP-9791. + Major bug reported by Ivan Mitic and fixed by Ivan Mitic
      + Add a test case covering long paths for new FileUtil access check methods
      +
    • +
    • HADOOP-9787. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla (util)
      + ShutdownHelper util to shutdown threads and threadpools
      +
    • +
    • HADOOP-9784. + Major improvement reported by Junping Du and fixed by Junping Du
      + Add a builder for HttpServer
      +
    • +
    • HADOOP-9748. + Critical sub-task reported by Daryn Sharp and fixed by Daryn Sharp (security)
      + Reduce blocking on UGI.ensureInitialized
      +
    • +
    • HADOOP-9703. + Minor bug reported by Mark Miller and fixed by Tsuyoshi OZAWA
      + org.apache.hadoop.ipc.Client leaks threads on stop.
      +
    • +
    • HADOOP-9698. + Blocker sub-task reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + RPCv9 client must honor server's SASL negotiate response
      +
      The RPC client now waits for the Server's SASL negotiate response before instantiating its SASL client.
    • +
    • HADOOP-9693. + Trivial improvement reported by Steve Loughran and fixed by
      + Shell should add a probe for OSX
      +
    • +
    • HADOOP-9686. + Major improvement reported by Jason Lowe and fixed by Jason Lowe (conf)
      + Easy access to final parameters in Configuration
      +
    • +
    • HADOOP-9683. + Blocker sub-task reported by Luke Lu and fixed by Daryn Sharp (ipc)
      + Wrap IpcConnectionContext in RPC headers
      +
      Connection context is now sent as an RPC-header-wrapped protobuf.
    • +
    • HADOOP-9660. + Major bug reported by Enis Soztutar and fixed by Enis Soztutar (scripts , util)
      + [WINDOWS] Powershell / cmd parses -Dkey=value from command line as [-Dkey, value] which breaks GenericsOptionParser
      +
    • +
    • HADOOP-9652. + Major improvement reported by Colin Patrick McCabe and fixed by Andrew Wang
      + Allow RawLocalFs#getFileLinkStatus to fill in the link owner and mode if requested
      +
    • +
    • HADOOP-9635. + Major bug reported by V. Karthik Kumar and fixed by (native)
      + Fix Potential Stack Overflow in DomainSocket.c
      +
    • +
    • HADOOP-9623. + Major improvement reported by Timothy St. Clair and fixed by Amandeep Khurana (fs/s3)
      + Update jets3t dependency to 0.9.0
      +
    • +
    • HADOOP-9618. + Major new feature reported by Todd Lipcon and fixed by Todd Lipcon (util)
      + Add thread which detects JVM pauses
      +
    • +
    • HADOOP-9611. + Major improvement reported by Timothy St. Clair and fixed by Timothy St. Clair (build)
      + mvn-rpmbuild against google-guice > 3.0 yields missing cglib dependency
      +
    • +
    • HADOOP-9598. + Major test reported by Aleksey Gorshkov and fixed by Andrey Klochkov
      + Improve code coverage of RMAdminCLI
      +
    • +
    • HADOOP-9594. + Major improvement reported by Timothy St. Clair and fixed by Timothy St. Clair (build)
      + Update apache commons math dependency
      +
    • +
    • HADOOP-9582. + Major bug reported by Ashwin Shankar and fixed by Ashwin Shankar (conf)
      + Non-existent file to "hadoop fs -conf" doesn't throw error
      +
    • +
    • HADOOP-9527. + Major bug reported by Arpit Agarwal and fixed by Arpit Agarwal (fs , test)
      + Add symlink support to LocalFileSystem on Windows
      +
    • +
    • HADOOP-9515. + Major new feature reported by Brandon Li and fixed by Brandon Li
      + Add general interface for NFS and Mount
      +
    • +
    • HADOOP-9509. + Major new feature reported by Brandon Li and fixed by Brandon Li
      + Implement ONCRPC and XDR
      +
    • +
    • HADOOP-9494. + Major improvement reported by Dennis Y and fixed by Andrey Klochkov
      + Excluded auto-generated and examples code from clover reports
      +
    • +
    • HADOOP-9487. + Major improvement reported by Steve Loughran and fixed by (conf)
      + Deprecation warnings in Configuration should go to their own log or otherwise be suppressible
      +
    • +
    • HADOOP-9470. + Major improvement reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky (test)
      + eliminate duplicate FQN tests in different Hadoop modules
      +
    • +
    • HADOOP-9432. + Minor new feature reported by Steve Loughran and fixed by (build , documentation)
      + Add support for markdown .md files in site documentation
      +
    • +
    • HADOOP-9421. + Blocker sub-task reported by Sanjay Radia and fixed by Daryn Sharp
      + Convert SASL to use ProtoBuf and provide negotiation capabilities
      +
      Raw SASL protocol now uses protobufs wrapped with RPC headers. The negotiation sequence incorporates the state of the exchange. The server now has the ability to advertise its supported auth types.
    • +
    • HADOOP-9420. + Major bug reported by Todd Lipcon and fixed by Liang Xie (ipc , metrics)
      + Add percentile or max metric for rpcQueueTime, processing time
      +
    • +
    • HADOOP-9417. + Major sub-task reported by Andrew Wang and fixed by Andrew Wang (fs)
      + Support for symlink resolution in LocalFileSystem / RawLocalFileSystem
      +
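      A small, hedged example of exercising local symlink support through FileContext; the paths are placeholders and error handling is omitted.

        import org.apache.hadoop.fs.FileContext;
        import org.apache.hadoop.fs.FileStatus;
        import org.apache.hadoop.fs.Path;

        public class LocalSymlinkExample {
          public static void main(String[] args) throws Exception {
            FileContext fc = FileContext.getLocalFSFileContext();
            Path target = new Path("/tmp/real-file");          // placeholder
            Path link = new Path("/tmp/link-to-real-file");    // placeholder
            fc.createSymlink(target, link, true);
            // getFileLinkStatus() describes the link itself rather than its target.
            FileStatus linkStatus = fc.getFileLinkStatus(link);
            System.out.println(link + " isSymlink=" + linkStatus.isSymlink()
                + " target=" + linkStatus.getSymlink());
          }
        }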
    • +
    • HADOOP-9350. + Minor bug reported by Steve Loughran and fixed by Robert Kanter (build)
      + Hadoop not building against Java7 on OSX
      +
    • +
    • HADOOP-9319. + Major improvement reported by Arpit Agarwal and fixed by Binglin Chang
      + Update bundled lz4 source to latest version
      +
    • +
    • HADOOP-9291. + Major test reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      + enhance unit-test coverage of package o.a.h.metrics2
      +
    • +
    • HADOOP-9254. + Major test reported by Vadim Bondarev and fixed by Vadim Bondarev
      + Cover packages org.apache.hadoop.util.bloom, org.apache.hadoop.util.hash
      +
    • +
    • HADOOP-9241. + Trivial improvement reported by Harsh J and fixed by Harsh J
      + DU refresh interval is not configurable
      +
      The 'du' (disk usage command from Unix) refresh monitor is now configurable in the same way as its 'df' counterpart, via the property 'fs.du.interval', which defaults to 10 minutes (the value is given in milliseconds).
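      A minimal sketch of setting the interval programmatically, assuming a one-minute refresh is wanted; the same key can equally be set in core-site.xml.

        import org.apache.hadoop.conf.Configuration;

        public class DuIntervalExample {
          public static void main(String[] args) {
            Configuration conf = new Configuration();
            // fs.du.interval is in milliseconds; 60000 ms = 1 minute (illustrative value).
            conf.setLong("fs.du.interval", 60000L);
            System.out.println("fs.du.interval = " + conf.getLong("fs.du.interval", -1));
          }
        }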
    • +
    • HADOOP-9225. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover package org.apache.hadoop.compress.Snappy
      +
    • +
    • HADOOP-9199. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover package org.apache.hadoop.io with unit tests
      +
    • +
    • HADOOP-9114. + Minor bug reported by liuyang and fixed by sathish
      + After defining dfs.checksum.type as NULL, writing a file and calling hflush will throw java.lang.ArrayIndexOutOfBoundsException
      +
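      A minimal sketch of the reported scenario, assuming a reachable default file system and a writable placeholder path /tmp/checksum-null-test; before the fix, this write-then-hflush sequence could fail with the ArrayIndexOutOfBoundsException.

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FSDataOutputStream;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;

        public class ChecksumNullHflush {
          public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("dfs.checksum.type", "NULL");   // disable checksums, as in the report
            FileSystem fs = FileSystem.get(conf);
            FSDataOutputStream out = fs.create(new Path("/tmp/checksum-null-test"));
            out.write("hello".getBytes("UTF-8"));
            out.hflush();                            // the call that used to throw
            out.close();
            fs.close();
          }
        }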
    • +
    • HADOOP-9078. + Major test reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      + enhance unit-test coverage of class org.apache.hadoop.fs.FileContext
      +
    • +
    • HADOOP-9063. + Minor test reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      + enhance unit-test coverage of class org.apache.hadoop.fs.FileUtil
      +
    • +
    • HADOOP-9016. + Minor bug reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      org.apache.hadoop.fs.HarFileSystem.HarFSDataInputStream.HarFsInputStream.skip(long) must never return a negative value.
      +
    • +
    • HADOOP-8814. + Minor improvement reported by Brandon Li and fixed by Brandon Li (conf , fs , fs/s3 , ha , io , metrics , performance , record , security , util)
      + Inefficient comparison with the empty string. Use isEmpty() instead
      +
    • +
    • HADOOP-8753. + Minor bug reported by Nishan Shetty, Huawei and fixed by Benoy Antony
      + LocalDirAllocator throws "ArithmeticException: / by zero" when there is no available space on configured local dir
      +
    • +
    • HADOOP-8704. + Major improvement reported by Thomas Graves and fixed by Jonathan Eagles
      + add request logging to jetty/httpserver
      +
    • +
    • HADOOP-8545. + Major new feature reported by Tim Miller and fixed by Dmitry Mezhensky (fs)
      + Filesystem Implementation for OpenStack Swift
      +
      Added a file system implementation for OpenStack Swift. There are two implementations: block and native (similar to the Amazon S3 integration). The data locality issue is solved by a patch in Swift; the commit procedure to OpenStack is in progress.

      To use the implementation, add the following to core-site.xml:
      ...
        <property>
          <name>fs.swift.impl</name>
          <value>com.mirantis.fs.SwiftFileSystem</value>
        </property>
        <property>
          <name>fs.swift.block.impl</name>
          <value>com.mirantis.fs.block.SwiftBlockFileSystem</value>
        </property>
      ...

      In a MapReduce job, specify the following configs for OpenStack Keystone authentication:
        conf.set("swift.auth.url", "http://172.18.66.117:5000/v2.0/tokens");
        conf.set("swift.tenant", "superuser");
        conf.set("swift.username", "admin1");
        conf.set("swift.password", "password");
        conf.setInt("swift.http.port", 8080);
        conf.setInt("swift.https.port", 443);

      Additional information is available on GitHub: https://github.com/DmitryMezhensky/Hadoop-and-Swift-integration
    • +
    • HADOOP-7344. + Major bug reported by Daryn Sharp and fixed by Colin Patrick McCabe (fs)
      + globStatus doesn't grok groupings with a slash
      +
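      A hedged illustration of the kind of pattern the fix enables: a brace grouping whose alternatives each contain a slash. The paths are placeholders.

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FileStatus;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;

        public class GlobWithSlashExample {
          public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(new Configuration());
            // Each brace alternative contains a '/', which previously broke glob parsing.
            FileStatus[] matches = fs.globStatus(new Path("/logs/{2013/06,2013/07}/*"));
            if (matches != null) {
              for (FileStatus st : matches) {
                System.out.println(st.getPath());
              }
            }
          }
        }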
    • +
    + - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js index 3ab21548f58..39450043cda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js @@ -28,7 +28,7 @@ {"name": "nn", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo"}, {"name": "nnstat", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"}, {"name": "fs", "url": "/jmx?qry=Hadoop:service=NameNode,name=FSNamesystemState"}, - {"name": "mem", "url": "/jmx?qry=java.lang:type=Memory"}, + {"name": "mem", "url": "/jmx?qry=java.lang:type=Memory"} ]; var HELPERS = { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html index 50c7dfe46b4..f9c339748f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html @@ -1,3 +1,5 @@ + - From 84ae8a3ca8d31b537ff13e5ad79272eaf6531401 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Fri, 14 Feb 2014 08:32:55 +0000 Subject: [PATCH 46/47] Move Flatten INode hierarchy jiras (HDFS-5531, HDFS-5285, HDFS-5286, HDFS-5537, HDFS-5554, HDFS-5647, HDFS-5632, HDFS-5715, HDFS-5726) to 2.4.0 section in CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1568218 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 54 ++++++++++----------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 31451c8c3de..3f9b95f4e49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -120,31 +120,6 @@ Trunk (Unreleased) HDFS-5041. Add the time of last heartbeat to dead server Web UI (Shinichi Yamashita via brandonli) - HDFS-5531. Combine the getNsQuota() and getDsQuota() methods in INode. - (szetszwo) - - HDFS-5285. Flatten INodeFile hierarchy: Replace INodeFileUnderConstruction - and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature. - (jing9 via szetszwo) - - HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota - with DirectoryWithQuotaFeature. (szetszwo) - - HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo) - - HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with - FileWithSnapshotFeature. (jing9 via szetszwo) - - HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai - via jing9) - - HDFS-5632. Flatten INodeDirectory hierarchy: Replace - INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature. - (jing9 via szetszwo) - - HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a - FileDiff/DirectoryDiff. (jing9) - HDFS-5721. sharedEditsImage in Namenode#initializeSharedEdits() should be closed before method returns. (Ted Yu via junping_du) @@ -275,8 +250,6 @@ Trunk (Unreleased) HDFS-5719. FSImage#doRollback() should close prevState before return (Ted Yu via brandonli) - HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9) - HDFS-5768. Consolidate the serialization code in DelegationTokenSecretManager (Haohui Mai via brandonli) @@ -376,6 +349,33 @@ Release 2.4.0 - UNRELEASED HDFS-5940. 
Minor cleanups to ShortCircuitReplica, FsDatasetCache, and DomainSocketWatcher (cmccabe) + HDFS-5531. Combine the getNsQuota() and getDsQuota() methods in INode. + (szetszwo) + + HDFS-5285. Flatten INodeFile hierarchy: Replace INodeFileUnderConstruction + and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature. + (jing9 via szetszwo) + + HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota + with DirectoryWithQuotaFeature. (szetszwo) + + HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo) + + HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with + FileWithSnapshotFeature. (jing9 via szetszwo) + + HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai + via jing9) + + HDFS-5632. Flatten INodeDirectory hierarchy: Replace + INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature. + (jing9 via szetszwo) + + HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a + FileDiff/DirectoryDiff. (jing9) + + HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9) + OPTIMIZATIONS HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery From 72f63c8957880ed5e432fef0a1612b8cb44bd1e4 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 14 Feb 2014 18:07:04 +0000 Subject: [PATCH 47/47] HDFS-5943. 'dfs.namenode.https-address' property is not loaded from configuration in federation setup. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1568412 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hadoop/hdfs/server/namenode/NameNode.java | 65 ++++++++----------- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 3f9b95f4e49..8cc4ddd4a50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -448,6 +448,9 @@ Release 2.4.0 - UNRELEASED HDFS-5901. NameNode new UI doesn't support IE8 and IE9 on windows 7 (Vinayakumar B via brandonli) + HDFS-5943. 'dfs.namenode.https-address' property is not loaded from + configuration in federation setup. 
(suresh) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 726ddd211c4..6df82cbd3f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -17,39 +17,22 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; - -import java.io.IOException; -import java.io.PrintStream; -import java.net.InetSocketAddress; -import java.net.URI; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -import javax.management.ObjectName; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Trash; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.ha.HAServiceStatus; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.ServiceFailedException; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Trash; - -import static org.apache.hadoop.hdfs.DFSConfigKeys.*; -import static org.apache.hadoop.util.ExitUtil.terminate; -import static org.apache.hadoop.util.ToolRunner.confirmPrompt; - import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; @@ -58,20 +41,11 @@ import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; -import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; -import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby; -import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; -import org.apache.hadoop.hdfs.server.namenode.ha.HAState; -import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; +import org.apache.hadoop.hdfs.server.namenode.ha.*; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; -import 
org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.hdfs.server.protocol.*; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -89,10 +63,23 @@ import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import javax.management.ObjectName; +import java.io.IOException; +import java.io.PrintStream; +import java.net.InetSocketAddress; +import java.net.URI; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import static org.apache.hadoop.util.ExitUtil.terminate; +import static org.apache.hadoop.util.ToolRunner.confirmPrompt; /********************************************************** * NameNode serves as both directory namespace manager and @@ -183,8 +170,10 @@ public class NameNode implements NameNodeStatusMXBean { DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, DFS_NAMENODE_HTTP_ADDRESS_KEY, + DFS_NAMENODE_HTTPS_ADDRESS_KEY, DFS_NAMENODE_KEYTAB_FILE_KEY, DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, + DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, DFS_NAMENODE_BACKUP_ADDRESS_KEY, DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,