From a2edb11b68ae01a44092cb14ac2717a6aad93305 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Sun, 9 Feb 2014 19:18:51 +0000 Subject: [PATCH 01/47] HDFS-5698. Use protobuf to serialize / deserialize FSImage. Contributed by Haohui Mai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566359 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 49 ++ .../dev-support/findbugsExcludeFile.xml | 3 + hadoop-hdfs-project/hadoop-hdfs/pom.xml | 1 + .../hadoop-hdfs/src/main/bin/hdfs | 2 +- .../hadoop/hdfs/protocol/LayoutVersion.java | 3 +- .../DelegationTokenSecretManager.java | 87 ++- .../hdfs/server/namenode/CacheManager.java | 147 ++++- .../hadoop/hdfs/server/namenode/FSImage.java | 5 +- .../server/namenode/FSImageCompression.java | 6 +- .../hdfs/server/namenode/FSImageFormat.java | 75 ++- .../server/namenode/FSImageFormatPBINode.java | 466 +++++++++++++++ .../namenode/FSImageFormatProtobuf.java | 551 ++++++++++++++++++ .../hdfs/server/namenode/FSImageUtil.java | 93 +++ .../hdfs/server/namenode/FSNamesystem.java | 22 + .../hdfs/server/namenode/INodeDirectory.java | 2 +- .../hdfs/server/namenode/INodeFile.java | 2 +- .../hadoop/hdfs/server/namenode/INodeMap.java | 5 + .../server/namenode/SaveNamespaceContext.java | 6 +- .../DirectoryWithSnapshotFeature.java | 6 +- .../snapshot/FSImageFormatPBSnapshot.java | 437 ++++++++++++++ .../snapshot/SnapshotFSImageFormat.java | 26 +- .../namenode/snapshot/SnapshotManager.java | 17 + .../FileDistributionCalculator.java | 160 +++++ .../ImageLoaderCurrent.java | 2 +- .../tools/offlineImageViewer/LsrPBImage.java | 233 ++++++++ .../OfflineImageViewerPB.java | 178 ++++++ .../offlineImageViewer/PBImageXmlWriter.java | 415 +++++++++++++ .../hadoop-hdfs/src/main/proto/fsimage.proto | 280 +++++++++ .../hdfs/server/namenode/TestFSImage.java | 138 +++++ .../namenode/TestFSImageWithSnapshot.java | 4 +- .../namenode/ha/TestStandbyCheckpoints.java | 2 +- .../snapshot/TestRenameWithSnapshots.java | 1 - .../namenode/snapshot/TestSnapshot.java | 39 +- .../TestOfflineImageViewer.java | 427 ++++---------- .../src/test/resources/editsStored | Bin 4272 -> 4272 bytes .../src/test/resources/editsStored.xml | 2 +- 36 files changed, 3488 insertions(+), 404 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 47f931f2215..22b201627a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -286,6 +286,55 @@ Trunk (Unreleased) HDFS-5794. Fix the inconsistency of layout version number of ADD_DATANODE_AND_STORAGE_UUIDS between trunk and branch-2. (jing9) + BREAKDOWN OF HDFS-5698 SUBTASKS AND RELATED JIRAS + + HDFS-5717. Save FSImage header in protobuf. (Haohui Mai via jing9) + + HDFS-5738. Serialize INode information in protobuf. (Haohui Mai via jing9) + + HDFS-5772. Serialize under-construction file information in FSImage. (jing9) + + HDFS-5783. Compute the digest before loading FSImage. (Haohui Mai via jing9) + + HDFS-5785. Serialize symlink in protobuf. (Haohui Mai via jing9) + + HDFS-5793. Optimize the serialization of PermissionStatus. (Haohui Mai via + jing9) + + HDFS-5743. Use protobuf to serialize snapshot information. (jing9) + + HDFS-5774. Serialize CachePool directives in protobuf. (Haohui Mai via jing9) + + HDFS-5744. Serialize information for token managers in protobuf. (Haohui Mai + via jing9) + + HDFS-5824. Add a Type field in Snapshot DiffEntry's protobuf definition. + (jing9) + + HDFS-5808. Implement cancellation when saving FSImage. (Haohui Mai via jing9) + + HDFS-5826. Update the stored edit logs to be consistent with the changes in + HDFS-5698 branch. (Haohui Mai via jing9) + + HDFS-5797. Implement offline image viewer. (Haohui Mai via jing9) + + HDFS-5771. Track progress when loading fsimage. (Haohui Mai via cnauroth) + + HDFS-5871. Use PBHelper to serialize CacheDirectiveInfoExpirationProto. + (Haohui Mai via jing9) + + HDFS-5884. LoadDelegator should use IOUtils.readFully() to read the magic + header. (Haohui Mai via jing9) + + HDFS-5885. Add annotation for repeated fields in the protobuf definition. + (Haohui Mai via jing9) + + HDFS-5906. Fixing findbugs and javadoc warnings in the HDFS-5698 branch. + (Haohui Mai via jing9) + + HDFS-5911. The id of a CacheDirective instance does not get serialized in + the protobuf-fsimage. 
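The subtasks above all build on one serialization primitive: each section of the new image is written as a stream of length-delimited protobuf messages via writeDelimitedTo(), and read back with parseDelimitedFrom(), which returns null at end of stream. A minimal, self-contained sketch of that pattern, using protobuf-java's built-in StringValue wrapper purely as a stand-in for the generated FsImageProto messages:

import com.google.protobuf.StringValue;
import java.io.*;

public class DelimitedStreamSketch {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (String path : new String[] { "/dir/a", "/dir/b", "/dir/c" }) {
            // writeDelimitedTo() prefixes each message with its varint-encoded length.
            StringValue.newBuilder().setValue(path).build().writeDelimitedTo(out);
        }
        InputStream in = new ByteArrayInputStream(out.toByteArray());
        StringValue v;
        // parseDelimitedFrom() returns null at end of stream; the loaders in this
        // patch rely on the same null check to detect the end of a section.
        while ((v = StringValue.parseDelimitedFrom(in)) != null) {
            System.out.println(v.getValue());
        }
    }
}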
(Haohui Mai via jing9) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 028e64cad94..70b7e65f842 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -8,6 +8,9 @@ + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 0b1e55d46c5..6cd9fea1dd7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -458,6 +458,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ClientDatanodeProtocol.proto DatanodeProtocol.proto + fsimage.proto ${project.build.directory}/generated-sources/java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index fa00cd47d0e..5d823b7dd21 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -139,7 +139,7 @@ elif [ "$COMMAND" = "balancer" ] ; then elif [ "$COMMAND" = "jmxget" ] ; then CLASS=org.apache.hadoop.hdfs.tools.JMXGet elif [ "$COMMAND" = "oiv" ] ; then - CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer + CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB elif [ "$COMMAND" = "oev" ] ; then CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer elif [ "$COMMAND" = "fetchdt" ] ; then diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index 923ed70ac8f..9842b53fbd3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -112,7 +112,8 @@ public class LayoutVersion { ADD_DATANODE_AND_STORAGE_UUIDS(-49, "Replace StorageID with DatanodeUuid." 
+ " Use distinct StorageUuid per storage directory."), ADD_LAYOUT_FLAGS(-50, "Add support for layout flags."), - CACHING(-51, "Support for cache pools and path-based caching"); + CACHING(-51, "Support for cache pools and path-based caching"), + PROTOBUF_FORMAT(-52, "Use protobuf to serialize FSImage"); final int lv; final int ancestorLV; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index e291204cc23..b9fce60446b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -23,12 +23,16 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.InterruptedIOException; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; @@ -46,6 +50,10 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; import org.apache.hadoop.security.token.delegation.DelegationKey; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; + /** * A HDFS specific delegation token secret manager. * The secret manager is responsible for generating and accepting the password @@ -167,7 +175,45 @@ public class DelegationTokenSecretManager } serializerCompat.load(in); } - + + public static class SecretManagerState { + public final SecretManagerSection section; + public final List keys; + public final List tokens; + + public SecretManagerState( + SecretManagerSection s, + List keys, + List tokens) { + this.section = s; + this.keys = keys; + this.tokens = tokens; + } + } + + public synchronized void loadSecretManagerState(SecretManagerState state) + throws IOException { + Preconditions.checkState(!running, + "Can't load state from image in a running SecretManager."); + + currentId = state.section.getCurrentId(); + delegationTokenSequenceNumber = state.section.getTokenSequenceNumber(); + for (SecretManagerSection.DelegationKey k : state.keys) { + addKey(new DelegationKey(k.getId(), k.getExpiryDate(), k.hasKey() ? 
k + .getKey().toByteArray() : null)); + } + + for (SecretManagerSection.PersistToken t : state.tokens) { + DelegationTokenIdentifier id = new DelegationTokenIdentifier(new Text( + t.getOwner()), new Text(t.getRenewer()), new Text(t.getRealUser())); + id.setIssueDate(t.getIssueDate()); + id.setMaxDate(t.getMaxDate()); + id.setSequenceNumber(t.getSequenceNumber()); + id.setMasterKeyId(t.getMasterKeyId()); + addPersistedDelegationToken(id, t.getExpiryDate()); + } + } + /** * Store the current state of the SecretManager for persistence * @@ -179,7 +225,43 @@ public class DelegationTokenSecretManager String sdPath) throws IOException { serializerCompat.save(out, sdPath); } - + + public synchronized SecretManagerState saveSecretManagerState() { + SecretManagerSection s = SecretManagerSection.newBuilder() + .setCurrentId(currentId) + .setTokenSequenceNumber(delegationTokenSequenceNumber) + .setNumKeys(allKeys.size()).setNumTokens(currentTokens.size()).build(); + ArrayList keys = Lists + .newArrayListWithCapacity(allKeys.size()); + ArrayList tokens = Lists + .newArrayListWithCapacity(currentTokens.size()); + + for (DelegationKey v : allKeys.values()) { + SecretManagerSection.DelegationKey.Builder b = SecretManagerSection.DelegationKey + .newBuilder().setId(v.getKeyId()).setExpiryDate(v.getExpiryDate()); + if (v.getEncodedKey() != null) { + b.setKey(ByteString.copyFrom(v.getEncodedKey())); + } + keys.add(b.build()); + } + + for (Entry e : currentTokens + .entrySet()) { + DelegationTokenIdentifier id = e.getKey(); + SecretManagerSection.PersistToken.Builder b = SecretManagerSection.PersistToken + .newBuilder().setOwner(id.getOwner().toString()) + .setRenewer(id.getRenewer().toString()) + .setRealUser(id.getRealUser().toString()) + .setIssueDate(id.getIssueDate()).setMaxDate(id.getMaxDate()) + .setSequenceNumber(id.getSequenceNumber()) + .setMasterKeyId(id.getMasterKeyId()) + .setExpiryDate(e.getValue().getRenewDate()); + tokens.add(b.build()); + } + + return new SecretManagerState(s, keys, tokens); + } + /** * This method is intended to be used only while reading edit logs. 
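The saveSecretManagerState() path above follows a snapshot-then-serialize pattern: the synchronized method copies the live key and token maps into an immutable holder (SecretManagerState), and the image saver streams that holder out later without re-entering the secret manager's lock. A rough sketch of the pattern, with illustrative record types rather than the generated protobuf messages:

import java.util.*;

final class SecretStateSnapshotSketch {
    record PersistedKey(int id, long expiryDate) {}
    record StateSnapshot(int currentId, int tokenSequenceNumber, List<PersistedKey> keys) {}

    private final Map<Integer, PersistedKey> allKeys = new HashMap<>();
    private int currentId;
    private int tokenSequenceNumber;

    // Copy everything while holding the lock; the returned snapshot is immutable
    // and can be written out by another component without further synchronization.
    synchronized StateSnapshot snapshot() {
        return new StateSnapshot(currentId, tokenSequenceNumber,
            List.copyOf(allKeys.values()));
    }
}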
* @@ -431,4 +513,5 @@ public class DelegationTokenSecretManager prog.endStep(Phase.LOADING_FSIMAGE, step); } } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index ba3936ca997..de536b30d79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -50,8 +50,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.InvalidRequestException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.CacheDirective; import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; @@ -62,11 +64,15 @@ import org.apache.hadoop.hdfs.protocol.CachePoolEntry; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; @@ -81,6 +87,7 @@ import org.apache.hadoop.util.LightWeightGSet; import org.apache.hadoop.util.Time; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; /** * The Cache Manager handles caching on DataNodes. 
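Both the secret-manager and cache-manager sections are count-prefixed: the section header records how many entries follow (numKeys/numTokens, numPools/numDirectives), and the loader reads exactly that many delimited messages afterwards. A self-contained sketch of that layout, with protobuf's Int32Value/StringValue wrappers standing in for the generated section and entry messages:

import com.google.protobuf.Int32Value;
import com.google.protobuf.StringValue;
import java.io.*;
import java.util.*;

public class CountedSectionSketch {
    static void write(OutputStream out, List<String> entries) throws IOException {
        // Header carries the entry count, mirroring numKeys/numPools in the patch.
        Int32Value.newBuilder().setValue(entries.size()).build().writeDelimitedTo(out);
        for (String e : entries) {
            StringValue.newBuilder().setValue(e).build().writeDelimitedTo(out);
        }
    }

    static List<String> read(InputStream in) throws IOException {
        int n = Int32Value.parseDelimitedFrom(in).getValue();
        List<String> entries = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {   // read exactly n entries, no sentinel needed
            entries.add(StringValue.parseDelimitedFrom(in).getValue());
        }
        return entries;
    }
}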
@@ -167,6 +174,19 @@ public final class CacheManager { */ private CacheReplicationMonitor monitor; + public static final class PersistState { + public final CacheManagerSection section; + public final List pools; + public final List directives; + + public PersistState(CacheManagerSection section, + List pools, List directives) { + this.section = section; + this.pools = pools; + this.directives = directives; + } + } + CacheManager(FSNamesystem namesystem, Configuration conf, BlockManager blockManager) { this.namesystem = namesystem; @@ -944,6 +964,64 @@ public final class CacheManager { serializerCompat.save(out, sdPath); } + public PersistState saveState() throws IOException { + ArrayList pools = Lists + .newArrayListWithCapacity(cachePools.size()); + ArrayList directives = Lists + .newArrayListWithCapacity(directivesById.size()); + + for (CachePool pool : cachePools.values()) { + CachePoolInfo p = pool.getInfo(true); + CachePoolInfoProto.Builder b = CachePoolInfoProto.newBuilder() + .setPoolName(p.getPoolName()); + + if (p.getOwnerName() != null) + b.setOwnerName(p.getOwnerName()); + + if (p.getGroupName() != null) + b.setGroupName(p.getGroupName()); + + if (p.getMode() != null) + b.setMode(p.getMode().toShort()); + + if (p.getLimit() != null) + b.setLimit(p.getLimit()); + + pools.add(b.build()); + } + + for (CacheDirective directive : directivesById.values()) { + CacheDirectiveInfo info = directive.toInfo(); + CacheDirectiveInfoProto.Builder b = CacheDirectiveInfoProto.newBuilder() + .setId(info.getId()); + + if (info.getPath() != null) { + b.setPath(info.getPath().toUri().getPath()); + } + + if (info.getReplication() != null) { + b.setReplication(info.getReplication()); + } + + if (info.getPool() != null) { + b.setPool(info.getPool()); + } + + Expiration expiry = info.getExpiration(); + if (expiry != null) { + assert (!expiry.isRelative()); + b.setExpiration(PBHelper.convert(expiry)); + } + + directives.add(b.build()); + } + CacheManagerSection s = CacheManagerSection.newBuilder() + .setNextDirectiveId(nextDirectiveId).setNumPools(pools.size()) + .setNumDirectives(directives.size()).build(); + + return new PersistState(s, pools, directives); + } + /** * Reloads CacheManager state from the passed DataInput. Used during namenode * startup to restore CacheManager state from an FSImage. 
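The loadState() path added below delegates to addCacheDirective(), which has to keep three indexes consistent: the owning pool's directive list, the global by-id map, and the per-path list. A simplified sketch of that multi-index insert (class and field names are illustrative):

import java.io.IOException;
import java.util.*;

final class DirectiveIndexSketch {
    record Directive(long id, String path, String pool) {}

    private final Map<Long, Directive> byId = new HashMap<>();
    private final Map<String, List<Directive>> byPath = new HashMap<>();
    private final Map<String, List<Directive>> byPool = new HashMap<>();

    void addPool(String name) {
        byPool.putIfAbsent(name, new ArrayList<>());
    }

    void add(Directive d) throws IOException {
        List<Directive> pool = byPool.get(d.pool());
        if (pool == null) {
            throw new IOException("Directive refers to pool " + d.pool()
                + ", which does not exist.");
        }
        if (byId.putIfAbsent(d.id(), d) != null) {
            throw new IOException("A directive with ID " + d.id() + " already exists");
        }
        pool.add(d);                                            // pool's directive list
        byPath.computeIfAbsent(d.path(), p -> new ArrayList<>()).add(d);  // per-path list
    }
}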
@@ -954,6 +1032,56 @@ public final class CacheManager { serializerCompat.load(in); } + public void loadState(PersistState s) throws IOException { + nextDirectiveId = s.section.getNextDirectiveId(); + for (CachePoolInfoProto p : s.pools) { + CachePoolInfo info = new CachePoolInfo(p.getPoolName()); + if (p.hasOwnerName()) + info.setOwnerName(p.getOwnerName()); + + if (p.hasGroupName()) + info.setGroupName(p.getGroupName()); + + if (p.hasMode()) + info.setMode(new FsPermission((short) p.getMode())); + + if (p.hasLimit()) + info.setLimit(p.getLimit()); + + addCachePool(info); + } + + for (CacheDirectiveInfoProto p : s.directives) { + // Get pool reference by looking it up in the map + final String poolName = p.getPool(); + CacheDirective directive = new CacheDirective(p.getId(), new Path( + p.getPath()).toUri().getPath(), (short) p.getReplication(), p + .getExpiration().getMillis()); + addCacheDirective(poolName, directive); + } + } + + private void addCacheDirective(final String poolName, + final CacheDirective directive) throws IOException { + CachePool pool = cachePools.get(poolName); + if (pool == null) { + throw new IOException("Directive refers to pool " + poolName + + ", which does not exist."); + } + boolean addedDirective = pool.getDirectiveList().add(directive); + assert addedDirective; + if (directivesById.put(directive.getId(), directive) != null) { + throw new IOException("A directive with ID " + directive.getId() + + " already exists"); + } + List directives = directivesByPath.get(directive.getPath()); + if (directives == null) { + directives = new LinkedList(); + directivesByPath.put(directive.getPath(), directives); + } + directives.add(directive); + } + private final class SerializerCompat { private void save(DataOutputStream out, String sdPath) throws IOException { out.writeLong(nextDirectiveId); @@ -1036,27 +1164,10 @@ public final class CacheManager { CacheDirectiveInfo info = FSImageSerialization.readCacheDirectiveInfo(in); // Get pool reference by looking it up in the map final String poolName = info.getPool(); - CachePool pool = cachePools.get(poolName); - if (pool == null) { - throw new IOException("Directive refers to pool " + poolName + - ", which does not exist."); - } CacheDirective directive = new CacheDirective(info.getId(), info.getPath().toUri().getPath(), info.getReplication(), info.getExpiration().getAbsoluteMillis()); - boolean addedDirective = pool.getDirectiveList().add(directive); - assert addedDirective; - if (directivesById.put(directive.getId(), directive) != null) { - throw new IOException("A directive with ID " + directive.getId() + - " already exists"); - } - List directives = - directivesByPath.get(directive.getPath()); - if (directives == null) { - directives = new LinkedList(); - directivesByPath.put(directive.getPath(), directives); - } - directives.add(directive); + addCacheDirective(poolName, directive); counter.increment(); } prog.endStep(Phase.LOADING_FSIMAGE, step); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 166ffb2fd9b..62020173f4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -797,8 +797,7 @@ public class FSImage implements Closeable { */ private void loadFSImage(File curFile, MD5Hash expectedMd5, 
FSNamesystem target, MetaRecoveryContext recovery) throws IOException { - FSImageFormat.Loader loader = new FSImageFormat.Loader( - conf, target); + FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, target); loader.load(curFile); target.setBlockPoolId(this.getBlockPoolID()); @@ -827,7 +826,7 @@ public class FSImage implements Closeable { File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); File dstFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE, txid); - FSImageFormat.Saver saver = new FSImageFormat.Saver(context); + FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); saver.save(newFile, compression); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java index e0a46f15445..872ee74c802 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageCompression.java @@ -57,6 +57,10 @@ class FSImageCompression { imageCodec = codec; } + public CompressionCodec getImageCodec() { + return imageCodec; + } + /** * Create a "noop" compression - i.e. uncompressed */ @@ -89,7 +93,7 @@ class FSImageCompression { * Create a compression instance using the codec specified by * codecClassName */ - private static FSImageCompression createCompression(Configuration conf, + static FSImageCompression createCompression(Configuration conf, String codecClassName) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 3ad258a4512..bcbad75d810 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -68,12 +68,13 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Co import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; import org.apache.hadoop.hdfs.util.ReadOnlyList; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.Text; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.annotations.VisibleForTesting; /** * Contains inner classes for reading or writing the on-disk format for @@ -180,16 +181,74 @@ import com.google.common.base.Preconditions; @InterfaceStability.Evolving public class FSImageFormat { private static final Log LOG = FSImage.LOG; - + // Static-only class private FSImageFormat() {} - + + interface AbstractLoader { + MD5Hash getLoadedImageMd5(); + long getLoadedImageTxId(); + } + + static class LoaderDelegator implements AbstractLoader { + private AbstractLoader impl; + private final Configuration conf; + private final FSNamesystem fsn; + + LoaderDelegator(Configuration conf, FSNamesystem fsn) { + this.conf = conf; + this.fsn = fsn; + } + + @Override + public MD5Hash 
getLoadedImageMd5() { + return impl.getLoadedImageMd5(); + } + + @Override + public long getLoadedImageTxId() { + return impl.getLoadedImageTxId(); + } + + public void load(File file) throws IOException { + Preconditions.checkState(impl == null, "Image already loaded!"); + + FileInputStream is = null; + try { + is = new FileInputStream(file); + byte[] magic = new byte[FSImageUtil.MAGIC_HEADER.length]; + IOUtils.readFully(is, magic, 0, magic.length); + if (Arrays.equals(magic, FSImageUtil.MAGIC_HEADER)) { + FSImageFormatProtobuf.Loader loader = new FSImageFormatProtobuf.Loader( + conf, fsn); + impl = loader; + loader.load(file); + } else { + Loader loader = new Loader(conf, fsn); + impl = loader; + loader.load(file); + } + + } finally { + IOUtils.cleanup(LOG, is); + } + } + } + + /** + * Construct a loader class to load the image. It chooses the loader based on + * the layout version. + */ + public static LoaderDelegator newLoader(Configuration conf, FSNamesystem fsn) { + return new LoaderDelegator(conf, fsn); + } + /** * A one-shot class responsible for loading an image. The load() function * should be called once, after which the getter methods may be used to retrieve * information about the image that was loaded, if loading was successful. */ - public static class Loader { + public static class Loader implements AbstractLoader { private final Configuration conf; /** which namesystem this loader is working for */ private final FSNamesystem namesystem; @@ -214,12 +273,14 @@ public class FSImageFormat { * Return the MD5 checksum of the image that has been loaded. * @throws IllegalStateException if load() has not yet been called. */ - MD5Hash getLoadedImageMd5() { + @Override + public MD5Hash getLoadedImageMd5() { checkLoaded(); return imgDigest; } - long getLoadedImageTxId() { + @Override + public long getLoadedImageTxId() { checkLoaded(); return imgTxId; } @@ -242,7 +303,7 @@ public class FSImageFormat { } } - void load(File curFile) throws IOException { + public void load(File curFile) throws IOException { checkNotLoaded(); assert curFile != null : "curFile is null"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java new file mode 100644 index 00000000000..5ade5cec6a3 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -0,0 +1,466 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
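The LoaderDelegator added to FSImageFormat above decides between the legacy loader and the new protobuf loader by reading the first bytes of the image and comparing them with FSImageUtil.MAGIC_HEADER. A sketch of that dispatch; the magic value used here is illustrative, not necessarily the real constant:

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

final class LoaderDispatchSketch {
    // Illustrative magic value; the patch compares against FSImageUtil.MAGIC_HEADER.
    static final byte[] MAGIC = "HDFSIMG1".getBytes(StandardCharsets.UTF_8);

    static boolean isProtobufImage(File file) throws IOException {
        byte[] head = new byte[MAGIC.length];
        try (DataInputStream in = new DataInputStream(new FileInputStream(file))) {
            in.readFully(head);   // analogous to IOUtils.readFully() in the patch
        }
        // Protobuf images start with the magic header; anything else falls back
        // to the legacy FSImageFormat.Loader.
        return Arrays.equals(head, MAGIC);
    }
}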
+ */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.protocolPB.PBHelper; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.StringMap; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference; +import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount; +import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName; +import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; +import org.apache.hadoop.hdfs.util.ReadOnlyList; + +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; + +@InterfaceAudience.Private +public final class FSImageFormatPBINode { + private final static long USER_GROUP_STRID_MASK = (1 << 24) - 1; + private final static int USER_STRID_OFFSET = 40; + private final static int GROUP_STRID_OFFSET = 16; + private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); + + public final static class Loader { + public static PermissionStatus loadPermission(long id, + final String[] stringTable) { + short perm = (short) (id & ((1 << GROUP_STRID_OFFSET) - 1)); + int gsid = (int) ((id >> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK); + int usid = (int) ((id >> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK); + return new PermissionStatus(stringTable[usid], stringTable[gsid], + new FsPermission(perm)); + } + + public static INodeReference loadINodeReference( + INodeSection.INodeReference r, FSDirectory dir) throws IOException { + long referredId = r.getReferredId(); + INode referred = dir.getInode(referredId); + WithCount withCount = (WithCount) referred.getParentReference(); + if (withCount == null) { + withCount = new INodeReference.WithCount(null, referred); + } + final INodeReference ref; + if (r.hasDstSnapshotId()) { // DstReference + ref = new INodeReference.DstReference(null, withCount, + r.getDstSnapshotId()); + } else { + ref = new INodeReference.WithName(null, withCount, r.getName() + .toByteArray(), r.getLastSnapshotId()); + } + return ref; + } + + public static INodeDirectory loadINodeDirectory(INodeSection.INode n, + final String[] stringTable) { + assert n.getType() == INodeSection.INode.Type.DIRECTORY; + INodeSection.INodeDirectory d = n.getDirectory(); + + final PermissionStatus permissions = loadPermission(d.getPermission(), + stringTable); + final 
INodeDirectory dir = new INodeDirectory(n.getId(), n.getName() + .toByteArray(), permissions, d.getModificationTime()); + + final long nsQuota = d.getNsQuota(), dsQuota = d.getDsQuota(); + if (nsQuota >= 0 || dsQuota >= 0) { + dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota); + } + return dir; + } + + public static void updateBlocksMap(INodeFile file, BlockManager bm) { + // Add file->block mapping + final BlockInfo[] blocks = file.getBlocks(); + if (blocks != null) { + for (int i = 0; i < blocks.length; i++) { + file.setBlock(i, bm.addBlockCollection(blocks[i], file)); + } + } + } + + private final FSDirectory dir; + private final FSNamesystem fsn; + private final FSImageFormatProtobuf.Loader parent; + + Loader(FSNamesystem fsn, final FSImageFormatProtobuf.Loader parent) { + this.fsn = fsn; + this.dir = fsn.dir; + this.parent = parent; + } + + void loadINodeDirectorySection(InputStream in) throws IOException { + while (true) { + INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry + .parseDelimitedFrom(in); + // note that in is a LimitedInputStream + if (e == null) { + break; + } + INodeDirectory p = dir.getInode(e.getParent()).asDirectory(); + for (long id : e.getChildrenList()) { + INode child = dir.getInode(id); + addToParent(p, child); + } + for (int i = 0; i < e.getNumOfRef(); i++) { + INodeReference ref = loadINodeReference(in); + addToParent(p, ref); + } + } + } + + private INodeReference loadINodeReference(InputStream in) + throws IOException { + INodeSection.INodeReference ref = INodeSection.INodeReference + .parseDelimitedFrom(in); + return loadINodeReference(ref, dir); + } + + void loadINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + fsn.resetLastInodeId(s.getLastInodeId()); + LOG.info("Loading " + s.getNumInodes() + " INodes."); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + if (p.getId() == INodeId.ROOT_INODE_ID) { + loadRootINode(p); + } else { + INode n = loadINode(p); + dir.addToInodeMap(n); + } + } + } + + /** + * Load the under-construction files section, and update the lease map + */ + void loadFilesUnderConstructionSection(InputStream in) throws IOException { + while (true) { + FileUnderConstructionEntry entry = FileUnderConstructionEntry + .parseDelimitedFrom(in); + if (entry == null) { + break; + } + // update the lease manager + INodeFile file = dir.getInode(entry.getInodeId()).asFile(); + FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature(); + Preconditions.checkState(uc != null); // file must be under-construction + fsn.leaseManager.addLease(uc.getClientName(), entry.getFullPath()); + } + } + + private void addToParent(INodeDirectory parent, INode child) { + if (parent == dir.rootDir && FSDirectory.isReservedName(child)) { + throw new HadoopIllegalArgumentException("File name \"" + + child.getLocalName() + "\" is reserved. 
Please " + + " change the name of the existing file or directory to another " + + "name before upgrading to this release."); + } + // NOTE: This does not update space counts for parents + if (!parent.addChild(child)) { + return; + } + dir.cacheName(child); + + if (child.isFile()) { + updateBlocksMap(child.asFile(), fsn.getBlockManager()); + } + } + + private INode loadINode(INodeSection.INode n) { + switch (n.getType()) { + case FILE: + return loadINodeFile(n); + case DIRECTORY: + return loadINodeDirectory(n, parent.getStringTable()); + case SYMLINK: + return loadINodeSymlink(n); + default: + break; + } + return null; + } + + private INodeFile loadINodeFile(INodeSection.INode n) { + assert n.getType() == INodeSection.INode.Type.FILE; + INodeSection.INodeFile f = n.getFile(); + List bp = f.getBlocksList(); + short replication = (short) f.getReplication(); + + BlockInfo[] blocks = new BlockInfo[bp.size()]; + for (int i = 0, e = bp.size(); i < e; ++i) { + blocks[i] = new BlockInfo(PBHelper.convert(bp.get(i)), replication); + } + final PermissionStatus permissions = loadPermission(f.getPermission(), + parent.getStringTable()); + + final INodeFile file = new INodeFile(n.getId(), + n.getName().toByteArray(), permissions, f.getModificationTime(), + f.getAccessTime(), blocks, replication, f.getPreferredBlockSize()); + // under-construction information + if (f.hasFileUC()) { + INodeSection.FileUnderConstructionFeature uc = f.getFileUC(); + file.toUnderConstruction(uc.getClientName(), uc.getClientMachine(), + null); + if (blocks.length > 0) { + BlockInfo lastBlk = file.getLastBlock(); + // replace the last block of file + file.setBlock(file.numBlocks() - 1, new BlockInfoUnderConstruction( + lastBlk, replication)); + } + } + return file; + } + + + private INodeSymlink loadINodeSymlink(INodeSection.INode n) { + assert n.getType() == INodeSection.INode.Type.SYMLINK; + INodeSection.INodeSymlink s = n.getSymlink(); + final PermissionStatus permissions = loadPermission(s.getPermission(), + parent.getStringTable()); + return new INodeSymlink(n.getId(), n.getName().toByteArray(), permissions, + 0, 0, s.getTarget().toStringUtf8()); + } + + private void loadRootINode(INodeSection.INode p) { + INodeDirectory root = loadINodeDirectory(p, parent.getStringTable()); + final Quota.Counts q = root.getQuotaCounts(); + final long nsQuota = q.get(Quota.NAMESPACE); + final long dsQuota = q.get(Quota.DISKSPACE); + if (nsQuota != -1 || dsQuota != -1) { + dir.rootDir.getDirectoryWithQuotaFeature().setQuota(nsQuota, dsQuota); + } + dir.rootDir.cloneModificationTime(root); + dir.rootDir.clonePermissionStatus(root); + } + } + + public final static class Saver { + private static long buildPermissionStatus(INodeAttributes n, + final StringMap stringMap) { + long userId = stringMap.getStringId(n.getUserName()); + long groupId = stringMap.getStringId(n.getGroupName()); + return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET) + | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET) + | n.getFsPermissionShort(); + } + + public static INodeSection.INodeFile.Builder buildINodeFile( + INodeFileAttributes file, final StringMap stringMap) { + INodeSection.INodeFile.Builder b = INodeSection.INodeFile.newBuilder() + .setAccessTime(file.getAccessTime()) + .setModificationTime(file.getModificationTime()) + .setPermission(buildPermissionStatus(file, stringMap)) + .setPreferredBlockSize(file.getPreferredBlockSize()) + .setReplication(file.getFileReplication()); + return b; + } + + public static INodeSection.INodeDirectory.Builder 
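The permission field of every serialized inode is a single long. A self-contained sketch of the bit layout behind loadPermission() and buildPermissionStatus(), using the same constants as the code above (the permission short is assumed non-negative):

public final class PermissionPackSketch {
    static final long USER_GROUP_STRID_MASK = (1 << 24) - 1;  // 24-bit string-table ids
    static final int USER_STRID_OFFSET = 40;                  // bits 40..63: user id
    static final int GROUP_STRID_OFFSET = 16;                 // bits 16..39: group id

    static long pack(int userId, int groupId, short perm) {
        return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET)
            | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET)
            | perm;                                            // bits 0..15: mode
    }

    static int userIdOf(long packed) {
        return (int) ((packed >> USER_STRID_OFFSET) & USER_GROUP_STRID_MASK);
    }

    static int groupIdOf(long packed) {
        return (int) ((packed >> GROUP_STRID_OFFSET) & USER_GROUP_STRID_MASK);
    }

    static short permOf(long packed) {
        return (short) (packed & ((1 << GROUP_STRID_OFFSET) - 1));
    }

    public static void main(String[] args) {
        long p = pack(7, 3, (short) 0755);
        System.out.println(userIdOf(p) + " " + groupIdOf(p) + " "
            + Integer.toOctalString(permOf(p)));   // prints: 7 3 755
    }
}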
buildINodeDirectory( + INodeDirectoryAttributes dir, final StringMap stringMap) { + Quota.Counts quota = dir.getQuotaCounts(); + INodeSection.INodeDirectory.Builder b = INodeSection.INodeDirectory + .newBuilder().setModificationTime(dir.getModificationTime()) + .setNsQuota(quota.get(Quota.NAMESPACE)) + .setDsQuota(quota.get(Quota.DISKSPACE)) + .setPermission(buildPermissionStatus(dir, stringMap)); + return b; + } + + public static INodeSection.INodeReference.Builder buildINodeReference( + INodeReference ref) throws IOException { + INodeSection.INodeReference.Builder rb = INodeSection.INodeReference + .newBuilder().setReferredId(ref.getId()); + if (ref instanceof WithName) { + rb.setLastSnapshotId(((WithName) ref).getLastSnapshotId()).setName( + ByteString.copyFrom(ref.getLocalNameBytes())); + } else if (ref instanceof DstReference) { + rb.setDstSnapshotId(((DstReference) ref).getDstSnapshotId()); + } + return rb; + } + + private final FSNamesystem fsn; + private final FileSummary.Builder summary; + private final SaveNamespaceContext context; + private final FSImageFormatProtobuf.Saver parent; + + Saver(FSImageFormatProtobuf.Saver parent, FileSummary.Builder summary) { + this.parent = parent; + this.summary = summary; + this.context = parent.getContext(); + this.fsn = context.getSourceNamesystem(); + } + + void serializeINodeDirectorySection(OutputStream out) throws IOException { + Iterator iter = fsn.getFSDirectory() + .getINodeMap().getMapIterator(); + int i = 0; + while (iter.hasNext()) { + INodeWithAdditionalFields n = iter.next(); + if (!n.isDirectory()) { + continue; + } + + ReadOnlyList children = n.asDirectory().getChildrenList( + Snapshot.CURRENT_STATE_ID); + if (children.size() > 0) { + INodeDirectorySection.DirEntry.Builder b = INodeDirectorySection. 
+ DirEntry.newBuilder().setParent(n.getId()); + List refs = new ArrayList(); + for (INode inode : children) { + if (!inode.isReference()) { + b.addChildren(inode.getId()); + } else { + refs.add(inode.asReference()); + } + } + b.setNumOfRef(refs.size()); + INodeDirectorySection.DirEntry e = b.build(); + e.writeDelimitedTo(out); + for (INodeReference ref : refs) { + INodeSection.INodeReference.Builder rb = buildINodeReference(ref); + rb.build().writeDelimitedTo(out); + } + } + + ++i; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + parent.commitSection(summary, + FSImageFormatProtobuf.SectionName.INODE_DIR); + } + + void serializeINodeSection(OutputStream out) throws IOException { + INodeMap inodesMap = fsn.dir.getINodeMap(); + + INodeSection.Builder b = INodeSection.newBuilder() + .setLastInodeId(fsn.getLastInodeId()).setNumInodes(inodesMap.size()); + INodeSection s = b.build(); + s.writeDelimitedTo(out); + + int i = 0; + Iterator iter = inodesMap.getMapIterator(); + while (iter.hasNext()) { + INodeWithAdditionalFields n = iter.next(); + save(out, n); + ++i; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + parent.commitSection(summary, FSImageFormatProtobuf.SectionName.INODE); + } + + void serializeFilesUCSection(OutputStream out) throws IOException { + Map ucMap = fsn.getFilesUnderConstruction(); + for (Map.Entry entry : ucMap.entrySet()) { + String path = entry.getKey(); + INodeFile file = entry.getValue(); + FileUnderConstructionEntry.Builder b = FileUnderConstructionEntry + .newBuilder().setInodeId(file.getId()).setFullPath(path); + FileUnderConstructionEntry e = b.build(); + e.writeDelimitedTo(out); + } + parent.commitSection(summary, + FSImageFormatProtobuf.SectionName.FILES_UNDERCONSTRUCTION); + } + + private void save(OutputStream out, INode n) throws IOException { + if (n.isDirectory()) { + save(out, n.asDirectory()); + } else if (n.isFile()) { + save(out, n.asFile()); + } else if (n.isSymlink()) { + save(out, n.asSymlink()); + } + } + + private void save(OutputStream out, INodeDirectory n) throws IOException { + INodeSection.INodeDirectory.Builder b = buildINodeDirectory(n, + parent.getStringMap()); + INodeSection.INode r = buildINodeCommon(n) + .setType(INodeSection.INode.Type.DIRECTORY).setDirectory(b).build(); + r.writeDelimitedTo(out); + } + + private void save(OutputStream out, INodeFile n) throws IOException { + INodeSection.INodeFile.Builder b = buildINodeFile(n, + parent.getStringMap()); + + for (Block block : n.getBlocks()) { + b.addBlocks(PBHelper.convert(block)); + } + + FileUnderConstructionFeature uc = n.getFileUnderConstructionFeature(); + if (uc != null) { + INodeSection.FileUnderConstructionFeature f = + INodeSection.FileUnderConstructionFeature + .newBuilder().setClientName(uc.getClientName()) + .setClientMachine(uc.getClientMachine()).build(); + b.setFileUC(f); + } + + INodeSection.INode r = buildINodeCommon(n) + .setType(INodeSection.INode.Type.FILE).setFile(b).build(); + r.writeDelimitedTo(out); + } + + private void save(OutputStream out, INodeSymlink n) throws IOException { + INodeSection.INodeSymlink.Builder b = INodeSection.INodeSymlink + .newBuilder() + .setPermission(buildPermissionStatus(n, parent.getStringMap())) + .setTarget(ByteString.copyFrom(n.getSymlink())); + INodeSection.INode r = buildINodeCommon(n) + .setType(INodeSection.INode.Type.SYMLINK).setSymlink(b).build(); + r.writeDelimitedTo(out); + } + + private final 
INodeSection.INode.Builder buildINodeCommon(INode n) { + return INodeSection.INode.newBuilder() + .setId(n.getId()) + .setName(ByteString.copyFrom(n.getLocalNameBytes())); + } + } + + private FSImageFormatPBINode() { + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java new file mode 100644 index 00000000000..2edc57b18d7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -0,0 +1,551 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.hdfs.server.namenode.snapshot.FSImageFormatPBSnapshot; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; +import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; +import org.apache.hadoop.hdfs.util.MD5FileUtils; +import org.apache.hadoop.io.MD5Hash; +import 
org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.io.compress.CompressorStream; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.io.LimitInputStream; +import com.google.protobuf.CodedOutputStream; + +/** + * Utility class to read / write fsimage in protobuf format. + */ +@InterfaceAudience.Private +public final class FSImageFormatProtobuf { + private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); + + public static final class Loader implements FSImageFormat.AbstractLoader { + static final int MINIMUM_FILE_LENGTH = 8; + private final Configuration conf; + private final FSNamesystem fsn; + + private String[] stringTable; + + /** The MD5 sum of the loaded file */ + private MD5Hash imgDigest; + /** The transaction ID of the last edit represented by the loaded file */ + private long imgTxId; + + Loader(Configuration conf, FSNamesystem fsn) { + this.conf = conf; + this.fsn = fsn; + } + + @Override + public MD5Hash getLoadedImageMd5() { + return imgDigest; + } + + @Override + public long getLoadedImageTxId() { + return imgTxId; + } + + public String[] getStringTable() { + return stringTable; + } + + void load(File file) throws IOException { + long start = System.currentTimeMillis(); + imgDigest = MD5FileUtils.computeMd5ForFile(file); + RandomAccessFile raFile = new RandomAccessFile(file, "r"); + FileInputStream fin = new FileInputStream(file); + try { + loadInternal(raFile, fin); + long end = System.currentTimeMillis(); + LOG.info("Loaded FSImage in " + (end - start) / 1000 + " seconds."); + } finally { + fin.close(); + raFile.close(); + } + } + + private void loadInternal(RandomAccessFile raFile, FileInputStream fin) + throws IOException { + if (!FSImageUtil.checkFileFormat(raFile)) { + throw new IOException("Unrecognized file format"); + } + FileSummary summary = FSImageUtil.loadSummary(raFile); + + FileChannel channel = fin.getChannel(); + + FSImageFormatPBINode.Loader inodeLoader = new FSImageFormatPBINode.Loader( + fsn, this); + FSImageFormatPBSnapshot.Loader snapshotLoader = new FSImageFormatPBSnapshot.Loader( + fsn, this); + + ArrayList sections = Lists.newArrayList(summary + .getSectionsList()); + Collections.sort(sections, new Comparator() { + @Override + public int compare(FileSummary.Section s1, FileSummary.Section s2) { + SectionName n1 = SectionName.fromString(s1.getName()); + SectionName n2 = SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + StartupProgress prog = NameNode.getStartupProgress(); + /** + * beginStep() and the endStep() calls do not match the boundary of the + * sections. This is because that the current implementation only allows + * a particular step to be started for once. 
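The comparator in the loader orders sections by the ordinal of the SectionName enum (defined later in this file), so sections are applied in dependency order regardless of the order in which they were written: the string table before the inodes that reference it, the inode section before the directory entries that link them. A reduced sketch of that ordering step:

import java.util.*;

final class SectionOrderSketch {
    enum SectionName { NS_INFO, STRING_TABLE, INODE, SNAPSHOT, INODE_DIR,
        FILES_UNDERCONSTRUCTION, SNAPSHOT_DIFF, SECRET_MANAGER, CACHE_MANAGER }

    record Section(String name, long offset, long length) {}

    static int loadOrder(Section s) {
        try {
            return SectionName.valueOf(s.name()).ordinal();
        } catch (IllegalArgumentException e) {
            return -1;   // unknown section names sort first; the loader only warns on them
        }
    }

    static void sortForLoading(List<Section> sections) {
        sections.sort(Comparator.comparingInt(SectionOrderSketch::loadOrder));
    }
}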
+ */ + Step currentStep = null; + + for (FileSummary.Section s : sections) { + channel.position(s.getOffset()); + InputStream in = new BufferedInputStream(new LimitInputStream(fin, + s.getLength())); + + in = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), in); + + String n = s.getName(); + + switch (SectionName.fromString(n)) { + case NS_INFO: + loadNameSystemSection(in); + break; + case STRING_TABLE: + loadStringTableSection(in); + break; + case INODE: { + currentStep = new Step(StepType.INODES); + prog.beginStep(Phase.LOADING_FSIMAGE, currentStep); + inodeLoader.loadINodeSection(in); + } + break; + case INODE_DIR: + inodeLoader.loadINodeDirectorySection(in); + break; + case FILES_UNDERCONSTRUCTION: + inodeLoader.loadFilesUnderConstructionSection(in); + break; + case SNAPSHOT: + snapshotLoader.loadSnapshotSection(in); + break; + case SNAPSHOT_DIFF: + snapshotLoader.loadSnapshotDiffSection(in); + break; + case SECRET_MANAGER: { + prog.endStep(Phase.LOADING_FSIMAGE, currentStep); + Step step = new Step(StepType.DELEGATION_TOKENS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + loadSecretManagerSection(in); + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + break; + case CACHE_MANAGER: { + Step step = new Step(StepType.CACHE_POOLS); + prog.beginStep(Phase.LOADING_FSIMAGE, step); + loadCacheManagerSection(in); + prog.endStep(Phase.LOADING_FSIMAGE, step); + } + break; + default: + LOG.warn("Unregconized section " + n); + break; + } + } + } + + private void loadNameSystemSection(InputStream in) throws IOException { + NameSystemSection s = NameSystemSection.parseDelimitedFrom(in); + fsn.setGenerationStampV1(s.getGenstampV1()); + fsn.setGenerationStampV2(s.getGenstampV2()); + fsn.setGenerationStampV1Limit(s.getGenstampV1Limit()); + fsn.setLastAllocatedBlockId(s.getLastAllocatedBlockId()); + imgTxId = s.getTransactionId(); + } + + private void loadStringTableSection(InputStream in) throws IOException { + StringTableSection s = StringTableSection.parseDelimitedFrom(in); + stringTable = new String[s.getNumEntry() + 1]; + for (int i = 0; i < s.getNumEntry(); ++i) { + StringTableSection.Entry e = StringTableSection.Entry + .parseDelimitedFrom(in); + stringTable[e.getId()] = e.getStr(); + } + } + + private void loadSecretManagerSection(InputStream in) throws IOException { + SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in); + int numKeys = s.getNumKeys(), numTokens = s.getNumTokens(); + ArrayList keys = Lists + .newArrayListWithCapacity(numKeys); + ArrayList tokens = Lists + .newArrayListWithCapacity(numTokens); + + for (int i = 0; i < numKeys; ++i) + keys.add(SecretManagerSection.DelegationKey.parseDelimitedFrom(in)); + + for (int i = 0; i < numTokens; ++i) + tokens.add(SecretManagerSection.PersistToken.parseDelimitedFrom(in)); + + fsn.loadSecretManagerState(s, keys, tokens); + } + + private void loadCacheManagerSection(InputStream in) throws IOException { + CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(in); + ArrayList pools = Lists.newArrayListWithCapacity(s + .getNumPools()); + ArrayList directives = Lists + .newArrayListWithCapacity(s.getNumDirectives()); + for (int i = 0; i < s.getNumPools(); ++i) + pools.add(CachePoolInfoProto.parseDelimitedFrom(in)); + for (int i = 0; i < s.getNumDirectives(); ++i) + directives.add(CacheDirectiveInfoProto.parseDelimitedFrom(in)); + fsn.getCacheManager().loadState( + new CacheManager.PersistState(s, pools, directives)); + } + + } + + public static final class Saver { + private final 
SaveNamespaceContext context; + private long currentOffset = FSImageUtil.MAGIC_HEADER.length; + private MD5Hash savedDigest; + private StringMap stringMap = new StringMap(); + + private FileChannel fileChannel; + // OutputStream for the section data + private OutputStream sectionOutputStream; + private CompressionCodec codec; + private OutputStream underlyingOutputStream; + public static final int CHECK_CANCEL_INTERVAL = 4096; + + Saver(SaveNamespaceContext context) { + this.context = context; + } + + public MD5Hash getSavedDigest() { + return savedDigest; + } + + public SaveNamespaceContext getContext() { + return context; + } + + public void commitSection(FileSummary.Builder summary, SectionName name) + throws IOException { + long oldOffset = currentOffset; + flushSectionOutputStream(); + + if (codec != null) { + sectionOutputStream = codec.createOutputStream(underlyingOutputStream); + } else { + sectionOutputStream = underlyingOutputStream; + } + long length = fileChannel.position() - oldOffset; + summary.addSections(FileSummary.Section.newBuilder().setName(name.name) + .setLength(length).setOffset(currentOffset)); + currentOffset += length; + } + + private void flushSectionOutputStream() throws IOException { + if (codec != null) { + ((CompressorStream) sectionOutputStream).finish(); + } + sectionOutputStream.flush(); + } + + void save(File file, FSImageCompression compression) throws IOException { + FileOutputStream fout = new FileOutputStream(file); + fileChannel = fout.getChannel(); + try { + saveInternal(fout, compression, file.getAbsolutePath().toString()); + } finally { + fout.close(); + } + } + + private static void saveFileSummary(OutputStream out, FileSummary summary) + throws IOException { + summary.writeDelimitedTo(out); + int length = getOndiskTrunkSize(summary); + byte[] lengthBytes = new byte[4]; + ByteBuffer.wrap(lengthBytes).asIntBuffer().put(length); + out.write(lengthBytes); + } + + private void saveInodes(FileSummary.Builder summary) throws IOException { + FSImageFormatPBINode.Saver saver = new FSImageFormatPBINode.Saver(this, + summary); + + saver.serializeINodeSection(sectionOutputStream); + saver.serializeINodeDirectorySection(sectionOutputStream); + saver.serializeFilesUCSection(sectionOutputStream); + } + + private void saveSnapshots(FileSummary.Builder summary) throws IOException { + FSImageFormatPBSnapshot.Saver snapshotSaver = new FSImageFormatPBSnapshot.Saver( + this, summary, context, context.getSourceNamesystem()); + + snapshotSaver.serializeSnapshotSection(sectionOutputStream); + snapshotSaver.serializeSnapshotDiffSection(sectionOutputStream); + } + + private void saveInternal(FileOutputStream fout, + FSImageCompression compression, String filePath) throws IOException { + StartupProgress prog = NameNode.getStartupProgress(); + MessageDigest digester = MD5Hash.getDigester(); + + underlyingOutputStream = new DigestOutputStream(new BufferedOutputStream( + fout), digester); + underlyingOutputStream.write(FSImageUtil.MAGIC_HEADER); + + fileChannel = fout.getChannel(); + + FileSummary.Builder b = FileSummary.newBuilder() + .setOndiskVersion(FSImageUtil.FILE_VERSION) + .setLayoutVersion(LayoutVersion.getCurrentLayoutVersion()); + + codec = compression.getImageCodec(); + if (codec != null) { + b.setCodec(codec.getClass().getCanonicalName()); + sectionOutputStream = codec.createOutputStream(underlyingOutputStream); + } else { + sectionOutputStream = underlyingOutputStream; + } + + saveNameSystemSection(b); + // Check for cancellation right after serializing the 
name system section. + // Some unit tests, such as TestSaveNamespace#testCancelSaveNameSpace + // depends on this behavior. + context.checkCancelled(); + + Step step = new Step(StepType.INODES, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveInodes(b); + saveSnapshots(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + step = new Step(StepType.DELEGATION_TOKENS, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveSecretManagerSection(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + step = new Step(StepType.CACHE_POOLS, filePath); + prog.beginStep(Phase.SAVING_CHECKPOINT, step); + saveCacheManagerSection(b); + prog.endStep(Phase.SAVING_CHECKPOINT, step); + + saveStringTableSection(b); + + // We use the underlyingOutputStream to write the header. Therefore flush + // the buffered stream (which is potentially compressed) first. + flushSectionOutputStream(); + + FileSummary summary = b.build(); + saveFileSummary(underlyingOutputStream, summary); + underlyingOutputStream.close(); + savedDigest = new MD5Hash(digester.digest()); + } + + private void saveSecretManagerSection(FileSummary.Builder summary) + throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + DelegationTokenSecretManager.SecretManagerState state = fsn + .saveSecretManagerState(); + state.section.writeDelimitedTo(sectionOutputStream); + for (SecretManagerSection.DelegationKey k : state.keys) + k.writeDelimitedTo(sectionOutputStream); + + for (SecretManagerSection.PersistToken t : state.tokens) + t.writeDelimitedTo(sectionOutputStream); + + commitSection(summary, SectionName.SECRET_MANAGER); + } + + private void saveCacheManagerSection(FileSummary.Builder summary) + throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + CacheManager.PersistState state = fsn.getCacheManager().saveState(); + state.section.writeDelimitedTo(sectionOutputStream); + + for (CachePoolInfoProto p : state.pools) + p.writeDelimitedTo(sectionOutputStream); + + for (CacheDirectiveInfoProto p : state.directives) + p.writeDelimitedTo(sectionOutputStream); + + commitSection(summary, SectionName.CACHE_MANAGER); + } + + private void saveNameSystemSection(FileSummary.Builder summary) + throws IOException { + final FSNamesystem fsn = context.getSourceNamesystem(); + OutputStream out = sectionOutputStream; + NameSystemSection.Builder b = NameSystemSection.newBuilder() + .setGenstampV1(fsn.getGenerationStampV1()) + .setGenstampV1Limit(fsn.getGenerationStampV1Limit()) + .setGenstampV2(fsn.getGenerationStampV2()) + .setLastAllocatedBlockId(fsn.getLastAllocatedBlockId()) + .setTransactionId(context.getTxId()); + + // We use the non-locked version of getNamespaceInfo here since + // the coordinating thread of saveNamespace already has read-locked + // the namespace for us. If we attempt to take another readlock + // from the actual saver thread, there's a potential of a + // fairness-related deadlock. See the comments on HDFS-2223. 
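+ // With the namespace id filled in below, the section is written as a single
+ // delimited NameSystemSection message and then committed into the FileSummary
+ // (name, offset and length) via commitSection above.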
+ b.setNamespaceId(fsn.unprotectedGetNamespaceInfo().getNamespaceID()); + NameSystemSection s = b.build(); + s.writeDelimitedTo(out); + + commitSection(summary, SectionName.NS_INFO); + } + + private void saveStringTableSection(FileSummary.Builder summary) + throws IOException { + OutputStream out = sectionOutputStream; + StringTableSection.Builder b = StringTableSection.newBuilder() + .setNumEntry(stringMap.size()); + b.build().writeDelimitedTo(out); + for (Entry e : stringMap.entrySet()) { + StringTableSection.Entry.Builder eb = StringTableSection.Entry + .newBuilder().setId(e.getValue()).setStr(e.getKey()); + eb.build().writeDelimitedTo(out); + } + commitSection(summary, SectionName.STRING_TABLE); + } + + public StringMap getStringMap() { + return stringMap; + } + } + + public static class StringMap { + private final Map stringMap; + + public StringMap() { + stringMap = Maps.newHashMap(); + } + + int getStringId(String str) { + if (str == null) { + return 0; + } + Integer v = stringMap.get(str); + if (v == null) { + int nv = stringMap.size() + 1; + stringMap.put(str, nv); + return nv; + } + return v; + } + + int size() { + return stringMap.size(); + } + + Set> entrySet() { + return stringMap.entrySet(); + } + } + + /** + * Supported section name. The order of the enum determines the order of + * loading. + */ + public enum SectionName { + NS_INFO("NS_INFO"), + STRING_TABLE("STRING_TABLE"), + INODE("INODE"), + SNAPSHOT("SNAPSHOT"), + INODE_DIR("INODE_DIR"), + FILES_UNDERCONSTRUCTION("FILES_UNDERCONSTRUCTION"), + SNAPSHOT_DIFF("SNAPSHOT_DIFF"), + SECRET_MANAGER("SECRET_MANAGER"), + CACHE_MANAGER("CACHE_MANAGER"); + + private static final SectionName[] values = SectionName.values(); + + public static SectionName fromString(String name) { + for (SectionName n : values) { + if (n.name.equals(name)) + return n; + } + return null; + } + + private final String name; + + private SectionName(String name) { + this.name = name; + } + } + + private static int getOndiskTrunkSize(com.google.protobuf.GeneratedMessage s) { + return CodedOutputStream.computeRawVarint32Size(s.getSerializedSize()) + + s.getSerializedSize(); + } + + private FSImageFormatProtobuf() { + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java new file mode 100644 index 00000000000..b9953480f26 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageUtil.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.RandomAccessFile; +import java.util.Arrays; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.LayoutVersion; +import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.Loader; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.io.compress.CompressionCodec; + +@InterfaceAudience.Private +public final class FSImageUtil { + public static final byte[] MAGIC_HEADER = "HDFSIMG1".getBytes(); + public static final int FILE_VERSION = 1; + + public static boolean checkFileFormat(RandomAccessFile file) + throws IOException { + if (file.length() < Loader.MINIMUM_FILE_LENGTH) + return false; + + byte[] magic = new byte[MAGIC_HEADER.length]; + file.readFully(magic); + if (!Arrays.equals(MAGIC_HEADER, magic)) + return false; + + return true; + } + + public static FileSummary loadSummary(RandomAccessFile file) + throws IOException { + final int FILE_LENGTH_FIELD_SIZE = 4; + long fileLength = file.length(); + file.seek(fileLength - FILE_LENGTH_FIELD_SIZE); + int summaryLength = file.readInt(); + + if (summaryLength <= 0) { + throw new IOException("Negative length of the file"); + } + file.seek(fileLength - FILE_LENGTH_FIELD_SIZE - summaryLength); + + byte[] summaryBytes = new byte[summaryLength]; + file.readFully(summaryBytes); + + FileSummary summary = FileSummary + .parseDelimitedFrom(new ByteArrayInputStream(summaryBytes)); + if (summary.getOndiskVersion() != FILE_VERSION) { + throw new IOException("Unsupported file version " + + summary.getOndiskVersion()); + } + + if (!LayoutVersion.supports(Feature.PROTOBUF_FORMAT, + summary.getLayoutVersion())) { + throw new IOException("Unsupported layout version " + + summary.getLayoutVersion()); + } + return summary; + } + + public static InputStream wrapInputStreamForCompression( + Configuration conf, String codec, InputStream in) throws IOException { + if (codec.isEmpty()) + return in; + + FSImageCompression compression = FSImageCompression.createCompression( + conf, codec); + CompressionCodec imageCodec = compression.getImageCodec(); + return imageCodec.createInputStream(in); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4e209767dfc..f91c41c7610 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -179,6 +179,7 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager.SecretManagerState; import org.apache.hadoop.hdfs.server.blockmanagement.BlockCollection; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; @@ -196,6 +197,8 @@ import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.common.Util; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection.PersistToken; import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; @@ -6012,6 +6015,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } + /** + * @return all the under-construction files in the lease map + */ + Map getFilesUnderConstruction() { + synchronized (leaseManager) { + return leaseManager.getINodesUnderConstruction(); + } + } + /** * Register a Backup name-node, verifying that it belongs * to the correct namespace, and adding it to the set of @@ -6288,6 +6300,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dtSecretManager.saveSecretManagerStateCompat(out, sdPath); } + SecretManagerState saveSecretManagerState() { + return dtSecretManager.saveSecretManagerState(); + } + /** * @param in load the state of secret manager from input stream */ @@ -6295,6 +6311,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dtSecretManager.loadSecretManagerStateCompat(in); } + void loadSecretManagerState(SecretManagerSection s, + List keys, + List tokens) throws IOException { + dtSecretManager.loadSecretManagerState(new SecretManagerState(s, keys, tokens)); + } + /** * Log the updateMasterKey operation to edit logs * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java index 83cb0a4eb94..f9a06f1e5bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java @@ -171,7 +171,7 @@ public class INodeDirectory extends INodeWithAdditionalFields return children == null? 
-1: Collections.binarySearch(children, name); } - protected DirectoryWithSnapshotFeature addSnapshotFeature( + public DirectoryWithSnapshotFeature addSnapshotFeature( DirectoryDiffList diffs) { Preconditions.checkState(!isWithSnapshot(), "Directory is already with snapshot"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java index 500405e09d4..80abb5268dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java @@ -252,7 +252,7 @@ public class INodeFile extends INodeWithAdditionalFields /* Start of Snapshot Feature */ - private FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) { + public FileWithSnapshotFeature addSnapshotFeature(FileDiffList diffs) { Preconditions.checkState(!isWithSnapshot(), "File is already with snapshot"); FileWithSnapshotFeature sf = new FileWithSnapshotFeature(diffs); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java index 5ffcc21f5bb..bd0355b6618 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeMap.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import java.util.Iterator; import java.util.List; import org.apache.hadoop.fs.permission.FsPermission; @@ -46,6 +47,10 @@ public class INodeMap { /** Synchronized by external lock. */ private final GSet map; + public Iterator getMapIterator() { + return map.iterator(); + } + private INodeMap(GSet map) { Preconditions.checkArgument(map != null); this.map = map; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java index 67ee88e11de..a7c4c75f005 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.concurrent.CountDownLatch; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.util.Canceler; @@ -32,7 +33,8 @@ import com.google.common.base.Preconditions; * allows cancellation, and also is responsible for accumulating * failed storage directories. 
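 * <p>Informal usage sketch (illustrative only; hasMoreRecords, writeNextRecord
 * and count are hypothetical, and the context is created and driven by FSImage
 * during saveNamespace): long-running savers poll for cancellation
 * periodically, for example
 * <pre>
 * while (hasMoreRecords()) {
 *   writeNextRecord(out);          // hypothetical record writer
 *   if (++count % 4096 == 0) {
 *     context.checkCancelled();    // throws SaveNamespaceCancelledException if cancelled
 *   }
 * }
 * </pre>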
*/ -class SaveNamespaceContext { +@InterfaceAudience.Private +public class SaveNamespaceContext { private final FSNamesystem sourceNamesystem; private final long txid; private final List errorSDs = @@ -72,7 +74,7 @@ class SaveNamespaceContext { completionLatch.countDown(); } - void checkCancelled() throws SaveNamespaceCancelledException { + public void checkCancelled() throws SaveNamespaceCancelledException { if (canceller.isCancelled()) { throw new SaveNamespaceCancelledException( canceller.getCancellationReason()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java index 06f7a89e33a..a9cad94f0b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DirectoryWithSnapshotFeature.java @@ -244,7 +244,7 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { this.isSnapshotRoot = isSnapshotRoot; } - ChildrenDiff getChildrenDiff() { + public ChildrenDiff getChildrenDiff() { return diff; } @@ -343,6 +343,10 @@ public class DirectoryWithSnapshotFeature implements INode.Feature { return super.toString() + " childrenSize=" + childrenSize + ", " + diff; } + int getChildrenSize() { + return childrenSize; + } + @Override void write(DataOutput out, ReferenceMap referenceMap) throws IOException { writeSnapshot(out); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java new file mode 100644 index 00000000000..06cc1d0ac1f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -0,0 +1,437 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.snapshot; + +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadINodeDirectory; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadINodeReference; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.loadPermission; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Loader.updateBlocksMap; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeDirectory; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeFile; +import static org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode.Saver.buildINodeReference; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.server.namenode.FSDirectory; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.CreatedListEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection.DiffEntry.Type; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection; +import org.apache.hadoop.hdfs.server.namenode.INode; +import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes; +import org.apache.hadoop.hdfs.server.namenode.INodeFile; +import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes; +import org.apache.hadoop.hdfs.server.namenode.INodeMap; +import org.apache.hadoop.hdfs.server.namenode.INodeReference; +import org.apache.hadoop.hdfs.server.namenode.INodeWithAdditionalFields; +import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceContext; +import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff; +import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList; +import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.Root; +import org.apache.hadoop.hdfs.util.Diff.ListType; + +import com.google.common.base.Preconditions; +import com.google.protobuf.ByteString; + +@InterfaceAudience.Private +public class FSImageFormatPBSnapshot { + /** + * Loading snapshot related information from protobuf based FSImage + */ + public final static class Loader { + private final FSNamesystem fsn; + private final FSDirectory fsDir; + private final FSImageFormatProtobuf.Loader parent; + private final Map snapshotMap; + + + public Loader(FSNamesystem fsn, FSImageFormatProtobuf.Loader parent) { + this.fsn = fsn; + this.fsDir = fsn.getFSDirectory(); + this.snapshotMap = new HashMap(); + this.parent = parent; + } + + /** + * Load the snapshots section from fsimage. Also convert snapshottable + * directories into {@link INodeDirectorySnapshottable}. 
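+ * <p>Wire-format sketch (informal; inferred from the loading code below and the
+ * fsimage.proto added by this patch): the section is one delimited
+ * SnapshotSection message followed by one delimited SnapshotSection.Snapshot
+ * message per snapshot:
+ * <pre>
+ * SnapshotSection header = SnapshotSection.parseDelimitedFrom(in);
+ * for (int i = 0; i < header.getNumSnapshots(); i++) {
+ *   SnapshotSection.Snapshot s =
+ *       SnapshotSection.Snapshot.parseDelimitedFrom(in);
+ *   // each entry carries the snapshot id and its root directory inode
+ * }
+ * </pre>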
+ * + */ + public void loadSnapshotSection(InputStream in) throws IOException { + SnapshotManager sm = fsn.getSnapshotManager(); + SnapshotSection section = SnapshotSection.parseDelimitedFrom(in); + int snum = section.getNumSnapshots(); + sm.setNumSnapshots(snum); + sm.setSnapshotCounter(section.getSnapshotCounter()); + for (long sdirId : section.getSnapshottableDirList()) { + INodeDirectory dir = fsDir.getInode(sdirId).asDirectory(); + final INodeDirectorySnapshottable sdir; + if (!dir.isSnapshottable()) { + sdir = new INodeDirectorySnapshottable(dir); + fsDir.addToInodeMap(sdir); + } else { + // dir is root, and admin set root to snapshottable before + sdir = (INodeDirectorySnapshottable) dir; + sdir.setSnapshotQuota(INodeDirectorySnapshottable.SNAPSHOT_LIMIT); + } + sm.addSnapshottable(sdir); + } + loadSnapshots(in, snum); + } + + private void loadSnapshots(InputStream in, int size) throws IOException { + for (int i = 0; i < size; i++) { + SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot + .parseDelimitedFrom(in); + INodeDirectory root = loadINodeDirectory(pbs.getRoot(), + parent.getStringTable()); + int sid = pbs.getSnapshotId(); + INodeDirectorySnapshottable parent = (INodeDirectorySnapshottable) fsDir + .getInode(root.getId()).asDirectory(); + Snapshot snapshot = new Snapshot(sid, root, parent); + // add the snapshot to parent, since we follow the sequence of + // snapshotsByNames when saving, we do not need to sort when loading + parent.addSnapshot(snapshot); + snapshotMap.put(sid, snapshot); + } + } + + /** + * Load the snapshot diff section from fsimage. + */ + public void loadSnapshotDiffSection(InputStream in) throws IOException { + while (true) { + SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry + .parseDelimitedFrom(in); + if (entry == null) { + break; + } + long inodeId = entry.getInodeId(); + INode inode = fsDir.getInode(inodeId); + SnapshotDiffSection.DiffEntry.Type type = entry.getType(); + switch (type) { + case FILEDIFF: + loadFileDiffList(in, inode.asFile(), entry.getNumOfDiff()); + break; + case DIRECTORYDIFF: + loadDirectoryDiffList(in, inode.asDirectory(), entry.getNumOfDiff()); + break; + } + } + } + + /** Load FileDiff list for a file with snapshot feature */ + private void loadFileDiffList(InputStream in, INodeFile file, int size) + throws IOException { + final FileDiffList diffs = new FileDiffList(); + for (int i = 0; i < size; i++) { + SnapshotDiffSection.FileDiff pbf = SnapshotDiffSection.FileDiff + .parseDelimitedFrom(in); + INodeFileAttributes copy = null; + if (pbf.hasSnapshotCopy()) { + INodeSection.INodeFile fileInPb = pbf.getSnapshotCopy(); + PermissionStatus permission = loadPermission( + fileInPb.getPermission(), parent.getStringTable()); + copy = new INodeFileAttributes.SnapshotCopy(pbf.getName() + .toByteArray(), permission, fileInPb.getModificationTime(), + fileInPb.getAccessTime(), (short) fileInPb.getReplication(), + fileInPb.getPreferredBlockSize()); + } + + FileDiff diff = new FileDiff(pbf.getSnapshotId(), copy, null, + pbf.getFileSize()); + diffs.addFirst(diff); + } + file.addSnapshotFeature(diffs); + } + + /** Load the created list in a DirectoryDiff */ + private List loadCreatedList(InputStream in, INodeDirectory dir, + int size) throws IOException { + List clist = new ArrayList(size); + for (long c = 0; c < size; c++) { + CreatedListEntry entry = CreatedListEntry.parseDelimitedFrom(in); + INode created = SnapshotFSImageFormat.loadCreated(entry.getName() + .toByteArray(), dir); + clist.add(created); + } + return 
clist; + } + + private void addToDeletedList(INode dnode, INodeDirectory parent) { + dnode.setParent(parent); + if (dnode.isFile()) { + updateBlocksMap(dnode.asFile(), fsn.getBlockManager()); + } + } + + /** + * Load the deleted list in a DirectoryDiff + * @param totalSize the total size of the deleted list + * @param deletedNodes non-reference inodes in the deleted list. These + * inodes' ids are directly recorded in protobuf + */ + private List loadDeletedList(InputStream in, INodeDirectory dir, + int refNum, List deletedNodes) throws IOException { + List dlist = new ArrayList(refNum + deletedNodes.size()); + // load non-reference inodes + for (long deletedId : deletedNodes) { + INode deleted = fsDir.getInode(deletedId); + dlist.add(deleted); + addToDeletedList(deleted, dir); + } + // load reference nodes in the deleted list + for (int r = 0; r < refNum; r++) { + INodeSection.INodeReference ref = INodeSection.INodeReference + .parseDelimitedFrom(in); + INodeReference refNode = loadINodeReference(ref, fsDir); + dlist.add(refNode); + addToDeletedList(refNode, dir); + } + Collections.sort(dlist, new Comparator() { + @Override + public int compare(INode n1, INode n2) { + return n1.compareTo(n2.getLocalNameBytes()); + } + }); + return dlist; + } + + /** Load DirectoryDiff list for a directory with snapshot feature */ + private void loadDirectoryDiffList(InputStream in, INodeDirectory dir, + int size) throws IOException { + if (!dir.isWithSnapshot()) { + dir.addSnapshotFeature(null); + } + DirectoryDiffList diffs = dir.getDiffs(); + for (int i = 0; i < size; i++) { + // load a directory diff + SnapshotDiffSection.DirectoryDiff diffInPb = SnapshotDiffSection. + DirectoryDiff.parseDelimitedFrom(in); + final int snapshotId = diffInPb.getSnapshotId(); + final Snapshot snapshot = snapshotMap.get(snapshotId); + int childrenSize = diffInPb.getChildrenSize(); + boolean useRoot = diffInPb.getIsSnapshotRoot(); + INodeDirectoryAttributes copy = null; + if (useRoot) { + copy = snapshot.getRoot(); + }else if (diffInPb.hasSnapshotCopy()) { + INodeSection.INodeDirectory dirCopyInPb = diffInPb.getSnapshotCopy(); + final byte[] name = diffInPb.getName().toByteArray(); + PermissionStatus permission = loadPermission(dirCopyInPb + .getPermission(), parent.getStringTable()); + long modTime = dirCopyInPb.getModificationTime(); + boolean noQuota = dirCopyInPb.getNsQuota() == -1 + && dirCopyInPb.getDsQuota() == -1; + copy = noQuota ? 
new INodeDirectoryAttributes.SnapshotCopy(name, + permission, modTime) + : new INodeDirectoryAttributes.CopyWithQuota(name, permission, + modTime, dirCopyInPb.getNsQuota(), dirCopyInPb.getDsQuota()); + } + // load created list + List clist = loadCreatedList(in, dir, + diffInPb.getCreatedListSize()); + // load deleted list + List dlist = loadDeletedList(in, dir, + diffInPb.getNumOfDeletedRef(), diffInPb.getDeletedINodeList()); + // create the directory diff + DirectoryDiff diff = new DirectoryDiff(snapshotId, copy, null, + childrenSize, clist, dlist, useRoot); + diffs.addFirst(diff); + } + } + } + + /** + * Saving snapshot related information to protobuf based FSImage + */ + public final static class Saver { + private final FSNamesystem fsn; + private final FileSummary.Builder headers; + private final FSImageFormatProtobuf.Saver parent; + private final SaveNamespaceContext context; + + public Saver(FSImageFormatProtobuf.Saver parent, + FileSummary.Builder headers, SaveNamespaceContext context, FSNamesystem fsn) { + this.parent = parent; + this.headers = headers; + this.context = context; + this.fsn = fsn; + } + + /** + * save all the snapshottable directories and snapshots to fsimage + */ + public void serializeSnapshotSection(OutputStream out) throws IOException { + SnapshotManager sm = fsn.getSnapshotManager(); + SnapshotSection.Builder b = SnapshotSection.newBuilder() + .setSnapshotCounter(sm.getSnapshotCounter()) + .setNumSnapshots(sm.getNumSnapshots()); + + INodeDirectorySnapshottable[] snapshottables = sm.getSnapshottableDirs(); + for (INodeDirectorySnapshottable sdir : snapshottables) { + b.addSnapshottableDir(sdir.getId()); + } + b.build().writeDelimitedTo(out); + int i = 0; + for(INodeDirectorySnapshottable sdir : snapshottables) { + for(Snapshot s : sdir.getSnapshotsByNames()) { + Root sroot = s.getRoot(); + SnapshotSection.Snapshot.Builder sb = SnapshotSection.Snapshot + .newBuilder().setSnapshotId(s.getId()); + INodeSection.INodeDirectory.Builder db = buildINodeDirectory(sroot, + parent.getStringMap()); + INodeSection.INode r = INodeSection.INode.newBuilder() + .setId(sroot.getId()) + .setType(INodeSection.INode.Type.DIRECTORY) + .setName(ByteString.copyFrom(sroot.getLocalNameBytes())) + .setDirectory(db).build(); + sb.setRoot(r).build().writeDelimitedTo(out); + i++; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + } + Preconditions.checkState(i == sm.getNumSnapshots()); + parent.commitSection(headers, FSImageFormatProtobuf.SectionName.SNAPSHOT); + } + + /** + * save all the snapshot diff to fsimage + */ + public void serializeSnapshotDiffSection(OutputStream out) + throws IOException { + INodeMap inodesMap = fsn.getFSDirectory().getINodeMap(); + int i = 0; + Iterator iter = inodesMap.getMapIterator(); + while (iter.hasNext()) { + INodeWithAdditionalFields inode = iter.next(); + if (inode.isFile()) { + serializeFileDiffList(inode.asFile(), out); + } else if (inode.isDirectory()) { + serializeDirDiffList(inode.asDirectory(), out); + } + ++i; + if (i % FSImageFormatProtobuf.Saver.CHECK_CANCEL_INTERVAL == 0) { + context.checkCancelled(); + } + } + parent.commitSection(headers, + FSImageFormatProtobuf.SectionName.SNAPSHOT_DIFF); + } + + private void serializeFileDiffList(INodeFile file, OutputStream out) + throws IOException { + FileWithSnapshotFeature sf = file.getFileWithSnapshotFeature(); + if (sf != null) { + List diffList = sf.getDiffs().asList(); + SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry + 
.newBuilder().setInodeId(file.getId()).setType(Type.FILEDIFF) + .setNumOfDiff(diffList.size()).build(); + entry.writeDelimitedTo(out); + for (int i = diffList.size() - 1; i >= 0; i--) { + FileDiff diff = diffList.get(i); + SnapshotDiffSection.FileDiff.Builder fb = SnapshotDiffSection.FileDiff + .newBuilder().setSnapshotId(diff.getSnapshotId()) + .setFileSize(diff.getFileSize()); + INodeFileAttributes copy = diff.snapshotINode; + if (copy != null) { + fb.setName(ByteString.copyFrom(copy.getLocalNameBytes())) + .setSnapshotCopy(buildINodeFile(copy, parent.getStringMap())); + } + fb.build().writeDelimitedTo(out); + } + } + } + + private void saveCreatedDeletedList(List created, + List deletedRefs, OutputStream out) throws IOException { + // local names of the created list member + for (INode c : created) { + SnapshotDiffSection.CreatedListEntry.newBuilder() + .setName(ByteString.copyFrom(c.getLocalNameBytes())).build() + .writeDelimitedTo(out); + } + // reference nodes in deleted list + for (INodeReference ref : deletedRefs) { + INodeSection.INodeReference.Builder rb = buildINodeReference(ref); + rb.build().writeDelimitedTo(out); + } + } + + private void serializeDirDiffList(INodeDirectory dir, OutputStream out) + throws IOException { + DirectoryWithSnapshotFeature sf = dir.getDirectoryWithSnapshotFeature(); + if (sf != null) { + List diffList = sf.getDiffs().asList(); + SnapshotDiffSection.DiffEntry entry = SnapshotDiffSection.DiffEntry + .newBuilder().setInodeId(dir.getId()).setType(Type.DIRECTORYDIFF) + .setNumOfDiff(diffList.size()).build(); + entry.writeDelimitedTo(out); + for (int i = diffList.size() - 1; i >= 0; i--) { // reverse order! + DirectoryDiff diff = diffList.get(i); + SnapshotDiffSection.DirectoryDiff.Builder db = SnapshotDiffSection. 
+ DirectoryDiff.newBuilder().setSnapshotId(diff.getSnapshotId()) + .setChildrenSize(diff.getChildrenSize()) + .setIsSnapshotRoot(diff.isSnapshotRoot()); + INodeDirectoryAttributes copy = diff.snapshotINode; + if (!diff.isSnapshotRoot() && copy != null) { + db.setName(ByteString.copyFrom(copy.getLocalNameBytes())) + .setSnapshotCopy( + buildINodeDirectory(copy, parent.getStringMap())); + } + // process created list and deleted list + List created = diff.getChildrenDiff() + .getList(ListType.CREATED); + db.setCreatedListSize(created.size()); + List deleted = diff.getChildrenDiff().getList(ListType.DELETED); + List refs = new ArrayList(); + for (INode d : deleted) { + if (d.isReference()) { + refs.add(d.asReference()); + } else { + db.addDeletedINode(d.getId()); + } + } + db.setNumOfDeletedRef(refs.size()); + db.build().writeDelimitedTo(out); + saveCreatedDeletedList(created, refs, out); + } + } + } + } + + private FSImageFormatPBSnapshot(){} +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java index e836cd87959..69fdf97391c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotFSImageFormat.java @@ -27,7 +27,6 @@ import java.util.Map; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.namenode.FSImageFormat; -import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader; import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization; import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeAttributes; @@ -137,7 +136,7 @@ public class SnapshotFSImageFormat { * @param parent The directory that the created list belongs to. * @return The created node. */ - private static INode loadCreated(byte[] createdNodeName, + public static INode loadCreated(byte[] createdNodeName, INodeDirectory parent) throws IOException { // the INode in the created list should be a reference to another INode // in posterior SnapshotDiffs or one of the current children @@ -209,11 +208,13 @@ public class SnapshotFSImageFormat { /** * Load snapshots and snapshotQuota for a Snapshottable directory. - * @param snapshottableParent The snapshottable directory for loading. - * @param numSnapshots The number of snapshots that the directory has. - * @param in The {@link DataInput} instance to read. - * @param loader The {@link Loader} instance that this loading procedure is - * using. + * + * @param snapshottableParent + * The snapshottable directory for loading. + * @param numSnapshots + * The number of snapshots that the directory has. + * @param loader + * The loader */ public static void loadSnapshotList( INodeDirectorySnapshottable snapshottableParent, int numSnapshots, @@ -231,10 +232,13 @@ public class SnapshotFSImageFormat { /** * Load the {@link SnapshotDiff} list for the INodeDirectoryWithSnapshot * directory. - * @param dir The snapshottable directory for loading. - * @param in The {@link DataInput} instance to read. - * @param loader The {@link Loader} instance that this loading procedure is - * using. + * + * @param dir + * The snapshottable directory for loading. + * @param in + * The {@link DataInput} instance to read. 
+ * @param loader + * The loader */ public static void loadDirectoryDiffList(INodeDirectory dir, DataInput in, FSImageFormat.Loader loader) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java index 8fa0f0c932b..be1ddc0e9e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/SnapshotManager.java @@ -270,6 +270,23 @@ public class SnapshotManager implements SnapshotStats { return numSnapshots.get(); } + void setNumSnapshots(int num) { + numSnapshots.set(num); + } + + int getSnapshotCounter() { + return snapshotCounter; + } + + void setSnapshotCounter(int counter) { + snapshotCounter = counter; + } + + INodeDirectorySnapshottable[] getSnapshottableDirs() { + return snapshottables.values().toArray( + new INodeDirectorySnapshottable[snapshottables.size()]); + } + /** * Write {@link #snapshotCounter}, {@link #numSnapshots}, * and all snapshots to the DataOutput. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java new file mode 100644 index 00000000000..2433b28a859 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.base.Preconditions; +import com.google.common.io.LimitInputStream; + +/** + * This is the tool for analyzing file sizes in the namespace image. In order to + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. 
The range of integers is + * divided into segments of size step: + * [0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize], and the visitor + * calculates how many files in the system fall into each segment + * [s<sub>i-1</sub>, s<sub>i</sub>). Note that files larger than + * maxSize always fall into the very last segment. + * + *

<h3>Input.</h3> + * <ul> + * <li>filename specifies the location of the image file;</li> + * <li>maxSize determines the range [0, maxSize] of files + * sizes considered by the visitor;</li> + * <li>step the range is divided into segments of size step.</li> + * </ul> + *
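+ * <p>Worked example (illustrative only, not produced by this tool): with
+ * step = 2 MB, a file whose computed size (block bytes times replication, as
+ * accumulated in run() below) is 5 MB is counted in bucket
+ * ceil(5 MB / 2 MB) = 3, i.e. the segment (4 MB, 6 MB].</p>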

<h3>Output.</h3>
The output file is formatted as a tab separated two column + * table: Size and NumFiles. Where Size represents the start of the segment, and + * numFiles is the number of files form the image which size falls in this + * segment. + * + */ +final class FileDistributionCalculator { + private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37 + private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21 + + private final Configuration conf; + private final long maxSize; + private final int steps; + private final PrintWriter out; + + private int[] distribution; + private int totalFiles; + private int totalDirectories; + private int totalBlocks; + private long totalSpace; + private long maxFileSize; + + FileDistributionCalculator(Configuration conf, long maxSize, int steps, + PrintWriter out) { + this.conf = conf; + this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize; + this.steps = steps == 0 ? INTERVAL_DEFAULT : steps; + this.out = out; + long numIntervals = this.maxSize / this.steps; + this.distribution = new int[1 + (int) (numIntervals)]; + Preconditions.checkState(numIntervals < Integer.MAX_VALUE, + "Too many distribution intervals"); + } + + void visit(RandomAccessFile file) throws IOException { + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + FileInputStream in = null; + try { + in = new FileInputStream(file.getFD()); + for (FileSummary.Section s : summary.getSectionsList()) { + if (SectionName.fromString(s.getName()) != SectionName.INODE) { + continue; + } + + in.getChannel().position(s.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + in, s.getLength()))); + run(is); + output(); + } + } finally { + IOUtils.cleanup(null, in); + } + } + + private void run(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + if (p.getType() == INodeSection.INode.Type.FILE) { + ++totalFiles; + INodeSection.INodeFile f = p.getFile(); + totalBlocks += f.getBlocksCount(); + long fileSize = 0; + for (BlockProto b : f.getBlocksList()) { + fileSize += b.getNumBytes() * f.getReplication(); + } + maxFileSize = Math.max(fileSize, maxFileSize); + totalSpace += fileSize; + + int bucket = fileSize > maxSize ? 
distribution.length - 1 : (int) Math + .ceil((double)fileSize / steps); + ++distribution[bucket]; + + } else if (p.getType() == INodeSection.INode.Type.DIRECTORY) { + ++totalDirectories; + } + + if (i % (1 << 20) == 0) { + out.println("Processed " + i + " inodes."); + } + } + } + + private void output() { + // write the distribution into the output file + out.print("Size\tNumFiles\n"); + for (int i = 0; i < distribution.length; i++) { + if (distribution[i] != 0) { + out.print(((long) i * steps) + "\t" + distribution[i]); + out.print('\n'); + } + } + out.print("totalFiles = " + totalFiles + "\n"); + out.print("totalDirectories = " + totalDirectories + "\n"); + out.print("totalBlocks = " + totalBlocks + "\n"); + out.print("totalSpace = " + totalSpace + "\n"); + out.print("maxFileSize = " + maxFileSize + "\n"); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java index c529fb5cdc2..19b859118ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java @@ -127,7 +127,7 @@ class ImageLoaderCurrent implements ImageLoader { new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, - -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 }; + -40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51, -52 }; private int imageVersion = 0; private final Map subtreeMap = new HashMap(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java new file mode 100644 index 00000000000..e467725646e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/LsrPBImage.java @@ -0,0 +1,233 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.hdfs.server.namenode.INodeId; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.io.LimitInputStream; + +/** + * This is the tool for analyzing file sizes in the namespace image. In order to + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. The range of integers is + * divided into segments of size step: + * [0, s1, ..., sn-1, maxSize], and the visitor + * calculates how many files in the system fall into each segment + * [si-1, si). Note that files larger than + * maxSize always fall into the very last segment. + * + *

<h3>Input.</h3> + * <ul> + * <li>filename specifies the location of the image file;</li> + * <li>maxSize determines the range [0, maxSize] of files + * sizes considered by the visitor;</li> + * <li>step the range is divided into segments of size step.</li> + * </ul> + * + * <h3>Output.</h3>
The output file is formatted as a tab separated two column + * table: Size and NumFiles. Where Size represents the start of the segment, and + * numFiles is the number of files form the image which size falls in this + * segment. + * + */ +final class LsrPBImage { + private final Configuration conf; + private final PrintWriter out; + private String[] stringTable; + private HashMap inodes = Maps.newHashMap(); + private HashMap dirmap = Maps.newHashMap(); + + public LsrPBImage(Configuration conf, PrintWriter out) { + this.conf = conf; + this.out = out; + } + + public void visit(RandomAccessFile file) throws IOException { + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + FileInputStream fin = null; + try { + fin = new FileInputStream(file.getFD()); + + ArrayList sections = Lists.newArrayList(summary + .getSectionsList()); + Collections.sort(sections, new Comparator() { + @Override + public int compare(FileSummary.Section s1, FileSummary.Section s2) { + SectionName n1 = SectionName.fromString(s1.getName()); + SectionName n2 = SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + for (FileSummary.Section s : sections) { + fin.getChannel().position(s.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, s.getLength()))); + + switch (SectionName.fromString(s.getName())) { + case STRING_TABLE: + loadStringTable(is); + break; + case INODE: + loadINodeSection(is); + break; + case INODE_DIR: + loadINodeDirectorySection(is); + break; + default: + break; + } + } + list("", INodeId.ROOT_INODE_ID); + } finally { + IOUtils.cleanup(null, fin); + } + } + + private void list(String parent, long dirId) { + INode inode = inodes.get(dirId); + listINode(parent.isEmpty() ? 
"/" : parent, inode); + long[] children = dirmap.get(dirId); + if (children == null) { + return; + } + String newParent = parent + inode.getName().toStringUtf8() + "/"; + for (long cid : children) { + list(newParent, cid); + } + } + + private void listINode(String parent, INode inode) { + switch (inode.getType()) { + case FILE: { + INodeFile f = inode.getFile(); + PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission( + f.getPermission(), stringTable); + out.print(String.format("-%s %2s %8s %10s %10s %10d %s%s\n", p + .getPermission().toString(), f.getReplication(), p.getUserName(), p + .getGroupName(), f.getModificationTime(), getFileSize(f), parent, + inode.getName().toStringUtf8())); + } + break; + case DIRECTORY: { + INodeDirectory d = inode.getDirectory(); + PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission( + d.getPermission(), stringTable); + out.print(String.format("d%s - %8s %10s %10s %10d %s%s\n", p + .getPermission().toString(), p.getUserName(), p.getGroupName(), d + .getModificationTime(), 0, parent, inode.getName().toStringUtf8())); + } + break; + case SYMLINK: { + INodeSymlink d = inode.getSymlink(); + PermissionStatus p = FSImageFormatPBINode.Loader.loadPermission( + d.getPermission(), stringTable); + out.print(String.format("-%s - %8s %10s %10s %10d %s%s -> %s\n", p + .getPermission().toString(), p.getUserName(), p.getGroupName(), 0, 0, + parent, inode.getName().toStringUtf8(), d.getTarget().toStringUtf8())); + } + break; + default: + break; + } + } + + private long getFileSize(INodeFile f) { + long size = 0; + for (BlockProto p : f.getBlocksList()) { + size += p.getNumBytes(); + } + return size; + } + + private void loadINodeDirectorySection(InputStream in) throws IOException { + while (true) { + INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry + .parseDelimitedFrom(in); + // note that in is a LimitedInputStream + if (e == null) { + break; + } + long[] l = new long[e.getChildrenCount()]; + for (int i = 0; i < l.length; ++i) { + l[i] = e.getChildren(i); + } + dirmap.put(e.getParent(), l); + for (int i = 0; i < e.getNumOfRef(); i++) { + INodeSection.INodeReference.parseDelimitedFrom(in); + } + } + } + + private void loadINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + inodes.put(p.getId(), p); + } + } + + private void loadStringTable(InputStream in) throws IOException { + StringTableSection s = StringTableSection.parseDelimitedFrom(in); + stringTable = new String[s.getNumEntry() + 1]; + for (int i = 0; i < s.getNumEntry(); ++i) { + StringTableSection.Entry e = StringTableSection.Entry + .parseDelimitedFrom(in); + stringTable[e.getId()] = e.getStr(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java new file mode 100644 index 00000000000..2d8c42d39d1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.RandomAccessFile; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; + +/** + * OfflineImageViewer to dump the contents of an Hadoop image file to XML or the + * console. Main entry point into utility, either via the command line or + * programatically. + */ +@InterfaceAudience.Private +public class OfflineImageViewerPB { + public static final Log LOG = LogFactory.getLog(OfflineImageViewerPB.class); + + private final static String usage = "Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" + + "Offline Image Viewer\n" + + "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" + + "saving the results in OUTPUTFILE.\n" + + "\n" + + "The oiv utility will attempt to parse correctly formed image files\n" + + "and will abort fail with mal-formed image files.\n" + + "\n" + + "The tool works offline and does not require a running cluster in\n" + + "order to process an image file.\n" + + "\n" + + "The following image processors are available:\n" + + " * Ls: The default image processor generates an lsr-style listing\n" + + " of the files in the namespace, with the same fields in the same\n" + + " order. Note that in order to correctly determine file sizes,\n" + + " this formatter cannot skip blocks and will override the\n" + + " -skipBlocks option.\n" + + " * XML: This processor creates an XML document with all elements of\n" + + " the fsimage enumerated, suitable for further analysis by XML\n" + + " tools.\n" + + " * FileDistribution: This processor analyzes the file size\n" + + " distribution in the image.\n" + + " -maxSize specifies the range [0, maxSize] of file sizes to be\n" + + " analyzed (128GB by default).\n" + + " -step defines the granularity of the distribution. (2MB by default)\n" + + "\n" + + "Required command line arguments:\n" + + "-i,--inputFile FSImage file to process.\n" + + "-o,--outputFile Name of output file. If the specified\n" + + " file exists, it will be overwritten.\n" + + "\n" + + "Optional command line arguments:\n" + + "-p,--processor Select which type of processor to apply\n" + + " against image file." 
+ + " (Ls|XML|FileDistribution).\n" + + "-h,--help Display usage information and exit\n"; + + /** + * Build command-line options and descriptions + */ + private static Options buildOptions() { + Options options = new Options(); + + // Build in/output file arguments, which are required, but there is no + // addOption method that can specify this + OptionBuilder.isRequired(); + OptionBuilder.hasArgs(); + OptionBuilder.withLongOpt("outputFile"); + options.addOption(OptionBuilder.create("o")); + + OptionBuilder.isRequired(); + OptionBuilder.hasArgs(); + OptionBuilder.withLongOpt("inputFile"); + options.addOption(OptionBuilder.create("i")); + + options.addOption("p", "processor", true, ""); + options.addOption("h", "help", false, ""); + options.addOption("skipBlocks", false, ""); + options.addOption("printToScreen", false, ""); + options.addOption("delimiter", true, ""); + + return options; + } + + /** + * Entry point to command-line-driven operation. User may specify options and + * start fsimage viewer from the command line. Program will process image file + * and exit cleanly or, if an error is encountered, inform user and exit. + * + * @param args + * Command line options + * @throws IOException + */ + public static void main(String[] args) throws IOException { + Options options = buildOptions(); + if (args.length == 0) { + printUsage(); + return; + } + + CommandLineParser parser = new PosixParser(); + CommandLine cmd; + + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + System.out.println("Error parsing command-line options: "); + printUsage(); + return; + } + + if (cmd.hasOption("h")) { // print help and exit + printUsage(); + return; + } + + String inputFile = cmd.getOptionValue("i"); + String processor = cmd.getOptionValue("p", "Ls"); + String outputFile = cmd.getOptionValue("o"); + + PrintWriter out = (outputFile == null || outputFile.equals("-")) ? new PrintWriter( + System.out) : new PrintWriter(new File(outputFile)); + + Configuration conf = new Configuration(); + try { + if (processor.equals("FileDistribution")) { + long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0")); + int step = Integer.parseInt(cmd.getOptionValue("step", "0")); + new FileDistributionCalculator(conf, maxSize, step, out) + .visit(new RandomAccessFile(inputFile, "r")); + } else if (processor.equals("XML")) { + new PBImageXmlWriter(conf, out).visit(new RandomAccessFile(inputFile, + "r")); + } else { + new LsrPBImage(conf, out).visit(new RandomAccessFile(inputFile, "r")); + } + } catch (EOFException e) { + System.err.println("Input file ended unexpectedly. Exiting"); + } catch (IOException e) { + System.err.println("Encountered exception. Exiting: " + e.getMessage()); + } finally { + out.close(); + } + + } + + /** + * Print application usage instructions. + */ + private static void printUsage() { + System.out.println(usage); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java new file mode 100644 index 00000000000..7ebf1196c4b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java @@ -0,0 +1,415 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotDiffSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SnapshotSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.StringTableSection; +import org.apache.hadoop.io.IOUtils; + +import com.google.common.collect.Lists; +import com.google.common.io.LimitInputStream; + +/** + * This is the tool for analyzing file sizes in the namespace image. In order to + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. The range of integers is + * divided into segments of size step: + * [0, s1, ..., sn-1, maxSize], and the visitor + * calculates how many files in the system fall into each segment + * [si-1, si). Note that files larger than + * maxSize always fall into the very last segment. + * + *

+ * Input.
+ * <ul>
+ * <li>filename specifies the location of the image file;</li>
+ * <li>maxSize determines the range [0, maxSize] of file sizes
+ * considered by the visitor;</li>
+ * <li>step - the range is divided into segments of size step.</li>
+ * </ul>
+ *
+ * Output.
The output file is formatted as a tab separated two column + * table: Size and NumFiles. Where Size represents the start of the segment, and + * numFiles is the number of files form the image which size falls in this + * segment. + * + */ +@InterfaceAudience.Private +public final class PBImageXmlWriter { + private final Configuration conf; + private final PrintWriter out; + private String[] stringTable; + + public PBImageXmlWriter(Configuration conf, PrintWriter out) { + this.conf = conf; + this.out = out; + } + + public void visit(RandomAccessFile file) throws IOException { + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + FileInputStream fin = null; + try { + fin = new FileInputStream(file.getFD()); + out.print("\n"); + + ArrayList sections = Lists.newArrayList(summary + .getSectionsList()); + Collections.sort(sections, new Comparator() { + @Override + public int compare(FileSummary.Section s1, FileSummary.Section s2) { + SectionName n1 = SectionName.fromString(s1.getName()); + SectionName n2 = SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + for (FileSummary.Section s : sections) { + fin.getChannel().position(s.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, s.getLength()))); + + switch (SectionName.fromString(s.getName())) { + case NS_INFO: + dumpNameSection(is); + break; + case STRING_TABLE: + loadStringTable(is); + break; + case INODE: + dumpINodeSection(is); + break; + case INODE_DIR: + dumpINodeDirectorySection(is); + break; + case FILES_UNDERCONSTRUCTION: + dumpFileUnderConstructionSection(is); + break; + case SNAPSHOT: + dumpSnapshotSection(is); + break; + case SNAPSHOT_DIFF: + dumpSnapshotDiffSection(is); + break; + case SECRET_MANAGER: + dumpSecretManagerSection(is); + break; + case CACHE_MANAGER: + dumpCacheManagerSection(is); + break; + default: + break; + } + } + } finally { + IOUtils.cleanup(null, fin); + } + } + + private void dumpCacheManagerSection(InputStream is) throws IOException { + out.print(""); + CacheManagerSection s = CacheManagerSection.parseDelimitedFrom(is); + o("nextDirectiveId", s.getNextDirectiveId()); + for (int i = 0; i < s.getNumPools(); ++i) { + CachePoolInfoProto p = CachePoolInfoProto.parseDelimitedFrom(is); + out.print(""); + o("poolName", p.getPoolName()).o("ownerName", p.getOwnerName()) + .o("groupName", p.getGroupName()).o("mode", p.getMode()) + .o("limit", p.getLimit()) + .o("maxRelativeExpiry", p.getMaxRelativeExpiry()); + out.print("\n"); + } + for (int i = 0; i < s.getNumPools(); ++i) { + CacheDirectiveInfoProto p = CacheDirectiveInfoProto + .parseDelimitedFrom(is); + out.print(""); + o("id", p.getId()).o("path", p.getPath()) + .o("replication", p.getReplication()).o("pool", p.getPool()); + out.print(""); + CacheDirectiveInfoExpirationProto e = p.getExpiration(); + o("millis", e.getMillis()).o("relatilve", e.getIsRelative()); + out.print("\n"); + out.print("\n"); + } + out.print("\n"); + + } + + private void dumpFileUnderConstructionSection(InputStream in) + throws IOException { + out.print(""); + while (true) { + FileUnderConstructionEntry e = FileUnderConstructionEntry + .parseDelimitedFrom(in); + if (e == null) { + break; + } + out.print(""); + o("id", e.getInodeId()).o("path", 
e.getFullPath()); + out.print("\n"); + } + out.print("\n"); + } + + private void dumpINodeDirectory(INodeDirectory d) { + o("mtime", d.getModificationTime()).o("permission", + dumpPermission(d.getPermission())); + + if (d.hasDsQuota() && d.hasNsQuota()) { + o("nsquota", d.getNsQuota()).o("dsquota", d.getDsQuota()); + } + } + + private void dumpINodeDirectorySection(InputStream in) throws IOException { + out.print(""); + while (true) { + INodeDirectorySection.DirEntry e = INodeDirectorySection.DirEntry + .parseDelimitedFrom(in); + // note that in is a LimitedInputStream + if (e == null) { + break; + } + out.print(""); + o("parent", e.getParent()); + for (long id : e.getChildrenList()) { + o("inode", id); + } + for (int i = 0; i < e.getNumOfRef(); i++) { + INodeSection.INodeReference r = INodeSection.INodeReference + .parseDelimitedFrom(in); + dumpINodeReference(r); + + } + out.print("\n"); + } + out.print("\n"); + } + + private void dumpINodeReference(INodeSection.INodeReference r) { + out.print(""); + o("referredId", r.getReferredId()).o("name", r.getName().toStringUtf8()) + .o("dstSnapshotId", r.getDstSnapshotId()) + .o("lastSnapshotId", r.getLastSnapshotId()); + out.print("\n"); + } + + private void dumpINodeFile(INodeSection.INodeFile f) { + o("replication", f.getReplication()).o("mtime", f.getModificationTime()) + .o("atime", f.getAccessTime()) + .o("perferredBlockSize", f.getPreferredBlockSize()) + .o("permission", dumpPermission(f.getPermission())); + + if (f.getBlocksCount() > 0) { + out.print(""); + for (BlockProto b : f.getBlocksList()) { + out.print(""); + o("id", b.getBlockId()).o("genstamp", b.getGenStamp()).o("numBytes", + b.getNumBytes()); + out.print("\n"); + } + out.print("\n"); + } + + if (f.hasFileUC()) { + INodeSection.FileUnderConstructionFeature u = f.getFileUC(); + out.print(""); + o("clientName", u.getClientName()).o("clientMachine", + u.getClientMachine()); + out.print("\n"); + } + } + + private void dumpINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + out.print(""); + o("lastInodeId", s.getLastInodeId()); + for (int i = 0; i < s.getNumInodes(); ++i) { + INodeSection.INode p = INodeSection.INode.parseDelimitedFrom(in); + out.print(""); + o("id", p.getId()).o("type", p.getType()).o("name", + p.getName().toStringUtf8()); + + if (p.hasFile()) { + dumpINodeFile(p.getFile()); + } else if (p.hasDirectory()) { + dumpINodeDirectory(p.getDirectory()); + } else if (p.hasSymlink()) { + dumpINodeSymlink(p.getSymlink()); + } + + out.print("\n"); + } + out.print("\n"); + } + + private void dumpINodeSymlink(INodeSymlink s) { + o("permission", dumpPermission(s.getPermission())).o("target", + s.getTarget().toStringUtf8()); + } + + private void dumpNameSection(InputStream in) throws IOException { + NameSystemSection s = NameSystemSection.parseDelimitedFrom(in); + out.print("\n"); + o("genstampV1", s.getGenstampV1()).o("genstampV2", s.getGenstampV2()) + .o("genstampV1Limit", s.getGenstampV1Limit()) + .o("lastAllocatedBlockId", s.getLastAllocatedBlockId()) + .o("txid", s.getTransactionId()); + out.print("\n"); + } + + private String dumpPermission(long permission) { + return FSImageFormatPBINode.Loader.loadPermission(permission, stringTable) + .toString(); + } + + private void dumpSecretManagerSection(InputStream is) throws IOException { + out.print(""); + SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(is); + o("currentId", s.getCurrentId()).o("tokenSequenceNumber", + s.getTokenSequenceNumber()); + 
out.print(""); + } + + private void dumpSnapshotDiffSection(InputStream in) throws IOException { + out.print(""); + while (true) { + SnapshotDiffSection.DiffEntry e = SnapshotDiffSection.DiffEntry + .parseDelimitedFrom(in); + if (e == null) { + break; + } + out.print(""); + o("inodeid", e.getInodeId()); + switch (e.getType()) { + case FILEDIFF: { + for (int i = 0; i < e.getNumOfDiff(); ++i) { + out.print(""); + SnapshotDiffSection.FileDiff f = SnapshotDiffSection.FileDiff + .parseDelimitedFrom(in); + o("snapshotId", f.getSnapshotId()).o("size", f.getFileSize()).o( + "name", f.getName().toStringUtf8()); + out.print("\n"); + } + } + break; + case DIRECTORYDIFF: { + for (int i = 0; i < e.getNumOfDiff(); ++i) { + out.print(""); + SnapshotDiffSection.DirectoryDiff d = SnapshotDiffSection.DirectoryDiff + .parseDelimitedFrom(in); + o("snapshotId", d.getSnapshotId()) + .o("isSnapshotroot", d.getIsSnapshotRoot()) + .o("childrenSize", d.getChildrenSize()) + .o("name", d.getName().toStringUtf8()); + + for (int j = 0; j < d.getCreatedListSize(); ++j) { + SnapshotDiffSection.CreatedListEntry ce = SnapshotDiffSection.CreatedListEntry + .parseDelimitedFrom(in); + out.print(""); + o("name", ce.getName().toStringUtf8()); + out.print("\n"); + } + for (int j = 0; j < d.getNumOfDeletedRef(); ++j) { + INodeSection.INodeReference r = INodeSection.INodeReference + .parseDelimitedFrom(in); + dumpINodeReference(r); + } + out.print("\n"); + } + } + break; + default: + break; + } + out.print(""); + } + out.print("\n"); + } + + private void dumpSnapshotSection(InputStream in) throws IOException { + out.print(""); + SnapshotSection s = SnapshotSection.parseDelimitedFrom(in); + o("snapshotCounter", s.getSnapshotCounter()); + if (s.getSnapshottableDirCount() > 0) { + out.print(""); + for (long id : s.getSnapshottableDirList()) { + o("dir", id); + } + out.print("\n"); + } + for (int i = 0; i < s.getNumSnapshots(); ++i) { + SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot + .parseDelimitedFrom(in); + o("snapshot", pbs.getSnapshotId()); + } + out.print("\n"); + } + + private void loadStringTable(InputStream in) throws IOException { + StringTableSection s = StringTableSection.parseDelimitedFrom(in); + stringTable = new String[s.getNumEntry() + 1]; + for (int i = 0; i < s.getNumEntry(); ++i) { + StringTableSection.Entry e = StringTableSection.Entry + .parseDelimitedFrom(in); + stringTable[e.getId()] = e.getStr(); + } + } + + private PBImageXmlWriter o(final String e, final Object v) { + out.print("<" + e + ">" + v + ""); + return this; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto new file mode 100644 index 00000000000..af7ba874d29 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/fsimage.proto @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option java_package = "org.apache.hadoop.hdfs.server.namenode"; +option java_outer_classname = "FsImageProto"; + +package hadoop.hdfs.fsimage; + +import "hdfs.proto"; + +/** + * This file defines the on-disk layout of the file system image. The + * layout is defined by the following EBNF grammar, in which angle + * brackets mark protobuf definitions. (e.g., ) + * + * FILE := MAGIC SECTION* FileSummaryLength + * MAGIC := 'HDFSIMG1' + * SECTION := | ... + * FileSummaryLength := 4 byte int + * + * Some notes: + * + * The codec field in FileSummary describes the compression codec used + * for all sections. The fileheader is always uncompressed. + * + * All protobuf messages are serialized in delimited form, which means + * that there always will be an integer indicates the size of the + * protobuf message. + * + */ + +message FileSummary { + // The version of the above EBNF grammars. + required uint32 ondiskVersion = 1; + // layoutVersion describes which features are available in the + // FSImage. + required uint32 layoutVersion = 2; + optional string codec = 3; + // index for each section + message Section { + optional string name = 1; + optional uint64 length = 2; + optional uint64 offset = 3; + } + repeated Section sections = 4; +} + +/** + * Name: NS_INFO + */ +message NameSystemSection { + optional uint32 namespaceId = 1; + optional uint64 genstampV1 = 2; + optional uint64 genstampV2 = 3; + optional uint64 genstampV1Limit = 4; + optional uint64 lastAllocatedBlockId = 5; + optional uint64 transactionId = 6; +} + +/** + * Permission is serialized as a 64-bit long. [0:24):[25:48):[48:64) (in Big Endian). + * The first and the second parts are the string ids of the user and + * group name, and the last 16 bits are the permission bits. + * + * Name: INODE + */ +message INodeSection { + /** + * under-construction feature for INodeFile + */ + message FileUnderConstructionFeature { + optional string clientName = 1; + optional string clientMachine = 2; + } + + message INodeFile { + optional uint32 replication = 1; + optional uint64 modificationTime = 2; + optional uint64 accessTime = 3; + optional uint64 preferredBlockSize = 4; + optional fixed64 permission = 5; + repeated BlockProto blocks = 6; + optional FileUnderConstructionFeature fileUC = 7; + } + + message INodeDirectory { + optional uint64 modificationTime = 1; + // namespace quota + optional uint64 nsQuota = 2; + // diskspace quota + optional uint64 dsQuota = 3; + optional fixed64 permission = 4; + } + + message INodeSymlink { + optional fixed64 permission = 1; + optional bytes target = 2; + } + + message INodeReference { + // id of the referred inode + optional uint64 referredId = 1; + // local name recorded in WithName + optional bytes name = 2; + // recorded in DstReference + optional uint32 dstSnapshotId = 3; + // recorded in WithName + optional uint32 lastSnapshotId = 4; + } + + message INode { + enum Type { + FILE = 1; + DIRECTORY = 2; + SYMLINK = 3; + }; + required Type type = 1; + required uint64 id = 2; + optional bytes name = 3; + + optional INodeFile file = 4; + optional INodeDirectory directory = 5; + optional INodeSymlink symlink = 6; + } + + optional uint64 lastInodeId = 1; + optional uint64 numInodes = 2; + // repeated INodes.. +} + +/** + * This section records information about under-construction files for + * reconstructing the lease map. 
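Two details in the layout and permission comments above are worth spelling out; note the permission bit ranges are contiguous, i.e. [0,24), [24,48), [48,64) counted from the most significant bit. The sketch below is my reading of the format, not code from this patch: readSummary() shows how the trailing 4-byte length locates the delimited FileSummary (what FSImageUtil is presumably doing when it loads the summary), and unpackPermission() shows how the fixed64 permission splits into a 24-bit user string-table id, a 24-bit group string-table id and 16 permission bits.

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;

import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;

class FsImageLayoutSketch {
  // The file ends with a 4-byte big-endian length; the length-delimited
  // FileSummary record sits immediately before it.
  static FileSummary readSummary(RandomAccessFile file) throws IOException {
    final int lengthFieldSize = 4;
    long fileLength = file.length();
    file.seek(fileLength - lengthFieldSize);
    int summaryLength = file.readInt();
    file.seek(fileLength - lengthFieldSize - summaryLength);
    byte[] buf = new byte[summaryLength];
    file.readFully(buf);
    return FileSummary.parseDelimitedFrom(new ByteArrayInputStream(buf));
  }

  // From most to least significant bits: 24-bit user string-table id,
  // 24-bit group string-table id, 16 permission bits.
  static String unpackPermission(long permission, String[] stringTable) {
    int mode = (int) (permission & 0xFFFF);
    int groupId = (int) ((permission >>> 16) & 0xFFFFFF);
    int userId = (int) ((permission >>> 40) & 0xFFFFFF);
    return String.format("%s:%s:%o", stringTable[userId], stringTable[groupId], mode);
  }
}
```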
+ * NAME: FILES_UNDERCONSTRUCTION + */ +message FilesUnderConstructionSection { + message FileUnderConstructionEntry { + optional uint64 inodeId = 1; + optional string fullPath = 2; + } + // repeated FileUnderConstructionEntry... +} + +/** + * This section records the children of each directories + * NAME: INODE_DIR + */ +message INodeDirectorySection { + message DirEntry { + optional uint64 parent = 1; + repeated uint64 children = 2 [packed = true]; + optional uint64 numOfRef = 3; + // repeated INodeReference... + } + // repeated DirEntry, ended at the boundary of the section. +} + +/** + * This section records the information about snapshot + * NAME: SNAPSHOT + */ +message SnapshotSection { + message Snapshot { + optional uint32 snapshotId = 1; + // Snapshot root + optional INodeSection.INode root = 2; + } + + optional uint32 snapshotCounter = 1; + repeated uint64 snapshottableDir = 2 [packed = true]; + // total number of snapshots + optional uint32 numSnapshots = 3; + // repeated Snapshot... +} + +/** + * This section records information about snapshot diffs + * NAME: SNAPSHOT_DIFF + */ +message SnapshotDiffSection { + message CreatedListEntry { + optional bytes name = 1; + } + + message DirectoryDiff { + optional uint32 snapshotId = 1; + optional uint32 childrenSize = 2; + optional bool isSnapshotRoot = 3; + optional bytes name = 4; + optional INodeSection.INodeDirectory snapshotCopy = 5; + optional uint32 createdListSize = 6; + optional uint32 numOfDeletedRef = 7; // number of reference nodes in deleted list + repeated uint64 deletedINode = 8 [packed = true]; // id of deleted inode + // repeated CreatedListEntry (size is specified by createdListSize) + // repeated INodeReference (reference inodes in deleted list) + } + + message FileDiff { + optional uint32 snapshotId = 1; + optional uint64 fileSize = 2; + optional bytes name = 3; + optional INodeSection.INodeFile snapshotCopy = 4; + } + + message DiffEntry { + enum Type { + FILEDIFF = 1; + DIRECTORYDIFF = 2; + } + required Type type = 1; + optional uint64 inodeId = 2; + optional uint32 numOfDiff = 3; + + // repeated DirectoryDiff or FileDiff + } + + // repeated DiffEntry +} + +/** + * This section maps string to id + * NAME: STRING_TABLE + */ +message StringTableSection { + message Entry { + optional uint32 id = 1; + optional string str = 2; + } + optional uint32 numEntry = 1; + // repeated Entry +} + +message SecretManagerSection { + message DelegationKey { + optional uint32 id = 1; + optional uint64 expiryDate = 2; + optional bytes key = 3; + } + message PersistToken { + optional uint32 version = 1; + optional string owner = 2; + optional string renewer = 3; + optional string realUser = 4; + optional uint64 issueDate = 5; + optional uint64 maxDate = 6; + optional uint32 sequenceNumber = 7; + optional uint32 masterKeyId = 8; + optional uint64 expiryDate = 9; + } + optional uint32 currentId = 1; + optional uint32 tokenSequenceNumber = 2; + optional uint32 numKeys = 3; + optional uint32 numTokens = 4; + // repeated DelegationKey keys + // repeated PersistToken tokens +} + +message CacheManagerSection { + required uint64 nextDirectiveId = 1; + required uint32 numPools = 2; + required uint32 numDirectives = 3; + // repeated CachePoolInfoProto pools + // repeated CacheDirectiveInfoProto directives +} + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java new file mode 100644 
index 00000000000..552b091b7b4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.util.EnumSet; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSOutputStream; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; +import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; +import org.apache.hadoop.hdfs.util.MD5FileUtils; +import org.junit.Test; + +public class TestFSImage { + + @Test + public void testPersist() throws IOException { + Configuration conf = new Configuration(); + testPersistHelper(conf); + } + + @Test + public void testCompression() throws IOException { + Configuration conf = new Configuration(); + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); + conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, + "org.apache.hadoop.io.compress.GzipCodec"); + testPersistHelper(conf); + } + + private void testPersistHelper(Configuration conf) throws IOException { + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).build(); + cluster.waitActive(); + FSNamesystem fsn = cluster.getNamesystem(); + DistributedFileSystem fs = cluster.getFileSystem(); + + final Path dir = new Path("/abc/def"); + final Path file1 = new Path(dir, "f1"); + final Path file2 = new Path(dir, "f2"); + + // create an empty file f1 + fs.create(file1).close(); + + // create an under-construction file f2 + FSDataOutputStream out = fs.create(file2); + out.writeBytes("hello"); + ((DFSOutputStream) out.getWrappedStream()).hsync(EnumSet + .of(SyncFlag.UPDATE_LENGTH)); + + // checkpoint + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + cluster.restartNameNode(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + + assertTrue(fs.isDirectory(dir)); + assertTrue(fs.exists(file1)); + assertTrue(fs.exists(file2)); + + // check internals of file2 + INodeFile file2Node = 
fsn.dir.getINode4Write(file2.toString()).asFile(); + assertEquals("hello".length(), file2Node.computeFileSize()); + assertTrue(file2Node.isUnderConstruction()); + BlockInfo[] blks = file2Node.getBlocks(); + assertEquals(1, blks.length); + assertEquals(BlockUCState.UNDER_CONSTRUCTION, blks[0].getBlockUCState()); + // check lease manager + Lease lease = fsn.leaseManager.getLeaseByPath(file2.toString()); + Assert.assertNotNull(lease); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + + /** + * Ensure that the digest written by the saver equals to the digest of the + * file. + */ + @Test + public void testDigest() throws IOException { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + DistributedFileSystem fs = cluster.getFileSystem(); + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + fs.saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + File currentDir = FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0).get( + 0); + File fsimage = FSImageTestUtil.findNewestImageFile(currentDir + .getAbsolutePath()); + assertEquals(MD5FileUtils.readStoredMd5ForFile(fsimage), + MD5FileUtils.computeMd5ForFile(fsimage)); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java index 21935d05d9c..f3cbf15aae2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java @@ -140,7 +140,7 @@ public class TestFSImageWithSnapshot { private File saveFSImageToTempFile() throws IOException { SaveNamespaceContext context = new SaveNamespaceContext(fsn, txid, new Canceler()); - FSImageFormat.Saver saver = new FSImageFormat.Saver(context); + FSImageFormatProtobuf.Saver saver = new FSImageFormatProtobuf.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); File imageFile = getImageFile(testDir, txid); fsn.readLock(); @@ -154,7 +154,7 @@ public class TestFSImageWithSnapshot { /** Load the fsimage from a temp file */ private void loadFSImageFromTempFile(File imageFile) throws IOException { - FSImageFormat.Loader loader = new FSImageFormat.Loader(conf, fsn); + FSImageFormat.LoaderDelegator loader = FSImageFormat.newLoader(conf, fsn); fsn.writeLock(); fsn.getFSDirectory().writeLock(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 3ff5d54dc66..0ca112da5c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -287,7 +287,6 @@ public class TestStandbyCheckpoints { doEdits(0, 1000); nn0.getRpcServer().rollEditLog(); answerer.waitForCall(); - answerer.proceed(); assertTrue("SBN is not performing checkpoint but it should be.", answerer.getFireCount() == 1 && answerer.getResultCount() == 0); @@ -306,6 +305,7 
@@ public class TestStandbyCheckpoints { // RPC to the SBN happened during the checkpoint. assertTrue("SBN should have still been checkpointing.", answerer.getFireCount() == 1 && answerer.getResultCount() == 0); + answerer.proceed(); answerer.waitForResult(); assertTrue("SBN should have finished checkpointing.", answerer.getFireCount() == 1 && answerer.getResultCount() == 1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java index 7fe8087f2a4..d4e887949e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestRenameWithSnapshots.java @@ -73,7 +73,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; -; /** Testing rename with snapshots. */ public class TestRenameWithSnapshots { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java index 27228bd0482..20cc1351e8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java @@ -25,6 +25,9 @@ import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.EnumSet; @@ -53,8 +56,7 @@ import org.apache.hadoop.hdfs.server.namenode.INode; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper.TestDirectoryTree; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper.TestDirectoryTree.Node; -import org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer; -import org.apache.hadoop.hdfs.tools.offlineImageViewer.XmlImageVisitor; +import org.apache.hadoop.hdfs.tools.offlineImageViewer.PBImageXmlWriter; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; @@ -245,8 +247,8 @@ public class TestSnapshot { * snapshots */ @Test - public void testOfflineImageViewer() throws Throwable { - runTestSnapshot(SNAPSHOT_ITERATION_NUMBER); + public void testOfflineImageViewer() throws Exception { + runTestSnapshot(1); // retrieve the fsimage. Note that we already save namespace to fsimage at // the end of each iteration of runTestSnapshot. 
@@ -254,31 +256,10 @@ public class TestSnapshot { FSImageTestUtil.getFSImage( cluster.getNameNode()).getStorage().getStorageDir(0)); assertNotNull("Didn't generate or can't find fsimage", originalFsimage); - - String ROOT = System.getProperty("test.build.data", "build/test/data"); - File testFile = new File(ROOT, "/image"); - String xmlImage = ROOT + "/image_xml"; - boolean success = false; - - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - XmlImageVisitor v = new XmlImageVisitor(xmlImage, true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, - true); - oiv.go(); - success = true; - } finally { - if (testFile.exists()) { - testFile.delete(); - } - // delete the xml file if the parsing is successful - if (success) { - File xmlImageFile = new File(xmlImage); - if (xmlImageFile.exists()) { - xmlImageFile.delete(); - } - } - } + StringWriter output = new StringWriter(); + PrintWriter o = new PrintWriter(output); + PBImageXmlWriter v = new PBImageXmlWriter(new Configuration(), o); + v.visit(new RandomAccessFile(originalFsimage, "r")); } private void runTestSnapshot(int iteration) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java index 11aa3b821f0..91a5c1521c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java @@ -20,23 +20,20 @@ package org.apache.hadoop.hdfs.tools.offlineImageViewer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import java.io.BufferedReader; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; +import java.io.PrintWriter; +import java.io.RandomAccessFile; +import java.io.StringWriter; import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -46,27 +43,29 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.PathUtils; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.junit.Rule; import org.junit.Test; - +import org.junit.rules.TemporaryFolder; /** - * Test function of OfflineImageViewer by: - * * confirming it can correctly process a valid fsimage 
file and that - * the processing generates a correct representation of the namespace - * * confirming it correctly fails to process an fsimage file with a layout - * version it shouldn't be able to handle - * * confirm it correctly bails on malformed image files, in particular, a - * file that ends suddenly. + * Test function of OfflineImageViewer by: * confirming it can correctly process + * a valid fsimage file and that the processing generates a correct + * representation of the namespace * confirming it correctly fails to process an + * fsimage file with a layout version it shouldn't be able to handle * confirm + * it correctly bails on malformed image files, in particular, a file that ends + * suddenly. */ public class TestOfflineImageViewer { private static final Log LOG = LogFactory.getLog(OfflineImageViewer.class); @@ -76,22 +75,22 @@ public class TestOfflineImageViewer { private static File originalFsimage = null; // Elements of lines of ls-file output to be compared to FileStatus instance - private static class LsElements { - public String perms; - public int replication; - public String username; - public String groupname; - public long filesize; - public char dir; // d if dir, - otherwise + private static final class LsElements { + private String perms; + private int replication; + private String username; + private String groupname; + private long filesize; + private boolean isDir; } - + // namespace as written to dfs, to be compared with viewer's output - final static HashMap writtenFiles = - new HashMap(); - - private static String ROOT = PathUtils.getTestDirName(TestOfflineImageViewer.class); - - // Create a populated namespace for later testing. Save its contents to a + final static HashMap writtenFiles = new HashMap(); + + @Rule + public TemporaryFolder folder = new TemporaryFolder(); + + // Create a populated namespace for later testing. Save its contents to a // data structure and store its fsimage location. // We only want to generate the fsimage file once and use it for // multiple tests. 
@@ -100,35 +99,39 @@ public class TestOfflineImageViewer { MiniDFSCluster cluster = null; try { Configuration conf = new HdfsConfiguration(); - conf.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); - conf.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); - conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL, "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build(); cluster.waitActive(); FileSystem hdfs = cluster.getFileSystem(); - + int filesize = 256; - - // Create a reasonable namespace - for(int i = 0; i < NUM_DIRS; i++) { + + // Create a reasonable namespace + for (int i = 0; i < NUM_DIRS; i++) { Path dir = new Path("/dir" + i); hdfs.mkdirs(dir); writtenFiles.put(dir.toString(), pathToFileEntry(hdfs, dir.toString())); - for(int j = 0; j < FILES_PER_DIR; j++) { + for (int j = 0; j < FILES_PER_DIR; j++) { Path file = new Path(dir, "file" + j); FSDataOutputStream o = hdfs.create(file); - o.write(new byte[ filesize++ ]); + o.write(new byte[filesize++]); o.close(); - - writtenFiles.put(file.toString(), pathToFileEntry(hdfs, file.toString())); + + writtenFiles.put(file.toString(), + pathToFileEntry(hdfs, file.toString())); } } // Get delegation tokens so we log the delegation token op - Token[] delegationTokens = - hdfs.addDelegationTokens(TEST_RENEWER, null); + Token[] delegationTokens = hdfs + .addDelegationTokens(TEST_RENEWER, null); for (Token t : delegationTokens) { LOG.debug("got token " + t); } @@ -137,329 +140,113 @@ public class TestOfflineImageViewer { cluster.getNameNodeRpc() .setSafeMode(SafeModeAction.SAFEMODE_ENTER, false); cluster.getNameNodeRpc().saveNamespace(); - + // Determine location of fsimage file - originalFsimage = FSImageTestUtil.findLatestImageFile( - FSImageTestUtil.getFSImage( - cluster.getNameNode()).getStorage().getStorageDir(0)); + originalFsimage = FSImageTestUtil.findLatestImageFile(FSImageTestUtil + .getFSImage(cluster.getNameNode()).getStorage().getStorageDir(0)); if (originalFsimage == null) { throw new RuntimeException("Didn't generate or can't find fsimage"); } LOG.debug("original FS image file is " + originalFsimage); } finally { - if(cluster != null) + if (cluster != null) cluster.shutdown(); } } - + @AfterClass public static void deleteOriginalFSImage() throws IOException { - if(originalFsimage != null && originalFsimage.exists()) { + if (originalFsimage != null && originalFsimage.exists()) { originalFsimage.delete(); } } - - // Convenience method to generate a file status from file system for + + // Convenience method to generate a file status from file system for // later comparison - private static FileStatus pathToFileEntry(FileSystem hdfs, String file) - throws IOException { + private static FileStatus pathToFileEntry(FileSystem hdfs, String file) + throws IOException { return hdfs.getFileStatus(new Path(file)); } - - // Verify that we can correctly generate an ls-style output for a valid + + // Verify that we can correctly generate an ls-style output for a valid // fsimage @Test public void outputOfLSVisitor() throws IOException { - File testFile = new 
File(ROOT, "/basicCheck"); - File outputFile = new File(ROOT, "/basicCheckOutput"); - - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - - ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false); - - oiv.go(); - - HashMap fileOutput = readLsfile(outputFile); - - compareNamespaces(writtenFiles, fileOutput); - } finally { - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); - } - LOG.debug("Correctly generated ls-style output."); - } - - // Confirm that attempting to read an fsimage file with an unsupported - // layout results in an error - @Test - public void unsupportedFSLayoutVersion() throws IOException { - File testFile = new File(ROOT, "/invalidLayoutVersion"); - File outputFile = new File(ROOT, "invalidLayoutVersionOutput"); - - try { - int badVersionNum = -432; - changeLayoutVersion(originalFsimage, testFile, badVersionNum); - ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false); - - try { - oiv.go(); - fail("Shouldn't be able to read invalid laytout version"); - } catch(IOException e) { - if(!e.getMessage().contains(Integer.toString(badVersionNum))) - throw e; // wasn't error we were expecting - LOG.debug("Correctly failed at reading bad image version."); + StringWriter output = new StringWriter(); + PrintWriter out = new PrintWriter(output); + LsrPBImage v = new LsrPBImage(new Configuration(), out); + v.visit(new RandomAccessFile(originalFsimage, "r")); + out.close(); + Pattern pattern = Pattern + .compile("([d\\-])([rwx\\-]{9})\\s*(-|\\d+)\\s*(\\w+)\\s*(\\w+)\\s*(\\d+)\\s*(\\d+)\\s*([\b/]+)"); + int count = 0; + for (String s : output.toString().split("\n")) { + Matcher m = pattern.matcher(s); + assertTrue(m.find()); + LsElements e = new LsElements(); + e.isDir = m.group(1).equals("d"); + e.perms = m.group(2); + e.replication = m.group(3).equals("-") ? 
0 : Integer.parseInt(m.group(3)); + e.username = m.group(4); + e.groupname = m.group(5); + e.filesize = Long.parseLong(m.group(7)); + String path = m.group(8); + if (!path.equals("/")) { + compareFiles(writtenFiles.get(path), e); } - } finally { - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); + ++count; } + assertEquals(writtenFiles.size() + 1, count); } - - // Verify that image viewer will bail on a file that ends unexpectedly - @Test - public void truncatedFSImage() throws IOException { - File testFile = new File(ROOT, "/truncatedFSImage"); - File outputFile = new File(ROOT, "/trucnatedFSImageOutput"); - try { - copyPartOfFile(originalFsimage, testFile); - assertTrue("Created truncated fsimage", testFile.exists()); - - ImageVisitor v = new LsImageVisitor(outputFile.getPath(), true); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, false); - try { - oiv.go(); - fail("Managed to process a truncated fsimage file"); - } catch (EOFException e) { - LOG.debug("Correctly handled EOF"); - } - - } finally { - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); - } + @Test(expected = IOException.class) + public void testTruncatedFSImage() throws IOException { + File truncatedFile = folder.newFile(); + StringWriter output = new StringWriter(); + copyPartOfFile(originalFsimage, truncatedFile); + new FileDistributionCalculator(new Configuration(), 0, 0, new PrintWriter( + output)).visit(new RandomAccessFile(truncatedFile, "r")); } - - // Test that our ls file has all the same compenents of the original namespace - private void compareNamespaces(HashMap written, - HashMap fileOutput) { - assertEquals( "Should be the same number of files in both, plus one for root" - + " in fileoutput", fileOutput.keySet().size(), - written.keySet().size() + 1); - Set inFile = fileOutput.keySet(); - // For each line in the output file, verify that the namespace had a - // filestatus counterpart - for (String path : inFile) { - if (path.equals("/")) // root's not included in output from system call - continue; - - assertTrue("Path in file (" + path + ") was written to fs", written - .containsKey(path)); - - compareFiles(written.get(path), fileOutput.get(path)); - - written.remove(path); - } - - assertEquals("No more files were written to fs", 0, written.size()); - } - // Compare two files as listed in the original namespace FileStatus and // the output of the ls file from the image processor private void compareFiles(FileStatus fs, LsElements elements) { - assertEquals("directory listed as such", - fs.isDirectory() ? 
'd' : '-', elements.dir); - assertEquals("perms string equal", - fs.getPermission().toString(), elements.perms); + assertEquals("directory listed as such", fs.isDirectory(), elements.isDir); + assertEquals("perms string equal", fs.getPermission().toString(), + elements.perms); assertEquals("replication equal", fs.getReplication(), elements.replication); assertEquals("owner equal", fs.getOwner(), elements.username); assertEquals("group equal", fs.getGroup(), elements.groupname); assertEquals("lengths equal", fs.getLen(), elements.filesize); } - // Read the contents of the file created by the Ls processor - private HashMap readLsfile(File lsFile) throws IOException { - BufferedReader br = new BufferedReader(new FileReader(lsFile)); - String line = null; - HashMap fileContents = new HashMap(); - - while((line = br.readLine()) != null) - readLsLine(line, fileContents); - - br.close(); - return fileContents; - } - - // Parse a line from the ls output. Store permissions, replication, - // username, groupname and filesize in hashmap keyed to the path name - private void readLsLine(String line, HashMap fileContents) { - String elements [] = line.split("\\s+"); - - assertEquals("Not enough elements in ls output", 8, elements.length); - - LsElements lsLine = new LsElements(); - - lsLine.dir = elements[0].charAt(0); - lsLine.perms = elements[0].substring(1); - lsLine.replication = elements[1].equals("-") - ? 0 : Integer.valueOf(elements[1]); - lsLine.username = elements[2]; - lsLine.groupname = elements[3]; - lsLine.filesize = Long.valueOf(elements[4]); - // skipping date and time - - String path = elements[7]; - - // Check that each file in the ls output was listed once - assertFalse("LS file had duplicate file entries", - fileContents.containsKey(path)); - - fileContents.put(path, lsLine); - } - - // Copy one fsimage to another, changing the layout version in the process - private void changeLayoutVersion(File src, File dest, int newVersion) - throws IOException { - DataInputStream in = null; - DataOutputStream out = null; - - try { - in = new DataInputStream(new FileInputStream(src)); - out = new DataOutputStream(new FileOutputStream(dest)); - - in.readInt(); - out.writeInt(newVersion); - - byte [] b = new byte[1024]; - while( in.read(b) > 0 ) { - out.write(b); - } - } finally { - if(in != null) in.close(); - if(out != null) out.close(); - } - } - - // Only copy part of file into the other. 
Used for testing truncated fsimage private void copyPartOfFile(File src, File dest) throws IOException { - InputStream in = null; - OutputStream out = null; - - byte [] b = new byte[256]; - int bytesWritten = 0; - int count; - int maxBytes = 700; - + FileInputStream in = null; + FileOutputStream out = null; + final int MAX_BYTES = 700; try { in = new FileInputStream(src); out = new FileOutputStream(dest); - - while( (count = in.read(b)) > 0 && bytesWritten < maxBytes ) { - out.write(b); - bytesWritten += count; - } + in.getChannel().transferTo(0, MAX_BYTES, out.getChannel()); } finally { - if(in != null) in.close(); - if(out != null) out.close(); + IOUtils.cleanup(null, in); + IOUtils.cleanup(null, out); } } @Test - public void outputOfFileDistributionVisitor() throws IOException { - File testFile = new File(ROOT, "/basicCheck"); - File outputFile = new File(ROOT, "/fileDistributionCheckOutput"); + public void testFileDistributionVisitor() throws IOException { + StringWriter output = new StringWriter(); + PrintWriter o = new PrintWriter(output); + new FileDistributionCalculator(new Configuration(), 0, 0, o) + .visit(new RandomAccessFile(originalFsimage, "r")); + o.close(); - int totalFiles = 0; - BufferedReader reader = null; - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - ImageVisitor v = new FileDistributionVisitor(outputFile.getPath(), 0, 0); - OfflineImageViewer oiv = - new OfflineImageViewer(testFile.getPath(), v, false); + Pattern p = Pattern.compile("totalFiles = (\\d+)\n"); + Matcher matcher = p.matcher(output.getBuffer()); - oiv.go(); - - reader = new BufferedReader(new FileReader(outputFile)); - String line = reader.readLine(); - assertEquals(line, "Size\tNumFiles"); - while((line = reader.readLine()) != null) { - String[] row = line.split("\t"); - assertEquals(row.length, 2); - totalFiles += Integer.parseInt(row[1]); - } - } finally { - if (reader != null) { - reader.close(); - } - if(testFile.exists()) testFile.delete(); - if(outputFile.exists()) outputFile.delete(); - } + assertTrue(matcher.find() && matcher.groupCount() == 1); + int totalFiles = Integer.parseInt(matcher.group(1)); assertEquals(totalFiles, NUM_DIRS * FILES_PER_DIR); } - - private static class TestImageVisitor extends ImageVisitor { - private List delegationTokenRenewers = new LinkedList(); - TestImageVisitor() { - } - - List getDelegationTokenRenewers() { - return delegationTokenRenewers; - } - - @Override - void start() throws IOException { - } - - @Override - void finish() throws IOException { - } - - @Override - void finishAbnormally() throws IOException { - } - - @Override - void visit(ImageElement element, String value) throws IOException { - if (element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER) { - delegationTokenRenewers.add(value); - } - } - - @Override - void visitEnclosingElement(ImageElement element) throws IOException { - } - - @Override - void visitEnclosingElement(ImageElement element, ImageElement key, - String value) throws IOException { - } - - @Override - void leaveEnclosingElement() throws IOException { - } - } - - @Test - public void outputOfTestVisitor() throws IOException { - File testFile = new File(ROOT, "/basicCheck"); - - try { - DFSTestUtil.copyFile(originalFsimage, testFile); - TestImageVisitor v = new TestImageVisitor(); - OfflineImageViewer oiv = new OfflineImageViewer(testFile.getPath(), v, true); - oiv.go(); - - // Validated stored delegation token identifiers. 
- List dtrs = v.getDelegationTokenRenewers(); - assertEquals(1, dtrs.size()); - assertEquals(TEST_RENEWER, dtrs.get(0)); - } finally { - if(testFile.exists()) testFile.delete(); - } - LOG.debug("Passed TestVisitor validation."); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index c6174327d112c413f165f5bc912915952859e83f..a3f3511c9eb15da0f4391d84096c45e678606fc1 100644 GIT binary patch delta 13 Vcmdm>xIvNS|NsAIHnOY{001zi2H^kz delta 13 Vcmdm>xIvNS|NsAIH?ph|001zn2I2q! diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index 3a60b6dc5c5..c7fafcccf5e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1,6 +1,6 @@ - -51 + -52 OP_START_LOG_SEGMENT From 204704a92df407c06951cca9a47e85f1e3ef5ba7 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Sun, 9 Feb 2014 20:49:18 +0000 Subject: [PATCH 02/47] HDFS-5837. dfs.namenode.replication.considerLoad should consider decommissioned nodes. Contributed by Tao Luo. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566410 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../BlockPlacementPolicyDefault.java | 8 +- .../hdfs/server/namenode/FSClusterStats.java | 6 + .../hdfs/server/namenode/FSNamesystem.java | 7 +- .../TestReplicationPolicyConsiderLoad.java | 161 ++++++++++++++++++ 5 files changed, 181 insertions(+), 4 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 22b201627a1..fe5e8bc59a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -940,6 +940,9 @@ Release 2.3.0 - UNRELEASED HDFS-5873. dfs.http.policy should have higher precedence over dfs.https.enable. (Haohui Mai via jing9) + HDFS-5837. dfs.namenode.replication.considerLoad should consider + decommissioned nodes. (Tao Luo via shv) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. 
Add storage type to the protocol and expose it in block report diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index f4dc208d731..8b740cd94c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -633,9 +633,11 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { // check the communication traffic of the target machine if (considerLoad) { double avgLoad = 0; - int size = clusterMap.getNumOfLeaves(); - if (size != 0 && stats != null) { - avgLoad = (double)stats.getTotalLoad()/size; + if (stats != null) { + int size = stats.getNumDatanodesInService(); + if (size != 0) { + avgLoad = (double)stats.getTotalLoad()/size; + } } if (node.getXceiverCount() > (2.0 * avgLoad)) { logNodeIsNotChosen(storage, "the node is too busy "); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java index f4827f38c8a..676aa0826c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSClusterStats.java @@ -42,6 +42,12 @@ public interface FSClusterStats { * for writing targets, and false otherwise. */ public boolean isAvoidingStaleDataNodesForWrite(); + + /** + * Indicates number of datanodes that are in service. + * @return Number of datanodes that are both alive and not decommissioned. + */ + public int getNumDatanodesInService(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index f91c41c7610..5cd22ab4dd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6842,7 +6842,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return this.blockManager.getDatanodeManager() .shouldAvoidStaleDataNodesForWrite(); } - + + @Override // FSClusterStats + public int getNumDatanodesInService() { + return getNumLiveDataNodes() - getNumDecomLiveDataNodes(); + } + public SnapshotManager getSnapshotManager() { return snapshotManager; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java new file mode 100644 index 00000000000..0b84fd7c953 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicyConsiderLoad.java @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.blockmanagement; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.StorageType; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; +import org.apache.hadoop.hdfs.server.common.StorageInfo; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.test.PathUtils; +import org.apache.hadoop.util.VersionInfo; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestReplicationPolicyConsiderLoad { + + private static NameNode namenode; + private static DatanodeManager dnManager; + private static List dnrList; + private static DatanodeDescriptor[] dataNodes; + private static DatanodeStorageInfo[] storages; + + @BeforeClass + public static void setupCluster() throws IOException { + Configuration conf = new HdfsConfiguration(); + final String[] racks = { + "/rack1", + "/rack1", + "/rack1", + "/rack2", + "/rack2", + "/rack2"}; + storages = DFSTestUtil.createDatanodeStorageInfos(racks); + dataNodes = DFSTestUtil.toDatanodeDescriptor(storages); + FileSystem.setDefaultUri(conf, "hdfs://localhost:0"); + conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0"); + File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class); + conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, + new File(baseDir, "name").getPath()); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_WRITE_KEY, true); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_REPLICATION_CONSIDERLOAD_KEY, true); + DFSTestUtil.formatNameNode(conf); + namenode = new NameNode(conf); + int blockSize = 1024; + + dnrList = new ArrayList(); + dnManager = namenode.getNamesystem().getBlockManager().getDatanodeManager(); + + // Register DNs + for (int i=0; i < 6; i++) { + DatanodeRegistration dnr = new DatanodeRegistration(dataNodes[i], + new StorageInfo(), new ExportedBlockKeys(), VersionInfo.getVersion()); + dnrList.add(dnr); + dnManager.registerDatanode(dnr); + dataNodes[i].getStorageInfos()[0].setUtilizationForTesting( + 
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L, + 2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*blockSize, 0L); + dataNodes[i].updateHeartbeat( + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[i]), + 0L, 0L, 0, 0); + } + } + + /** + * Tests that chooseTarget with considerLoad set to true correctly calculates + * load with decommissioned nodes. + */ + @Test + public void testChooseTargetWithDecomNodes() throws IOException { + namenode.getNamesystem().writeLock(); + try { + // Decommission DNs so BlockPlacementPolicyDefault.isGoodTarget() + // returns false + for (int i = 0; i < 3; i++) { + DatanodeInfo d = dnManager.getDatanodeByXferAddr( + dnrList.get(i).getIpAddr(), + dnrList.get(i).getXferPort()); + d.setDecommissioned(); + } + String blockPoolId = namenode.getNamesystem().getBlockPoolId(); + dnManager.handleHeartbeat(dnrList.get(3), + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[3]), + blockPoolId, dataNodes[3].getCacheCapacity(), + dataNodes[3].getCacheRemaining(), + 2, 0, 0); + dnManager.handleHeartbeat(dnrList.get(4), + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[4]), + blockPoolId, dataNodes[4].getCacheCapacity(), + dataNodes[4].getCacheRemaining(), + 4, 0, 0); + dnManager.handleHeartbeat(dnrList.get(5), + BlockManagerTestUtil.getStorageReportsForDatanode(dataNodes[5]), + blockPoolId, dataNodes[5].getCacheCapacity(), + dataNodes[5].getCacheRemaining(), + 4, 0, 0); + + // Call chooseTarget() + DatanodeStorageInfo[] targets = namenode.getNamesystem().getBlockManager() + .getBlockPlacementPolicy().chooseTarget("testFile.txt", 3, + dataNodes[0], new ArrayList(), false, null, + 1024, StorageType.DEFAULT); + + assertEquals(3, targets.length); + Set targetSet = new HashSet( + Arrays.asList(targets)); + for (int i = 3; i < storages.length; i++) { + assertTrue(targetSet.contains(storages[i])); + } + } finally { + dataNodes[0].stopDecommission(); + dataNodes[1].stopDecommission(); + dataNodes[2].stopDecommission(); + namenode.getNamesystem().writeUnlock(); + } + NameNode.LOG.info("Done working on it"); + } + + @AfterClass + public static void teardownCluster() { + if (namenode != null) namenode.stop(); + } + +} From ff24753aa7ba3aadfff8080d9709c6bc9cf07811 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Sun, 9 Feb 2014 21:07:28 +0000 Subject: [PATCH 03/47] HDFS-4370. Fix typo Blanacer in DataNode. Contributed by Chu Tong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566422 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/datanode/DataNode.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index fe5e8bc59a9..0adccb73376 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -360,6 +360,8 @@ Release 2.4.0 - UNRELEASED HDFS-4911. Reduce PeerCache timeout to be commensurate with dfs.datanode.socket.reuse.keepalive (cmccabe) + HDFS-4370. Fix typo Blanacer in DataNode. (Chu Tong via shv) + OPTIMIZATIONS HDFS-5790. 
LeaseManager.findPath is very slow when many leases need recovery diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index ad580a53d1d..42a63e73f72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -2494,7 +2494,7 @@ public class DataNode extends Configured /** * Get current value of the max balancer bandwidth in bytes per second. * - * @return bandwidth Blanacer bandwidth in bytes per second for this datanode. + * @return Balancer bandwidth in bytes per second for this datanode. */ public Long getBalancerBandwidth() { DataXceiverServer dxcs = From c7e265bf26a58d710967a56620c3eecc99b6b45b Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Sun, 9 Feb 2014 23:15:48 +0000 Subject: [PATCH 04/47] HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink(). Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566468 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java | 3 ++- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index c2fc70990ca..533fa220774 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -545,7 +545,8 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { return new READLINK3Response(Nfs3Status.NFS3ERR_SERVERFAULT); } if (MAX_READ_TRANSFER_SIZE < target.getBytes().length) { - return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, null); + return new READLINK3Response(Nfs3Status.NFS3ERR_IO, postOpAttr, + new byte[0]); } return new READLINK3Response(Nfs3Status.NFS3_OK, postOpAttr, diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0adccb73376..5a0e4216536 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -404,6 +404,9 @@ Release 2.4.0 - UNRELEASED HDFS-5900. Cannot set cache pool limit of "unlimited" via CacheAdmin. (wang) + HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink() + (brandonli) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES From 1c151c31faca77b67b7f4d50d682e76bc519d79a Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Mon, 10 Feb 2014 09:19:26 +0000 Subject: [PATCH 05/47] YARN-1497. 
Fix comment and remove accidental println git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566537 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/yarn/client/cli/ApplicationCLI.java | 6 +----- .../java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java index 80e548d26e6..4332f5beeaf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/cli/ApplicationCLI.java @@ -382,11 +382,7 @@ public class ApplicationCLI extends YarnCLI { } /** - * Kills the application with the application id as appId - * - * @param applicationId - * @throws YarnException - * @throws IOException + * Moves the application with the given ID to the given queue. */ private void moveApplicationAcrossQueues(String applicationId, String queue) throws YarnException, IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java index 12bc6be7316..97721864968 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestYarnCLI.java @@ -675,7 +675,6 @@ public class TestYarnCLI { int result = spyCli.run(new String[] { "-help" }); Assert.assertTrue(result == 0); verify(spyCli).printUsage(any(Options.class)); - System.err.println(sysOutStream.toString()); //todo sandyt remove this hejfkdsl Assert.assertEquals(createApplicationCLIHelpMessage(), sysOutStream.toString()); From bfd158f3231de96cab3308b219cb5278a43d0fe9 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Mon, 10 Feb 2014 19:34:54 +0000 Subject: [PATCH 06/47] =?UTF-8?q?HADOOP-10333.=20Fix=20grammatical=20error?= =?UTF-8?q?=20in=20overview.html=20document.=20Contributed=20by=20Ren?= =?UTF-8?q?=C3=A9=20Nyffenegger.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566709 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../hadoop-common/src/main/java/overview.html | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 7a7106197d9..9eb7fae0f53 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -312,6 +312,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10295. Allow distcp to automatically identify the checksum type of source files and use it for the target. (jing9 and Laurent Goujon) + HADOOP-10333. Fix grammatical error in overview.html document. 
+ (René Nyffenegger via suresh) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/overview.html b/hadoop-common-project/hadoop-common/src/main/java/overview.html index 759c093aa59..5868617709b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/overview.html +++ b/hadoop-common-project/hadoop-common/src/main/java/overview.html @@ -57,7 +57,7 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:
  • - Hadoop was been demonstrated on GNU/Linux clusters with 2000 nodes. + Hadoop has been demonstrated on GNU/Linux clusters with more than 4000 nodes.
  • Windows is also a supported platform. From e74e117ad3e0b6c0572913f602a28934f87bba70 Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Mon, 10 Feb 2014 21:31:34 +0000 Subject: [PATCH 07/47] YARN-1637. Implemented a client library for Java users to post timeline entities and events. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566752 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop-yarn/hadoop-yarn-client/pom.xml | 4 + .../yarn/client/api/TimelineClient.java | 70 +++++++++ .../client/api/impl/TimelineClientImpl.java | 106 ++++++++++++++ .../client/api/impl/TestTimelineClient.java | 137 ++++++++++++++++++ 5 files changed, 320 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3039c6f9a07..59afe849981 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -119,6 +119,9 @@ Release 2.4.0 - UNRELEASED YARN-1635. Implemented a Leveldb based ApplicationTimelineStore. (Billie Rinaldi via zjshen) + YARN-1637. Implemented a client library for Java users to post timeline + entities and events. (zjshen) + IMPROVEMENTS YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml index 54da659fee6..6091686a036 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml @@ -79,6 +79,10 @@ org.mortbay.jetty jetty-util + + com.sun.jersey + jersey-client + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java new file mode 100644 index 00000000000..8be00ac6ff6 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/TimelineClient.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.client.api; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntity; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors; +import org.apache.hadoop.yarn.client.api.impl.TimelineClientImpl; +import org.apache.hadoop.yarn.exceptions.YarnException; + +/** + * A client library that can be used to post some information in terms of a + * number of conceptual entities. + * + * @See ATSEntity + */ +@Public +@Unstable +public abstract class TimelineClient extends AbstractService { + + @Public + public static TimelineClient createTimelineClient() { + TimelineClient client = new TimelineClientImpl(); + return client; + } + + @Private + protected TimelineClient(String name) { + super(name); + } + + /** + *
<p>
    + * Post the information of a number of conceptual entities of an application + * to the timeline server. It is a blocking API. The method will not return + * until it gets the response from the timeline server. + *
</p>
    + * + * @param entities + * the collection of {@link ATSEntity} + * @return the error information if the post entities are not correctly stored + * @throws IOException + * @throws YarnException + */ + @Public + public abstract ATSPutErrors postEntities( + ATSEntity... entities) throws IOException, YarnException; + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java new file mode 100644 index 00000000000..9fcc2bd6e3d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.client.api.impl; + +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; + +import javax.ws.rs.core.MediaType; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.http.HttpConfig; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntities; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntity; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors; +import org.apache.hadoop.yarn.client.api.TimelineClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.webapp.YarnJacksonJaxbJsonProvider; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.sun.jersey.api.client.Client; +import com.sun.jersey.api.client.ClientResponse; +import com.sun.jersey.api.client.WebResource; +import com.sun.jersey.api.client.config.ClientConfig; +import com.sun.jersey.api.client.config.DefaultClientConfig; + +@Private +@Unstable +public class TimelineClientImpl extends TimelineClient { + + private static final Log LOG = LogFactory.getLog(TimelineClientImpl.class); + private static final String RESOURCE_URI_STR = "/ws/v1/apptimeline/"; + private static final Joiner JOINER = Joiner.on(""); + + private Client client; + private URI resURI; + + public TimelineClientImpl() { + super(TimelineClientImpl.class.getName()); + ClientConfig cc = new DefaultClientConfig(); + cc.getClasses().add(YarnJacksonJaxbJsonProvider.class); + client = Client.create(cc); + } + + protected void serviceInit(Configuration conf) throws Exception { + resURI = new 
URI(JOINER.join(HttpConfig.getSchemePrefix(), + HttpConfig.isSecure() ? conf.get( + YarnConfiguration.AHS_WEBAPP_HTTPS_ADDRESS, + YarnConfiguration.DEFAULT_AHS_WEBAPP_HTTPS_ADDRESS) : conf.get( + YarnConfiguration.AHS_WEBAPP_ADDRESS, + YarnConfiguration.DEFAULT_AHS_WEBAPP_ADDRESS), RESOURCE_URI_STR)); + super.serviceInit(conf); + } + + @Override + public ATSPutErrors postEntities( + ATSEntity... entities) throws IOException, YarnException { + ATSEntities entitiesContainer = new ATSEntities(); + entitiesContainer.addEntities(Arrays.asList(entities)); + ClientResponse resp = doPostingEntities(entitiesContainer); + if (resp.getClientResponseStatus() != ClientResponse.Status.OK) { + String msg = + "Failed to get the response from the timeline server."; + LOG.error(msg); + if (LOG.isDebugEnabled()) { + String output = resp.getEntity(String.class); + LOG.debug("HTTP error code: " + resp.getStatus() + + " Server response : \n" + output); + } + throw new YarnException(msg); + } + return resp.getEntity(ATSPutErrors.class); + } + + @Private + @VisibleForTesting + public ClientResponse doPostingEntities(ATSEntities entities) { + WebResource webResource = client.resource(resURI); + return webResource.accept(MediaType.APPLICATION_JSON) + .type(MediaType.APPLICATION_JSON) + .post(ClientResponse.class, entities); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java new file mode 100644 index 00000000000..a3917a2da57 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.client.api.impl; + +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import junit.framework.Assert; + +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntities; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEntity; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSEvent; +import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors; +import org.apache.hadoop.yarn.client.api.TimelineClient; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.sun.jersey.api.client.ClientResponse; + +public class TestTimelineClient { + + private TimelineClientImpl client; + + @Before + public void setup() { + client = spy((TimelineClientImpl) TimelineClient.createTimelineClient()); + client.init(new YarnConfiguration()); + client.start(); + } + + @After + public void tearDown() { + client.stop(); + } + + @Test + public void testPostEntities() throws Exception { + mockClientResponse(ClientResponse.Status.OK, false); + try { + ATSPutErrors errors = client.postEntities(generateATSEntity()); + Assert.assertEquals(0, errors.getErrors().size()); + } catch (YarnException e) { + Assert.fail("Exception is not expected"); + } + } + + @Test + public void testPostEntitiesWithError() throws Exception { + mockClientResponse(ClientResponse.Status.OK, true); + try { + ATSPutErrors errors = client.postEntities(generateATSEntity()); + Assert.assertEquals(1, errors.getErrors().size()); + Assert.assertEquals("test entity id", errors.getErrors().get(0) + .getEntityId()); + Assert.assertEquals("test entity type", errors.getErrors().get(0) + .getEntityType()); + Assert.assertEquals(ATSPutErrors.ATSPutError.IO_EXCEPTION, + errors.getErrors().get(0).getErrorCode()); + } catch (YarnException e) { + Assert.fail("Exception is not expected"); + } + } + + @Test + public void testPostEntitiesNoResponse() throws Exception { + mockClientResponse(ClientResponse.Status.INTERNAL_SERVER_ERROR, false); + try { + client.postEntities(generateATSEntity()); + Assert.fail("Exception is expected"); + } catch (YarnException e) { + Assert.assertTrue(e.getMessage().contains( + "Failed to get the response from the timeline server.")); + } + } + + private ClientResponse mockClientResponse(ClientResponse.Status status, + boolean hasError) { + ClientResponse response = mock(ClientResponse.class); + doReturn(response).when(client) + .doPostingEntities(any(ATSEntities.class)); + when(response.getClientResponseStatus()).thenReturn(status); + ATSPutErrors.ATSPutError error = new ATSPutErrors.ATSPutError(); + error.setEntityId("test entity id"); + error.setEntityType("test entity type"); + error.setErrorCode(ATSPutErrors.ATSPutError.IO_EXCEPTION); + ATSPutErrors errors = new ATSPutErrors(); + if (hasError) { + errors.addError(error); + } + when(response.getEntity(ATSPutErrors.class)).thenReturn(errors); + return response; + } + + private static ATSEntity generateATSEntity() { + ATSEntity entity = new ATSEntity(); + entity.setEntityId("entity id"); + entity.setEntityType("entity type"); + entity.setStartTime(System.currentTimeMillis()); + for (int i = 0; i < 2; ++i) { + ATSEvent event = new ATSEvent(); + event.setTimestamp(System.currentTimeMillis()); + event.setEventType("test event type " 
+ i); + event.addEventInfo("key1", "val1"); + event.addEventInfo("key2", "val2"); + entity.addEvent(event); + } + entity.addRelatedEntity("test ref type 1", "test ref id 1"); + entity.addRelatedEntity("test ref type 2", "test ref id 2"); + entity.addPrimaryFilter("pkey1", "pval1"); + entity.addPrimaryFilter("pkey2", "pval2"); + entity.addOtherInfo("okey1", "oval1"); + entity.addOtherInfo("okey2", "oval2"); + return entity; + } + +} From 1fa6ab249b0fa63cab550e1b7703339c4d888c5d Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Mon, 10 Feb 2014 22:50:15 +0000 Subject: [PATCH 08/47] YARN-1459. Changed ResourceManager to depend its service initialization on the configuration-provider mechanism during startup too. Contributed by Xuan Gong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566791 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/ipc/Server.java | 6 +- .../ServiceAuthorizationManager.java | 6 +- hadoop-yarn-project/CHANGES.txt | 4 + .../dev-support/findbugs-exclude.xml | 6 + .../yarn/conf/ConfigurationProvider.java | 15 +- .../conf/ConfigurationProviderFactory.java | 12 +- .../FileSystemBasedConfigurationProvider.java | 17 +- .../yarn/LocalConfigurationProvider.java | 9 +- .../server/resourcemanager/AdminService.java | 46 +++-- .../ApplicationMasterService.java | 20 +-- .../resourcemanager/ClientRMService.java | 15 +- .../server/resourcemanager/RMContext.java | 2 + .../server/resourcemanager/RMContextImpl.java | 18 +- .../resourcemanager/ResourceManager.java | 23 ++- .../ResourceTrackerService.java | 15 +- .../scheduler/capacity/CapacityScheduler.java | 26 ++- .../security/authorize/RMPolicyProvider.java | 19 ++ .../resourcemanager/TestRMAdminService.java | 170 ++++++++++-------- .../capacity/TestCapacityScheduler.java | 13 +- 19 files changed, 266 insertions(+), 176 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 9c67146265b..9871a3d138a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -66,6 +66,7 @@ import javax.security.sasl.SaslServer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration.IntegerRanges; @@ -454,9 +455,10 @@ public abstract class Server { * Refresh the service authorization ACL for the service handled by this server * using the specified Configuration. 
*/ - public void refreshServiceAclWithConfigration(Configuration conf, + @Private + public void refreshServiceAclWithLoadedConfiguration(Configuration conf, PolicyProvider provider) { - serviceAuthorizationManager.refreshWithConfiguration(conf, provider); + serviceAuthorizationManager.refreshWithLoadedConfiguration(conf, provider); } /** * Returns a handle to the serviceAuthorizationManager (required in tests) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java index cf032ba0980..66ab50cc58e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/ServiceAuthorizationManager.java @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -122,10 +123,11 @@ public class ServiceAuthorizationManager { // Make a copy of the original config, and load the policy file Configuration policyConf = new Configuration(conf); policyConf.addResource(policyFile); - refreshWithConfiguration(policyConf, provider); + refreshWithLoadedConfiguration(policyConf, provider); } - public synchronized void refreshWithConfiguration(Configuration conf, + @Private + public synchronized void refreshWithLoadedConfiguration(Configuration conf, PolicyProvider provider) { final Map, AccessControlList> newAcls = new IdentityHashMap, AccessControlList>(); diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 59afe849981..d31a3efe5b8 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -169,6 +169,10 @@ Release 2.4.0 - UNRELEASED YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize app-attempts separately from apps. (Jian He via vinodkv) + YARN-1459. Changed ResourceManager to depend its service initialization + on the configuration-provider mechanism during startup too. 
(Xuan Gong via + vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 74ca61b8578..0fac0b98f1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -309,4 +309,10 @@ + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java index 78c34d9de98..b31573d39eb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProvider.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.conf; import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -34,8 +33,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException; */ public abstract class ConfigurationProvider { - public void init(Configuration conf) throws Exception { - initInternal(conf); + public void init(Configuration bootstrapConf) throws Exception { + initInternal(bootstrapConf); } public void close() throws Exception { @@ -43,19 +42,21 @@ public abstract class ConfigurationProvider { } /** - * Get the configuration. + * Get the configuration and combine with bootstrapConf + * @param bootstrapConf Configuration * @param name The configuration file name * @return configuration * @throws YarnException * @throws IOException */ - public abstract Configuration getConfiguration(String name) - throws YarnException, IOException; + public abstract Configuration getConfiguration(Configuration bootstrapConf, + String name) throws YarnException, IOException; /** * Derived classes initialize themselves using this method. */ - public abstract void initInternal(Configuration conf) throws Exception; + public abstract void initInternal(Configuration bootstrapConf) + throws Exception; /** * Derived classes close themselves using this method. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java index 4adc72e1f11..3562f173acb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/ConfigurationProviderFactory.java @@ -33,12 +33,12 @@ public class ConfigurationProviderFactory { /** * Creates an instance of {@link ConfigurationProvider} using given * configuration. 
- * @param conf + * @param bootstrapConf * @return configurationProvider */ @SuppressWarnings("unchecked") public static ConfigurationProvider - getConfigurationProvider(Configuration conf) { + getConfigurationProvider(Configuration bootstrapConf) { Class defaultProviderClass; try { defaultProviderClass = (Class) @@ -49,9 +49,11 @@ public class ConfigurationProviderFactory { "Invalid default configuration provider class" + YarnConfiguration.DEFAULT_RM_CONFIGURATION_PROVIDER_CLASS, e); } - ConfigurationProvider configurationProvider = ReflectionUtils.newInstance( - conf.getClass(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - defaultProviderClass, ConfigurationProvider.class), conf); + ConfigurationProvider configurationProvider = + ReflectionUtils.newInstance(bootstrapConf.getClass( + YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + defaultProviderClass, ConfigurationProvider.class), + bootstrapConf); return configurationProvider; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java index 709f54a3529..390aace7d21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/FileSystemBasedConfigurationProvider.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn; import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -42,24 +41,24 @@ public class FileSystemBasedConfigurationProvider private Path configDir; @Override - public synchronized Configuration getConfiguration(String name) - throws IOException, YarnException { + public synchronized Configuration getConfiguration(Configuration bootstrapConf, + String name) throws IOException, YarnException { Path configPath = new Path(this.configDir, name); if (!fs.exists(configPath)) { throw new YarnException("Can not find Configuration: " + name + " in " + configDir); } - Configuration conf = new Configuration(false); - conf.addResource(fs.open(configPath)); - return conf; + bootstrapConf.addResource(fs.open(configPath)); + return bootstrapConf; } @Override - public synchronized void initInternal(Configuration conf) throws Exception { + public synchronized void initInternal(Configuration bootstrapConf) + throws Exception { configDir = - new Path(conf.get(YarnConfiguration.FS_BASED_RM_CONF_STORE, + new Path(bootstrapConf.get(YarnConfiguration.FS_BASED_RM_CONF_STORE, YarnConfiguration.DEFAULT_FS_BASED_RM_CONF_STORE)); - fs = configDir.getFileSystem(conf); + fs = configDir.getFileSystem(bootstrapConf); if (!fs.exists(configDir)) { fs.mkdirs(configDir); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java index d152c353f08..3e6996036f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/LocalConfigurationProvider.java @@ -19,7 +19,6 @@ package 
org.apache.hadoop.yarn; import java.io.IOException; - import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; @@ -31,13 +30,13 @@ import org.apache.hadoop.yarn.exceptions.YarnException; public class LocalConfigurationProvider extends ConfigurationProvider { @Override - public Configuration getConfiguration(String name) - throws IOException, YarnException { - return new Configuration(); + public Configuration getConfiguration(Configuration bootstrapConf, + String name) throws IOException, YarnException { + return bootstrapConf; } @Override - public void initInternal(Configuration conf) throws Exception { + public void initInternal(Configuration bootstrapConf) throws Exception { // Do nothing } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index d9c239e220a..da479b47ee8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceStatus; @@ -45,11 +46,8 @@ import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.service.CompositeService; -import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.ResourceOption; -import org.apache.hadoop.yarn.conf.ConfigurationProvider; -import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -92,8 +90,6 @@ public class AdminService extends CompositeService implements private Server server; private InetSocketAddress masterServiceAddress; private AccessControlList adminAcl; - - private ConfigurationProvider configurationProvider = null; private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); @@ -115,10 +111,6 @@ public class AdminService extends CompositeService implements } } - this.configurationProvider = - ConfigurationProviderFactory.getConfigurationProvider(conf); - configurationProvider.init(conf); - masterServiceAddress = conf.getSocketAddr( YarnConfiguration.RM_ADMIN_ADDRESS, YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS, @@ -139,9 +131,6 @@ public class AdminService extends CompositeService implements @Override protected synchronized void serviceStop() throws Exception { stopServer(); - if (this.configurationProvider != null) { - configurationProvider.close(); - } 
super.serviceStop(); } @@ -158,7 +147,10 @@ public class AdminService extends CompositeService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } if (rmContext.isHAEnabled()) { @@ -321,8 +313,8 @@ public class AdminService extends CompositeService implements RefreshQueuesResponse response = recordFactory.newRecordInstance(RefreshQueuesResponse.class); try { - Configuration conf = - getConfiguration(YarnConfiguration.CS_CONFIGURATION_FILE); + Configuration conf = getConfiguration(getConfig(), + YarnConfiguration.CS_CONFIGURATION_FILE); rmContext.getScheduler().reinitialize(conf, this.rmContext); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); @@ -376,7 +368,8 @@ public class AdminService extends CompositeService implements } Configuration conf = - getConfiguration(YarnConfiguration.CORE_SITE_CONFIGURATION_FILE); + getConfiguration(getConfig(), + YarnConfiguration.CORE_SITE_CONFIGURATION_FILE); ProxyUsers.refreshSuperUserGroupsConfiguration(conf); RMAuditLogger.logSuccess(user.getShortUserName(), argName, "AdminService"); @@ -421,7 +414,7 @@ public class AdminService extends CompositeService implements throwStandbyException(); } Configuration conf = - getConfiguration(YarnConfiguration.YARN_SITE_XML_FILE); + getConfiguration(getConfig(), YarnConfiguration.YARN_SITE_XML_FILE); adminAcl = new AccessControlList(conf.get( YarnConfiguration.YARN_ADMIN_ACL, YarnConfiguration.DEFAULT_YARN_ADMIN_ACL)); @@ -452,9 +445,10 @@ public class AdminService extends CompositeService implements throwStandbyException(); } - PolicyProvider policyProvider = new RMPolicyProvider(); + PolicyProvider policyProvider = RMPolicyProvider.getInstance(); Configuration conf = - getConfiguration(YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE); + getConfiguration(getConfig(), + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE); refreshServiceAcls(conf, policyProvider); rmContext.getClientRMService().refreshServiceAcls(conf, policyProvider); @@ -466,12 +460,13 @@ public class AdminService extends CompositeService implements return recordFactory.newRecordInstance(RefreshServiceAclsResponse.class); } - synchronized void refreshServiceAcls(Configuration configuration, + private synchronized void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.configurationProvider instanceof LocalConfigurationProvider) { + if (this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } @@ -521,9 +516,10 @@ public class AdminService extends CompositeService implements return response; } - private synchronized Configuration getConfiguration(String confFileName) - throws YarnException, IOException { - return this.configurationProvider.getConfiguration(confFileName); + private synchronized Configuration getConfiguration(Configuration conf, + String confFileName) throws YarnException, IOException { + return this.rmContext.getConfigurationProvider().getConfiguration(conf, + confFileName); } @VisibleForTesting diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 2c4be13ee92..0c56134b811 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -105,7 +105,6 @@ public class ApplicationMasterService extends AbstractService implements private final AllocateResponse resync = recordFactory.newRecordInstance(AllocateResponse.class); private final RMContext rmContext; - private boolean useLocalConfigurationProvider; public ApplicationMasterService(RMContext rmContext, YarnScheduler scheduler) { super(ApplicationMasterService.class.getName()); @@ -115,15 +114,6 @@ public class ApplicationMasterService extends AbstractService implements this.rmContext = rmContext; } - @Override - protected void serviceInit(Configuration conf) throws Exception { - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); - super.serviceInit(conf); - } - @Override protected void serviceStart() throws Exception { Configuration conf = getConfig(); @@ -150,7 +140,10 @@ public class ApplicationMasterService extends AbstractService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + this.rmContext.getConfigurationProvider().getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } this.server.start(); @@ -591,10 +584,11 @@ public class ApplicationMasterService extends AbstractService implements public void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.useLocalConfigurationProvider) { + if (this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 2f8526a7c71..43e94edd1a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -136,7 +136,6 @@ public class ClientRMService extends AbstractService implements private final ApplicationACLsManager 
applicationsACLsManager; private final QueueACLsManager queueACLsManager; - private boolean useLocalConfigurationProvider; public ClientRMService(RMContext rmContext, YarnScheduler scheduler, RMAppManager rmAppManager, ApplicationACLsManager applicationACLsManager, @@ -154,10 +153,6 @@ public class ClientRMService extends AbstractService implements @Override protected void serviceInit(Configuration conf) throws Exception { clientBindAddress = getBindAddress(conf); - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); super.serviceInit(conf); } @@ -176,7 +171,10 @@ public class ClientRMService extends AbstractService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + this.rmContext.getConfigurationProvider().getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } this.server.start(); @@ -809,10 +807,11 @@ public class ClientRMService extends AbstractService implements void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.useLocalConfigurationProvider) { + if (this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 64a4165feb4..79fb5dfa23e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -23,6 +23,7 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; @@ -97,4 +98,5 @@ public interface RMContext { void setRMApplicationHistoryWriter( RMApplicationHistoryWriter rmApplicationHistoryWriter); + ConfigurationProvider getConfigurationProvider(); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 79e59831e9d..689a0914190 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -23,8 +23,10 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; @@ -78,7 +80,7 @@ public class RMContextImpl implements RMContext { private ResourceTrackerService resourceTrackerService; private ApplicationMasterService applicationMasterService; private RMApplicationHistoryWriter rmApplicationHistoryWriter; - + private ConfigurationProvider configurationProvider; /** * Default constructor. To be used in conjunction with setter methods for * individual fields. @@ -119,8 +121,11 @@ public class RMContextImpl implements RMContext { } catch (Exception e) { assert false; } + + ConfigurationProvider provider = new LocalConfigurationProvider(); + setConfigurationProvider(provider); } - + @Override public Dispatcher getDispatcher() { return this.rmDispatcher; @@ -334,4 +339,13 @@ public class RMContextImpl implements RMContext { this.rmApplicationHistoryWriter = rmApplicationHistoryWriter; } + @Override + public ConfigurationProvider getConfigurationProvider() { + return this.configurationProvider; + } + + public void setConfigurationProvider( + ConfigurationProvider configurationProvider) { + this.configurationProvider = configurationProvider; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 8575cd57d65..1040cc5c526 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -42,10 +42,13 @@ import org.apache.hadoop.util.ExitUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.conf.ConfigurationProvider; +import org.apache.hadoop.yarn.conf.ConfigurationProviderFactory; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -154,7 
+157,7 @@ public class ResourceManager extends CompositeService implements Recoverable { private boolean recoveryEnabled; private String webAppAddress; - + private ConfigurationProvider configurationProvider = null; /** End of Active services */ private Configuration conf; @@ -182,6 +185,21 @@ public class ResourceManager extends CompositeService implements Recoverable { this.conf = conf; this.rmContext = new RMContextImpl(); + this.configurationProvider = + ConfigurationProviderFactory.getConfigurationProvider(conf); + this.configurationProvider.init(this.conf); + rmContext.setConfigurationProvider(configurationProvider); + if (!(this.configurationProvider instanceof LocalConfigurationProvider)) { + // load yarn-site.xml + this.conf = + this.configurationProvider.getConfiguration(this.conf, + YarnConfiguration.YARN_SITE_XML_FILE); + // load core-site.xml + this.conf = + this.configurationProvider.getConfiguration(this.conf, + YarnConfiguration.CORE_SITE_CONFIGURATION_FILE); + } + // register the handlers for all AlwaysOn services using setupDispatcher(). rmDispatcher = setupDispatcher(); addIfService(rmDispatcher); @@ -884,6 +902,9 @@ public class ResourceManager extends CompositeService implements Recoverable { if (fetcher != null) { fetcher.stop(); } + if (configurationProvider != null) { + configurationProvider.close(); + } super.serviceStop(); transitionToStandby(false); rmContext.setHAServiceState(HAServiceState.STOPPING); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 4f74179717f..8136c056129 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -95,7 +95,6 @@ public class ResourceTrackerService extends AbstractService implements private int minAllocMb; private int minAllocVcores; - private boolean useLocalConfigurationProvider; static { resync.setNodeAction(NodeAction.RESYNC); @@ -145,10 +144,6 @@ public class ResourceTrackerService extends AbstractService implements YarnConfiguration.RM_NODEMANAGER_MINIMUM_VERSION, YarnConfiguration.DEFAULT_RM_NODEMANAGER_MINIMUM_VERSION); - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); super.serviceInit(conf); } @@ -169,7 +164,10 @@ public class ResourceTrackerService extends AbstractService implements if (conf.getBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, false)) { - refreshServiceAcls(conf, new RMPolicyProvider()); + refreshServiceAcls( + this.rmContext.getConfigurationProvider().getConfiguration(conf, + YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE), + RMPolicyProvider.getInstance()); } this.server.start(); @@ -423,10 +421,11 @@ public class ResourceTrackerService extends AbstractService implements void refreshServiceAcls(Configuration configuration, PolicyProvider policyProvider) { - if (this.useLocalConfigurationProvider) { + if 
(this.rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { this.server.refreshServiceAcl(configuration, policyProvider); } else { - this.server.refreshServiceAclWithConfigration(configuration, + this.server.refreshServiceAclWithLoadedConfiguration(configuration, policyProvider); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index b019a762515..eb4f814e1e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -196,7 +196,6 @@ public class CapacityScheduler extends AbstractYarnScheduler private ResourceCalculator calculator; private boolean usePortForNodeName; - private boolean useLocalConfigurationProvider; public CapacityScheduler() {} @@ -262,14 +261,21 @@ public class CapacityScheduler extends AbstractYarnScheduler @Override public synchronized void reinitialize(Configuration conf, RMContext rmContext) throws IOException { + Configuration configuration = new Configuration(conf); if (!initialized) { - this.useLocalConfigurationProvider = - (LocalConfigurationProvider.class.isAssignableFrom(conf.getClass( - YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, - LocalConfigurationProvider.class))); - this.conf = - new CapacitySchedulerConfiguration(conf, - this.useLocalConfigurationProvider); + if (rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider) { + this.conf = new CapacitySchedulerConfiguration(configuration, true); + } else { + try { + this.conf = + new CapacitySchedulerConfiguration(rmContext + .getConfigurationProvider().getConfiguration(configuration, + YarnConfiguration.CS_CONFIGURATION_FILE), false); + } catch (Exception e) { + throw new IOException(e); + } + } validateConf(this.conf); this.minimumAllocation = this.conf.getMinimumAllocation(); this.maximumAllocation = this.conf.getMaximumAllocation(); @@ -290,7 +296,8 @@ public class CapacityScheduler extends AbstractYarnScheduler CapacitySchedulerConfiguration oldConf = this.conf; this.conf = new CapacitySchedulerConfiguration(conf, - this.useLocalConfigurationProvider); + rmContext.getConfigurationProvider() instanceof + LocalConfigurationProvider); validateConf(this.conf); try { LOG.info("Re-initializing queues..."); @@ -316,6 +323,7 @@ public class CapacityScheduler extends AbstractYarnScheduler @Lock(CapacityScheduler.class) private void initializeQueues(CapacitySchedulerConfiguration conf) throws IOException { + root = parseQueue(this, conf, null, CapacitySchedulerConfiguration.ROOT, queues, queues, noop); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java index 
bdab4f37715..8c5efa15e44 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/security/authorize/RMPolicyProvider.java @@ -18,7 +18,9 @@ package org.apache.hadoop.yarn.server.resourcemanager.security.authorize; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.security.authorize.PolicyProvider; @@ -37,6 +39,23 @@ import org.apache.hadoop.yarn.server.api.ResourceTrackerPB; @InterfaceStability.Unstable public class RMPolicyProvider extends PolicyProvider { + private static RMPolicyProvider rmPolicyProvider = null; + + private RMPolicyProvider() {} + + @Private + @Unstable + public static RMPolicyProvider getInstance() { + if (rmPolicyProvider == null) { + synchronized(RMPolicyProvider.class) { + if (rmPolicyProvider == null) { + rmPolicyProvider = new RMPolicyProvider(); + } + } + } + return rmPolicyProvider; + } + private static final Service[] resourceManagerServices = new Service[] { new Service( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java index 5372c18832d..ee008e93b43 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java @@ -26,7 +26,6 @@ import java.io.FileOutputStream; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.authorize.AccessControlList; @@ -105,34 +104,34 @@ public class TestRMAdminService { throws IOException, YarnException { configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); - rm = new MockRM(configuration); - rm.init(configuration); - rm.start(); + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + fail("Should throw an exception"); + } catch(Exception ex) { + // Expect exception here + } - // clean the remoteDirectory - cleanRemoteDirectory(); + //upload default configurations + uploadDefaultConfiguration(); + + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + } catch(Exception ex) { + fail("Should not get any exceptions"); + } CapacityScheduler cs = (CapacityScheduler) rm.getRMContext().getScheduler(); int maxAppsBefore = cs.getConfiguration().getMaximumSystemApplications(); - 
try { - rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); - fail("FileSystemBasedConfigurationProvider is used." + - " Should get an exception here"); - } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: capacity-scheduler.xml")); - } - CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); csConf.set("yarn.scheduler.capacity.maximum-applications", "5000"); - String csConfFile = writeConfigurationXML(csConf, - "capacity-scheduler.xml"); - - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(csConfFile)); + uploadConfiguration(csConf, "capacity-scheduler.xml"); rm.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); @@ -159,20 +158,24 @@ public class TestRMAdminService { throws IOException, YarnException { configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); - rm = new MockRM(configuration); - rm.init(configuration); - rm.start(); + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + fail("Should throw an exception"); + } catch(Exception ex) { + // Expect exception here + } - // clean the remoteDirectory - cleanRemoteDirectory(); + //upload default configurations + uploadDefaultConfiguration(); try { - rm.adminService.refreshAdminAcls(RefreshAdminAclsRequest.newInstance()); - fail("FileSystemBasedConfigurationProvider is used." + - " Should get an exception here"); - } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: yarn-site.xml")); + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + } catch(Exception ex) { + fail("Should not get any exceptions"); } String aclStringBefore = @@ -180,10 +183,8 @@ public class TestRMAdminService { YarnConfiguration yarnConf = new YarnConfiguration(); yarnConf.set(YarnConfiguration.YARN_ADMIN_ACL, "world:anyone:rwcda"); - String yarnConfFile = writeConfigurationXML(yarnConf, "yarn-site.xml"); + uploadConfiguration(yarnConf, "yarn-site.xml"); - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(yarnConfFile)); rm.adminService.refreshAdminAcls(RefreshAdminAclsRequest.newInstance()); String aclStringAfter = @@ -214,7 +215,6 @@ public class TestRMAdminService { } } - @SuppressWarnings("resource") @Test public void testServiceAclsRefreshWithFileSystemBasedConfigurationProvider() throws IOException, YarnException { @@ -224,33 +224,33 @@ public class TestRMAdminService { "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); ResourceManager resourceManager = null; try { - resourceManager = new ResourceManager(); - resourceManager.init(configuration); - resourceManager.start(); - - // clean the remoteDirectory - cleanRemoteDirectory(); - try { - resourceManager.adminService - .refreshServiceAcls(RefreshServiceAclsRequest - .newInstance()); - fail("FileSystemBasedConfigurationProvider is used." 
+ - " Should get an exception here"); + resourceManager = new ResourceManager(); + resourceManager.init(configuration); + resourceManager.start(); + fail("Should throw an exception"); } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: hadoop-policy.xml")); + // expect to get an exception here } - String aclsString = "alice,bob users,wheel"; + //upload default configurations + uploadDefaultConfiguration(); Configuration conf = new Configuration(); conf.setBoolean( CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, true); - conf.set("security.applicationclient.protocol.acl", aclsString); - String hadoopConfFile = writeConfigurationXML(conf, "hadoop-policy.xml"); + uploadConfiguration(conf, "core-site.xml"); + try { + resourceManager = new ResourceManager(); + resourceManager.init(configuration); + resourceManager.start(); + } catch (Exception ex) { + fail("Should not get any exceptions"); + } - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(hadoopConfFile)); + String aclsString = "alice,bob users,wheel"; + Configuration newConf = new Configuration(); + newConf.set("security.applicationclient.protocol.acl", aclsString); + uploadConfiguration(newConf, "hadoop-policy.xml"); resourceManager.adminService.refreshServiceAcls(RefreshServiceAclsRequest .newInstance()); @@ -328,31 +328,31 @@ public class TestRMAdminService { throws IOException, YarnException { configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); - rm = new MockRM(configuration); - rm.init(configuration); - rm.start(); + try { + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + fail("Should throw an exception"); + } catch(Exception ex) { + // Expect exception here + } - // clean the remoteDirectory - cleanRemoteDirectory(); + //upload default configurations + uploadDefaultConfiguration(); try { - rm.adminService.refreshSuperUserGroupsConfiguration( - RefreshSuperUserGroupsConfigurationRequest.newInstance()); - fail("FileSystemBasedConfigurationProvider is used." 
+ - " Should get an exception here"); - } catch (Exception ex) { - Assert.assertTrue(ex.getMessage().contains( - "Can not find Configuration: core-site.xml")); + rm = new MockRM(configuration); + rm.init(configuration); + rm.start(); + } catch(Exception ex) { + fail("Should not get any exceptions"); } Configuration coreConf = new Configuration(false); coreConf.set("hadoop.proxyuser.test.groups", "test_groups"); coreConf.set("hadoop.proxyuser.test.hosts", "test_hosts"); - String coreConfFile = writeConfigurationXML(coreConf, - "core-site.xml"); + uploadConfiguration(coreConf, "core-site.xml"); - // upload the file into Remote File System - uploadToRemoteFileSystem(new Path(coreConfFile)); rm.adminService.refreshSuperUserGroupsConfiguration( RefreshSuperUserGroupsConfigurationRequest.newInstance()); Assert.assertTrue(ProxyUsers.getProxyGroups() @@ -393,11 +393,29 @@ public class TestRMAdminService { fs.copyFromLocalFile(filePath, workingPath); } - private void cleanRemoteDirectory() throws IOException { - if (fs.exists(workingPath)) { - for (FileStatus file : fs.listStatus(workingPath)) { - fs.delete(file.getPath(), true); - } - } + private void uploadConfiguration(Configuration conf, String confFileName) + throws IOException { + String csConfFile = writeConfigurationXML(conf, confFileName); + // upload the file into Remote File System + uploadToRemoteFileSystem(new Path(csConfFile)); + } + + private void uploadDefaultConfiguration() throws IOException { + Configuration conf = new Configuration(); + uploadConfiguration(conf, "core-site.xml"); + + YarnConfiguration yarnConf = new YarnConfiguration(); + yarnConf.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); + uploadConfiguration(yarnConf, "yarn-site.xml"); + + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + uploadConfiguration(csConf, "capacity-scheduler.xml"); + + Configuration hadoopPolicyConf = new Configuration(false); + hadoopPolicyConf + .addResource(YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE); + uploadConfiguration(hadoopPolicyConf, "hadoop-policy.xml"); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index ca60db3f04c..47ec5462350 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -40,6 +40,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.yarn.LocalConfigurationProvider; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -104,6 +105,7 @@ public class TestCapacityScheduler { private static float B3_CAPACITY = 20; private ResourceManager 
resourceManager = null; + private RMContext mockContext; @Before public void setUp() throws Exception { @@ -118,6 +120,9 @@ public class TestCapacityScheduler { resourceManager.getRMContainerTokenSecretManager().rollMasterKey(); resourceManager.getRMNMTokenSecretManager().rollMasterKey(); ((AsyncDispatcher)resourceManager.getRMContext().getDispatcher()).start(); + mockContext = mock(RMContext.class); + when(mockContext.getConfigurationProvider()).thenReturn( + new LocalConfigurationProvider()); } @After @@ -133,7 +138,7 @@ public class TestCapacityScheduler { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 2048); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 1024); try { - scheduler.reinitialize(conf, null); + scheduler.reinitialize(conf, mockContext); fail("Exception is expected because the min memory allocation is" + " larger than the max memory allocation."); } catch (YarnRuntimeException e) { @@ -147,7 +152,7 @@ public class TestCapacityScheduler { conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_VCORES, 2); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_VCORES, 1); try { - scheduler.reinitialize(conf, null); + scheduler.reinitialize(conf, mockContext); fail("Exception is expected because the min vcores allocation is" + " larger than the max vcores allocation."); } catch (YarnRuntimeException e) { @@ -353,7 +358,7 @@ public class TestCapacityScheduler { conf.setCapacity(A, 80f); conf.setCapacity(B, 20f); - cs.reinitialize(conf,null); + cs.reinitialize(conf, mockContext); checkQueueCapacities(cs, 80f, 20f); } @@ -503,7 +508,7 @@ public class TestCapacityScheduler { conf.setCapacity(B2, B2_CAPACITY); conf.setCapacity(B3, B3_CAPACITY); conf.setCapacity(B4, B4_CAPACITY); - cs.reinitialize(conf,null); + cs.reinitialize(conf,mockContext); checkQueueCapacities(cs, 80f, 20f); // Verify parent for B4 From 5c978a43c3052cc1466b23653c354399186b4e10 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Mon, 10 Feb 2014 23:13:06 +0000 Subject: [PATCH 09/47] HDFS-5915. Refactor FSImageFormatProtobuf to simplify cross section reads. Contributed by Haohui Mai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566824 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/namenode/FSImageFormatPBINode.java | 29 ++--- .../namenode/FSImageFormatProtobuf.java | 102 ++++++++++-------- .../snapshot/FSImageFormatPBSnapshot.java | 16 +-- .../server/namenode/TestDeduplicationMap.java | 36 +++++++ .../namenode/TestFSImageStorageInspector.java | 5 - 6 files changed, 124 insertions(+), 67 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5a0e4216536..45d564259e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -335,6 +335,9 @@ Trunk (Unreleased) HDFS-5911. The id of a CacheDirective instance does not get serialized in the protobuf-fsimage. (Haohui Mai via jing9) + HDFS-5915. Refactor FSImageFormatProtobuf to simplify cross section reads. 
+ (Haohui Mai via cnauroth) + Release 2.4.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 5ade5cec6a3..43bbfdbc7ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -38,7 +38,7 @@ import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; -import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.StringMap; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SaverContext; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; @@ -208,7 +208,7 @@ public final class FSImageFormatPBINode { case FILE: return loadINodeFile(n); case DIRECTORY: - return loadINodeDirectory(n, parent.getStringTable()); + return loadINodeDirectory(n, parent.getLoaderContext().getStringTable()); case SYMLINK: return loadINodeSymlink(n); default: @@ -228,7 +228,7 @@ public final class FSImageFormatPBINode { blocks[i] = new BlockInfo(PBHelper.convert(bp.get(i)), replication); } final PermissionStatus permissions = loadPermission(f.getPermission(), - parent.getStringTable()); + parent.getLoaderContext().getStringTable()); final INodeFile file = new INodeFile(n.getId(), n.getName().toByteArray(), permissions, f.getModificationTime(), @@ -253,13 +253,14 @@ public final class FSImageFormatPBINode { assert n.getType() == INodeSection.INode.Type.SYMLINK; INodeSection.INodeSymlink s = n.getSymlink(); final PermissionStatus permissions = loadPermission(s.getPermission(), - parent.getStringTable()); + parent.getLoaderContext().getStringTable()); return new INodeSymlink(n.getId(), n.getName().toByteArray(), permissions, 0, 0, s.getTarget().toStringUtf8()); } private void loadRootINode(INodeSection.INode p) { - INodeDirectory root = loadINodeDirectory(p, parent.getStringTable()); + INodeDirectory root = loadINodeDirectory(p, parent.getLoaderContext() + .getStringTable()); final Quota.Counts q = root.getQuotaCounts(); final long nsQuota = q.get(Quota.NAMESPACE); final long dsQuota = q.get(Quota.DISKSPACE); @@ -273,16 +274,17 @@ public final class FSImageFormatPBINode { public final static class Saver { private static long buildPermissionStatus(INodeAttributes n, - final StringMap stringMap) { - long userId = stringMap.getStringId(n.getUserName()); - long groupId = stringMap.getStringId(n.getGroupName()); + final SaverContext.DeduplicationMap stringMap) { + long userId = stringMap.getId(n.getUserName()); + long groupId = stringMap.getId(n.getGroupName()); return ((userId & USER_GROUP_STRID_MASK) << USER_STRID_OFFSET) | ((groupId & USER_GROUP_STRID_MASK) << GROUP_STRID_OFFSET) | n.getFsPermissionShort(); } public static INodeSection.INodeFile.Builder buildINodeFile( - INodeFileAttributes file, final StringMap stringMap) { + INodeFileAttributes file, + final 
SaverContext.DeduplicationMap<String> stringMap) { INodeSection.INodeFile.Builder b = INodeSection.INodeFile.newBuilder() .setAccessTime(file.getAccessTime()) .setModificationTime(file.getModificationTime()) @@ -293,7 +295,8 @@ public final class FSImageFormatPBINode { } public static INodeSection.INodeDirectory.Builder buildINodeDirectory( - INodeDirectoryAttributes dir, final StringMap stringMap) { + INodeDirectoryAttributes dir, + final SaverContext.DeduplicationMap<String> stringMap) { Quota.Counts quota = dir.getQuotaCounts(); INodeSection.INodeDirectory.Builder b = INodeSection.INodeDirectory .newBuilder().setModificationTime(dir.getModificationTime()) @@ -416,7 +419,7 @@ public final class FSImageFormatPBINode { private void save(OutputStream out, INodeDirectory n) throws IOException { INodeSection.INodeDirectory.Builder b = buildINodeDirectory(n, - parent.getStringMap()); + parent.getSaverContext().getStringMap()); INodeSection.INode r = buildINodeCommon(n) .setType(INodeSection.INode.Type.DIRECTORY).setDirectory(b).build(); r.writeDelimitedTo(out); @@ -424,7 +427,7 @@ public final class FSImageFormatPBINode { private void save(OutputStream out, INodeFile n) throws IOException { INodeSection.INodeFile.Builder b = buildINodeFile(n, - parent.getStringMap()); + parent.getSaverContext().getStringMap()); for (Block block : n.getBlocks()) { b.addBlocks(PBHelper.convert(block)); @@ -447,7 +450,7 @@ public final class FSImageFormatPBINode { private void save(OutputStream out, INodeSymlink n) throws IOException { INodeSection.INodeSymlink.Builder b = INodeSection.INodeSymlink .newBuilder() - .setPermission(buildPermissionStatus(n, parent.getStringMap())) + .setPermission(buildPermissionStatus(n, parent.getSaverContext().getStringMap())) .setTarget(ByteString.copyFrom(n.getSymlink())); INodeSection.INode r = buildINodeCommon(n) .setType(INodeSection.INode.Type.SYMLINK).setSymlink(b).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java index 2edc57b18d7..c03ba606410 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -73,12 +73,56 @@ import com.google.protobuf.CodedOutputStream; public final class FSImageFormatProtobuf { private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); + public static final class LoaderContext { + private String[] stringTable; + + public String[] getStringTable() { + return stringTable; + } + } + + public static final class SaverContext { + public static class DeduplicationMap<E> { + private final Map<E, Integer> map = Maps.newHashMap(); + private DeduplicationMap() {} + + static <T> DeduplicationMap<T> newMap() { + return new DeduplicationMap<T>(); + } + + int getId(E value) { + if (value == null) { + return 0; + } + Integer v = map.get(value); + if (v == null) { + int nv = map.size() + 1; + map.put(value, nv); + return nv; + } + return v; + } + + int size() { + return map.size(); + } + + Set<Entry<E, Integer>> entrySet() { + return map.entrySet(); + } + } + private final DeduplicationMap<String> stringMap = DeduplicationMap.newMap(); + + public DeduplicationMap<String> getStringMap() { + return stringMap; + } + } + + public static final class Loader implements FSImageFormat.AbstractLoader { static final int MINIMUM_FILE_LENGTH = 8;
private final Configuration conf; private final FSNamesystem fsn; - - private String[] stringTable; + private final LoaderContext ctx; /** The MD5 sum of the loaded file */ private MD5Hash imgDigest; @@ -88,6 +132,7 @@ public final class FSImageFormatProtobuf { Loader(Configuration conf, FSNamesystem fsn) { this.conf = conf; this.fsn = fsn; + this.ctx = new LoaderContext(); } @Override @@ -100,8 +145,8 @@ public final class FSImageFormatProtobuf { return imgTxId; } - public String[] getStringTable() { - return stringTable; + public LoaderContext getLoaderContext() { + return ctx; } void load(File file) throws IOException { @@ -226,11 +271,11 @@ public final class FSImageFormatProtobuf { private void loadStringTableSection(InputStream in) throws IOException { StringTableSection s = StringTableSection.parseDelimitedFrom(in); - stringTable = new String[s.getNumEntry() + 1]; + ctx.stringTable = new String[s.getNumEntry() + 1]; for (int i = 0; i < s.getNumEntry(); ++i) { StringTableSection.Entry e = StringTableSection.Entry .parseDelimitedFrom(in); - stringTable[e.getId()] = e.getStr(); + ctx.stringTable[e.getId()] = e.getStr(); } } @@ -269,9 +314,10 @@ public final class FSImageFormatProtobuf { public static final class Saver { private final SaveNamespaceContext context; + private final SaverContext saverContext; + private long currentOffset = FSImageUtil.MAGIC_HEADER.length; private MD5Hash savedDigest; - private StringMap stringMap = new StringMap(); private FileChannel fileChannel; // OutputStream for the section data @@ -282,6 +328,7 @@ public final class FSImageFormatProtobuf { Saver(SaveNamespaceContext context) { this.context = context; + this.saverContext = new SaverContext(); } public MD5Hash getSavedDigest() { @@ -292,6 +339,10 @@ public final class FSImageFormatProtobuf { return context; } + public SaverContext getSaverContext() { + return saverContext; + } + public void commitSection(FileSummary.Builder summary, SectionName name) throws IOException { long oldOffset = currentOffset; @@ -465,48 +516,15 @@ public final class FSImageFormatProtobuf { throws IOException { OutputStream out = sectionOutputStream; StringTableSection.Builder b = StringTableSection.newBuilder() - .setNumEntry(stringMap.size()); + .setNumEntry(saverContext.stringMap.size()); b.build().writeDelimitedTo(out); - for (Entry<String, Integer> e : stringMap.entrySet()) { + for (Entry<String, Integer> e : saverContext.stringMap.entrySet()) { StringTableSection.Entry.Builder eb = StringTableSection.Entry .newBuilder().setId(e.getValue()).setStr(e.getKey()); eb.build().writeDelimitedTo(out); } commitSection(summary, SectionName.STRING_TABLE); } - - public StringMap getStringMap() { - return stringMap; - } - } - - public static class StringMap { - private final Map<String, Integer> stringMap; - - public StringMap() { - stringMap = Maps.newHashMap(); - } - - int getStringId(String str) { - if (str == null) { - return 0; - } - Integer v = stringMap.get(str); - if (v == null) { - int nv = stringMap.size() + 1; - stringMap.put(str, nv); - return nv; - } - return v; - } - - int size() { - return stringMap.size(); - } - - Set<Entry<String, Integer>> entrySet() { - return stringMap.entrySet(); - } - } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java index 06cc1d0ac1f..b64a3db9325 100644 ---
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -115,7 +115,7 @@ public class FSImageFormatPBSnapshot { SnapshotSection.Snapshot pbs = SnapshotSection.Snapshot .parseDelimitedFrom(in); INodeDirectory root = loadINodeDirectory(pbs.getRoot(), - parent.getStringTable()); + parent.getLoaderContext().getStringTable()); int sid = pbs.getSnapshotId(); INodeDirectorySnapshottable parent = (INodeDirectorySnapshottable) fsDir .getInode(root.getId()).asDirectory(); @@ -162,7 +162,8 @@ public class FSImageFormatPBSnapshot { if (pbf.hasSnapshotCopy()) { INodeSection.INodeFile fileInPb = pbf.getSnapshotCopy(); PermissionStatus permission = loadPermission( - fileInPb.getPermission(), parent.getStringTable()); + fileInPb.getPermission(), parent.getLoaderContext() + .getStringTable()); copy = new INodeFileAttributes.SnapshotCopy(pbf.getName() .toByteArray(), permission, fileInPb.getModificationTime(), fileInPb.getAccessTime(), (short) fileInPb.getReplication(), @@ -249,8 +250,9 @@ public class FSImageFormatPBSnapshot { }else if (diffInPb.hasSnapshotCopy()) { INodeSection.INodeDirectory dirCopyInPb = diffInPb.getSnapshotCopy(); final byte[] name = diffInPb.getName().toByteArray(); - PermissionStatus permission = loadPermission(dirCopyInPb - .getPermission(), parent.getStringTable()); + PermissionStatus permission = loadPermission( + dirCopyInPb.getPermission(), parent.getLoaderContext() + .getStringTable()); long modTime = dirCopyInPb.getModificationTime(); boolean noQuota = dirCopyInPb.getNsQuota() == -1 && dirCopyInPb.getDsQuota() == -1; @@ -311,7 +313,7 @@ public class FSImageFormatPBSnapshot { SnapshotSection.Snapshot.Builder sb = SnapshotSection.Snapshot .newBuilder().setSnapshotId(s.getId()); INodeSection.INodeDirectory.Builder db = buildINodeDirectory(sroot, - parent.getStringMap()); + parent.getSaverContext().getStringMap()); INodeSection.INode r = INodeSection.INode.newBuilder() .setId(sroot.getId()) .setType(INodeSection.INode.Type.DIRECTORY) @@ -369,7 +371,7 @@ public class FSImageFormatPBSnapshot { INodeFileAttributes copy = diff.snapshotINode; if (copy != null) { fb.setName(ByteString.copyFrom(copy.getLocalNameBytes())) - .setSnapshotCopy(buildINodeFile(copy, parent.getStringMap())); + .setSnapshotCopy(buildINodeFile(copy, parent.getSaverContext().getStringMap())); } fb.build().writeDelimitedTo(out); } @@ -410,7 +412,7 @@ public class FSImageFormatPBSnapshot { if (!diff.isSnapshotRoot() && copy != null) { db.setName(ByteString.copyFrom(copy.getLocalNameBytes())) .setSnapshotCopy( - buildINodeDirectory(copy, parent.getStringMap())); + buildINodeDirectory(copy, parent.getSaverContext().getStringMap())); } // process created list and deleted list List created = diff.getChildrenDiff() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java new file mode 100644 index 00000000000..447c7ebd0e5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeduplicationMap.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SaverContext.DeduplicationMap; +import org.junit.Assert; +import org.junit.Test; + +public class TestDeduplicationMap { + @Test + public void testDeduplicationMap() { + DeduplicationMap m = DeduplicationMap.newMap(); + Assert.assertEquals(1, m.getId("1")); + Assert.assertEquals(2, m.getId("2")); + Assert.assertEquals(3, m.getId("3")); + Assert.assertEquals(1, m.getId("1")); + Assert.assertEquals(2, m.getId("2")); + Assert.assertEquals(3, m.getId("3")); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java index 5e3ac4b7a2b..bb03b30c860 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java @@ -27,17 +27,12 @@ import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.junit.Test; public class TestFSImageStorageInspector { - private static final Log LOG = LogFactory.getLog( - TestFSImageStorageInspector.class); - /** * Simple test with image, edits, and inprogress edits */ From 666684eb90dc7ce8fc809cf371dfbe88c5956306 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 11 Feb 2014 00:46:45 +0000 Subject: [PATCH 10/47] HDFS-5921. Cannot browse file system via NN web UI if any directory has the sticky bit set. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566916 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop-hdfs/src/main/webapps/hdfs/explorer.js | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 45d564259e3..ec44aa253f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -951,6 +951,9 @@ Release 2.3.0 - UNRELEASED HDFS-5837. dfs.namenode.replication.considerLoad should consider decommissioned nodes. (Tao Luo via shv) + HDFS-5921. Cannot browse file system via NN web UI if any directory has + the sticky bit set. 
(atm) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. Add storage type to the protocol and expose it in block report diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js index 2e1af80c70e..1aa0c39079b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.js @@ -35,8 +35,8 @@ } if (sticky) { - var exec = ((parms.perm % 10) & 1) == 1; - res[res.length - 1] = exec ? 't' : 'T'; + var otherExec = ((ctx.current().permission % 10) & 1) == 1; + res = res.substr(0, res.length - 1) + (otherExec ? 't' : 'T'); } chunk.write(dir + res); From 7fce641c49ac8a4683f8f1158b47ff9f49579ad3 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 11 Feb 2014 01:05:16 +0000 Subject: [PATCH 11/47] YARN-1698. Fixed default TimelineStore in code to match what is documented in yarn-default.xml. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566937 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../applicationhistoryservice/ApplicationHistoryServer.java | 6 ++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d31a3efe5b8..0e763c81592 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -240,6 +240,9 @@ Release 2.4.0 - UNRELEASED YARN-1672. YarnConfiguration is missing a default for yarn.nodemanager.log.retain-seconds (Naren Koneru via kasha) + YARN-1698. Fixed default TimelineStore in code to match what is documented + in yarn-default.xml (Zhijie Shen via vinodkv) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java index 4ec986065b6..73a09417a01 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryServer.java @@ -34,7 +34,7 @@ import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.server.applicationhistoryservice.apptimeline.ApplicationTimelineStore; -import org.apache.hadoop.yarn.server.applicationhistoryservice.apptimeline.MemoryApplicationTimelineStore; +import org.apache.hadoop.yarn.server.applicationhistoryservice.apptimeline.LeveldbApplicationTimelineStore; import org.apache.hadoop.yarn.server.applicationhistoryservice.webapp.AHSWebApp; import org.apache.hadoop.yarn.webapp.WebApp; import org.apache.hadoop.yarn.webapp.WebApps; @@ -143,10 +143,8 @@ public class ApplicationHistoryServer extends CompositeService { protected ApplicationTimelineStore createApplicationTimelineStore( Configuration conf) { - // TODO: need to 
replace the MemoryApplicationTimelineStore.class with the - // LevelDB implementation return ReflectionUtils.newInstance(conf.getClass( - YarnConfiguration.ATS_STORE, MemoryApplicationTimelineStore.class, + YarnConfiguration.ATS_STORE, LeveldbApplicationTimelineStore.class, ApplicationTimelineStore.class), conf); } From 5c7b27bae0b52ed5be6d4f7616f99cbfc7bbf8ec Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 11 Feb 2014 02:47:05 +0000 Subject: [PATCH 12/47] HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. Contributed by bc Wong. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566965 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/fs/s3/S3FileSystem.java | 6 ++++++ .../apache/hadoop/fs/s3native/NativeS3FileSystem.java | 6 ++++++ .../hadoop/fs/s3/S3FileSystemContractBaseTest.java | 7 ++++++- .../fs/s3native/NativeS3FileSystemContractBaseTest.java | 9 +++++++-- 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9eb7fae0f53..161ab457fb0 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -331,6 +331,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10330. TestFrameDecoder fails if it cannot bind port 12345. (Arpit Agarwal) + HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. (bc Wong + via atm) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java index e49eefa1115..9240d3704ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3/S3FileSystem.java @@ -443,6 +443,12 @@ public class S3FileSystem extends FileSystem { return getConf().getLong("fs.s3.block.size", 64 * 1024 * 1024); } + @Override + public String getCanonicalServiceName() { + // Does not support Token + return null; + } + // diagnostic methods void dump() throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java index 191baaff410..7847ec5cc6c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/s3native/NativeS3FileSystem.java @@ -733,4 +733,10 @@ public class NativeS3FileSystem extends FileSystem { public Path getWorkingDirectory() { return workingDir; } + + @Override + public String getCanonicalServiceName() { + // Does not support Token + return null; + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java index d1770d3b889..d704b006bef 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java @@ -54,5 +54,10 @@ public abstract class 
S3FileSystemContractBaseTest assertEquals("Double default block size", newBlockSize, fs.getFileStatus(file).getBlockSize()); } - + + public void testCanonicalName() throws Exception { + assertNull("s3 doesn't support security token and shouldn't have canonical name", + fs.getCanonicalServiceName()); + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java index 220e0bd4730..f6f9ae91122 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java @@ -48,7 +48,12 @@ public abstract class NativeS3FileSystemContractBaseTest store.purge("test"); super.tearDown(); } - + + public void testCanonicalName() throws Exception { + assertNull("s3n doesn't support security token and shouldn't have canonical name", + fs.getCanonicalServiceName()); + } + public void testListStatusForRoot() throws Exception { FileStatus[] paths = fs.listStatus(path("/")); assertEquals("Root directory is not empty; ", 0, paths.length); @@ -60,7 +65,7 @@ public abstract class NativeS3FileSystemContractBaseTest assertEquals(1, paths.length); assertEquals(path("/test"), paths[0].getPath()); } - + public void testNoTrailingBackslashOnBucket() throws Exception { assertTrue(fs.getFileStatus(new Path(fs.getUri().toString())).isDirectory()); } From 3587b6774c393e7f3f8b8777429d1716ce06ca91 Mon Sep 17 00:00:00 2001 From: Zhijie Shen Date: Tue, 11 Feb 2014 04:39:37 +0000 Subject: [PATCH 13/47] YARN-1706. Created an utility method to dump timeline records to JSON strings. Contributed by Zhijie Shen. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1566982 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/util/TimelineUtils.java | 86 +++++++++++++++++++ .../TestApplicationTimelineRecords.java | 21 ++++- 3 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0e763c81592..bfc84618646 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -173,6 +173,9 @@ Release 2.4.0 - UNRELEASED on the configuration-provider mechanism during startup too. (Xuan Gong via vinodkv) + YARN-1706. Created an utility method to dump timeline records to JSON + strings. (zjshen) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java new file mode 100644 index 00000000000..4ab557e33e1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/TimelineUtils.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.codehaus.jackson.JsonGenerationException; +import org.codehaus.jackson.map.AnnotationIntrospector; +import org.codehaus.jackson.map.JsonMappingException; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.annotate.JsonSerialize.Inclusion; +import org.codehaus.jackson.xc.JaxbAnnotationIntrospector; + +/** + * The helper class for the timeline module. + * + */ +@Public +@Evolving +public class TimelineUtils { + + private static ObjectMapper mapper; + + static { + mapper = new ObjectMapper(); + AnnotationIntrospector introspector = new JaxbAnnotationIntrospector(); + mapper.setAnnotationIntrospector(introspector); + mapper.getSerializationConfig() + .setSerializationInclusion(Inclusion.NON_NULL); + } + + /** + * Serialize a POJO object into a JSON string not in a pretty format + * + * @param o + * an object to serialize + * @return a JSON string + * @throws IOException + * @throws JsonMappingException + * @throws JsonGenerationException + */ + public static String dumpTimelineRecordtoJSON(Object o) + throws JsonGenerationException, JsonMappingException, IOException { + return dumpTimelineRecordtoJSON(o, false); + } + + /** + * Serialize a POJO object into a JSON string + * + * @param o + * an object to serialize + * @param pretty + * whether in a pretty format or not + * @return a JSON string + * @throws IOException + * @throws JsonMappingException + * @throws JsonGenerationException + */ + public static String dumpTimelineRecordtoJSON(Object o, boolean pretty) + throws JsonGenerationException, JsonMappingException, IOException { + if (pretty) { + return mapper.defaultPrettyPrintingWriter().writeValueAsString(o); + } else { + return mapper.writeValueAsString(o); + } + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java index 24d1ce91e62..330e099364e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/records/apptimeline/TestApplicationTimelineRecords.java @@ -19,18 +19,23 @@ package org.apache.hadoop.yarn.api.records.apptimeline; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import junit.framework.Assert; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.apptimeline.ATSPutErrors.ATSPutError; 
+import org.apache.hadoop.yarn.util.TimelineUtils; import org.junit.Test; public class TestApplicationTimelineRecords { + private static final Log LOG = + LogFactory.getLog(TestApplicationTimelineRecords.class); + @Test - public void testATSEntities() { + public void testATSEntities() throws Exception { ATSEntities entities = new ATSEntities(); for (int j = 0; j < 2; ++j) { ATSEntity entity = new ATSEntity(); @@ -53,6 +58,9 @@ public class TestApplicationTimelineRecords { entity.addOtherInfo("okey2", "oval2"); entities.addEntity(entity); } + LOG.info("Entities in JSON:"); + LOG.info(TimelineUtils.dumpTimelineRecordtoJSON(entities, true)); + Assert.assertEquals(2, entities.getEntities().size()); ATSEntity entity1 = entities.getEntities().get(0); Assert.assertEquals("entity id 0", entity1.getEntityId()); @@ -71,7 +79,7 @@ public class TestApplicationTimelineRecords { } @Test - public void testATSEvents() { + public void testATSEvents() throws Exception { ATSEvents events = new ATSEvents(); for (int j = 0; j < 2; ++j) { ATSEvents.ATSEventsOfOneEntity partEvents = @@ -88,6 +96,9 @@ public class TestApplicationTimelineRecords { } events.addEvent(partEvents); } + LOG.info("Events in JSON:"); + LOG.info(TimelineUtils.dumpTimelineRecordtoJSON(events, true)); + Assert.assertEquals(2, events.getAllEvents().size()); ATSEvents.ATSEventsOfOneEntity partEvents1 = events.getAllEvents().get(0); Assert.assertEquals("entity id 0", partEvents1.getEntityId()); @@ -112,7 +123,7 @@ public class TestApplicationTimelineRecords { } @Test - public void testATSPutErrors() { + public void testATSPutErrors() throws Exception { ATSPutErrors atsPutErrors = new ATSPutErrors(); ATSPutError error1 = new ATSPutError(); error1.setEntityId("entity id 1"); @@ -127,6 +138,8 @@ public class TestApplicationTimelineRecords { error2.setErrorCode(ATSPutError.IO_EXCEPTION); errors.add(error2); atsPutErrors.addErrors(errors); + LOG.info("Errors in JSON:"); + LOG.info(TimelineUtils.dumpTimelineRecordtoJSON(atsPutErrors, true)); Assert.assertEquals(3, atsPutErrors.getErrors().size()); ATSPutError e = atsPutErrors.getErrors().get(0); From c43c9dd7b5bf24fadca7dcd805af6f11dc6175e6 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Tue, 11 Feb 2014 13:25:30 +0000 Subject: [PATCH 14/47] Preparing to release hadoop-2.3.0 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567111 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 18 +++++++++++++++--- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 14 +++++++++++++- hadoop-mapreduce-project/CHANGES.txt | 14 +++++++++++++- hadoop-yarn-project/CHANGES.txt | 14 +++++++++++++- 4 files changed, 54 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 161ab457fb0..49a32c72342 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -334,15 +334,27 @@ Release 2.4.0 - UNRELEASED HADOOP-10326. M/R jobs can not access S3 if Kerberos is enabled. (bc Wong via atm) -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES - HADOOP-8545. Filesystem Implementation for OpenStack Swift - (Dmitry Mezhensky, David Dobbins, Stevel via stevel) + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 + + INCOMPATIBLE CHANGES NEW FEATURES + HADOOP-8545. 
Filesystem Implementation for OpenStack Swift + (Dmitry Mezhensky, David Dobbins, Stevel via stevel) + IMPROVEMENTS HADOOP-10046. Print a log message when SSL is enabled. diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ec44aa253f0..31f790fde78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -410,7 +410,19 @@ Release 2.4.0 - UNRELEASED HDFS-5886. Potential null pointer deference in RpcProgramNfs3#readlink() (brandonli) -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 3b72f402223..e6cc6c5c656 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -159,7 +159,19 @@ Release 2.4.0 - UNRELEASED BUG FIXES -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index bfc84618646..6660a49ca25 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -246,7 +246,19 @@ Release 2.4.0 - UNRELEASED YARN-1698. Fixed default TimelineStore in code to match what is documented in yarn-default.xml (Zhijie Shen via vinodkv) -Release 2.3.0 - UNRELEASED +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 INCOMPATIBLE CHANGES From 1b9cef0fdd5f0d221046d58cac632640afe5b553 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Tue, 11 Feb 2014 13:32:07 +0000 Subject: [PATCH 15/47] Release notes for hadoop-2.3.0. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567118 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/main/docs/releasenotes.html | 2950 +++++++++++++++++ 1 file changed, 2950 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html index efbaeae4b14..d2b6156573d 100644 --- a/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html +++ b/hadoop-common-project/hadoop-common/src/main/docs/releasenotes.html @@ -1,3 +1,2953 @@ + +Hadoop 2.3.0 Release Notes + + + +

    Hadoop 2.3.0 Release Notes

    +These release notes include new developer and user-facing incompatibilities, features, and major improvements. + +

    Changes since Hadoop 2.2.0

    +
      +
    • YARN-1642. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + RMDTRenewer#getRMClient should use ClientRMProxy
      +
      RMDTRenewer#getRMClient gets a proxy to the RM in the conf directly instead of going through ClientRMProxy. + +{code} + final YarnRPC rpc = YarnRPC.create(conf); + return (ApplicationClientProtocol)rpc.getProxy(ApplicationClientProtocol.class, addr, conf); +{code}
    • +
    • YARN-1630. + Major bug reported by Aditya Acharya and fixed by Aditya Acharya (client)
      + Introduce timeout for async polling operations in YarnClientImpl
      +
      I ran an MR2 application that would have been long running, and killed it programmatically using a YarnClient. The app was killed, but the client hung forever. The message that I saw, which spammed the logs, was "Watiting for application application_1389036507624_0018 to be killed." + +The RM log indicated that the app had indeed transitioned from RUNNING to KILLED, but for some reason future responses to the RPC to kill the application did not indicate that the app had been terminated. + +I tracked this down to YarnClientImpl.java, and though I was unable to reproduce the bug, I wrote a patch to introduce a bound on the number of times that YarnClientImpl retries the RPC before giving up.
    • +
    • YARN-1629. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + IndexOutOfBoundsException in Fair Scheduler MaxRunningAppsEnforcer
      +
      This can occur when the second-to-last app in a queue's pending app list is made runnable. The app is pulled out from under the iterator.
    • +
    • YARN-1628. + Major bug reported by Mit Desai and fixed by Vinod Kumar Vavilapalli
      + TestContainerManagerSecurity fails on trunk
      +
      The Test fails with the following error + +{noformat} +java.lang.IllegalArgumentException: java.net.UnknownHostException: InvalidHost + at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:377) + at org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager.newInstance(BaseNMTokenSecretManager.java:145) + at org.apache.hadoop.yarn.server.security.BaseNMTokenSecretManager.createNMToken(BaseNMTokenSecretManager.java:136) + at org.apache.hadoop.yarn.server.TestContainerManagerSecurity.testNMTokens(TestContainerManagerSecurity.java:253) + at org.apache.hadoop.yarn.server.TestContainerManagerSecurity.testContainerManager(TestContainerManagerSecurity.java:144) +{noformat}
    • +
    • YARN-1624. + Major bug reported by Aditya Acharya and fixed by Aditya Acharya (scheduler)
      + QueuePlacementPolicy format is not easily readable via a JAXB parser
      +
      The current format for specifying queue placement rules in the fair scheduler allocations file does not lend itself to easy parsing via a JAXB parser. In particular, relying on the tag name to encode information about which rule to use makes it very difficult for an xsd-based JAXB parser to preserve the order of the rules, which is essential.
    • +
    • YARN-1623. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Include queue name in RegisterApplicationMasterResponse
      +
      This provides the YARN change necessary to support MAPREDUCE-5732.
    • +
    • YARN-1618. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Fix invalid RMApp transition from NEW to FINAL_SAVING
      +
      YARN-891 augments the RMStateStore to store information on completed applications. In the process, it adds transitions from NEW to FINAL_SAVING. This leads to the RM trying to update entries in the state-store that do not exist. On ZKRMStateStore, this leads to the RM crashing. + +Previous description: +ZKRMStateStore fails to handle updates to znodes that don't exist. For instance, this can happen when an app transitions from NEW to FINAL_SAVING. In these cases, the store should create the missing znode and handle the update.
    • +
    • YARN-1616. + Trivial improvement reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + RMFatalEventDispatcher should log the cause of the event
      +
      RMFatalEventDispatcher#handle() logs the receipt of an event and its type, but leaves out the cause. The cause captures why the event was raised and would help debugging issues.
    • +
    • YARN-1608. + Trivial bug reported by Karthik Kambatla and fixed by Karthik Kambatla (nodemanager)
      + LinuxContainerExecutor has a few DEBUG messages at INFO level
      +
      LCE has a few INFO level log messages meant to be at debug level. In fact, they are logged both at INFO and DEBUG.
    • +
    • YARN-1607. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + TestRM expects the capacity scheduler
      +
      We should either explicitly set the Capacity Scheduler or make it scheduler-agnostic
    • +
    • YARN-1603. + Trivial bug reported by Zhijie Shen and fixed by Zhijie Shen
      + Remove two *.orig files which were unexpectedly committed
      +
      FairScheduler.java.orig and TestFifoScheduler.java.orig
    • +
    • YARN-1601. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur
      + 3rd party JARs are missing from hadoop-dist output
      +
      With the build changes of YARN-888 we are leaving out all 3rd party JARs used directly by YARN under /share/hadoop/yarn/lib/. + +We did not notice this when running minicluster because they all happen to be in the classpath from hadoop-common and hadoop-yarn. + +As 3rd party JARs are not 'public' interfaces we cannot rely on them being provided to yarn by common and hdfs. (i.e. if common and hdfs stop using a 3rd party dependency that yarn uses, this would break yarn if yarn does not pull that dependency explicitly). + +Also, this will break the bigtop hadoop build when they move to use branch-2, as they expect to find jars in /share/hadoop/yarn/lib/
    • +
    • YARN-1600. + Blocker bug reported by Jason Lowe and fixed by Haohui Mai (resourcemanager)
      + RM does not startup when security is enabled without spnego configured
      +
      We have a custom auth filter in front of our various UI pages that handles user authentication. However currently the RM assumes that if security is enabled then the user must have configured spnego as well for the RM web pages which is not true in our case.
    • +
    • YARN-1598. + Critical sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (client , resourcemanager)
      + HA-related rmadmin commands don't work on a secure cluster
      +
      The HA-related commands like -getServiceState -checkHealth etc. don't work in a secure cluster.
    • +
    • YARN-1579. + Trivial sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + ActiveRMInfoProto fields should be optional
      +
      Per discussion on YARN-1568, ActiveRMInfoProto should have optional fields instead of required fields.
    • +
    • YARN-1575. + Critical sub-task reported by Jason Lowe and fixed by Jason Lowe (nodemanager)
      + Public localizer crashes with "Localized unkown resource"
      +
      The public localizer can crash with the error: + +{noformat} +2014-01-08 14:11:43,212 [Thread-467] ERROR org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Localized unkonwn resource to java.util.concurrent.FutureTask@852e26 +2014-01-08 14:11:43,212 [Thread-467] INFO org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService: Public cache exiting +{noformat}
    • +
    • YARN-1574. + Blocker sub-task reported by Xuan Gong and fixed by Xuan Gong
      + RMDispatcher should be reset on transition to standby
      +
      Currently, we move rmDispatcher out of ActiveService. But we still register the event dispatchers, such as schedulerDispatcher and RMAppEventDispatcher, when we initiate the ActiveService. + +Almost every time we transition the RM from Active to Standby, we need to initiate the ActiveService. That means we will register the same event dispatchers, which will cause the same event to be handled several times.
    • +
    • YARN-1573. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + ZK store should use a private password for root-node-acls
      +
      Currently, when HA is enabled, ZK store uses cluster-timestamp as the password for root node ACLs to give the Active RM exclusive access to the store. A more private value like a random number might be better.
    • +
    • YARN-1568. + Trivial task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Rename clusterid to clusterId in ActiveRMInfoProto
      +
      YARN-1029 introduces ActiveRMInfoProto - just realized it defines a field clusterid, which is inconsistent with other fields. Better to fix it immediately than leave the inconsistency.
    • +
    • YARN-1567. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + In Fair Scheduler, allow empty queues to change between leaf and parent on allocation file reload
      +
    • +
    • YARN-1560. + Major test reported by Ted Yu and fixed by Ted Yu
      + TestYarnClient#testAMMRTokens fails with null AMRM token
      +
      The following can be reproduced locally: +{code} +testAMMRTokens(org.apache.hadoop.yarn.client.api.impl.TestYarnClient) Time elapsed: 3.341 sec <<< FAILURE! +junit.framework.AssertionFailedError: null + at junit.framework.Assert.fail(Assert.java:48) + at junit.framework.Assert.assertTrue(Assert.java:20) + at junit.framework.Assert.assertNotNull(Assert.java:218) + at junit.framework.Assert.assertNotNull(Assert.java:211) + at org.apache.hadoop.yarn.client.api.impl.TestYarnClient.testAMMRTokens(TestYarnClient.java:382) +{code} +This test didn't appear in https://builds.apache.org/job/Hadoop-Yarn-trunk/442/consoleFull
    • +
    • YARN-1559. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Race between ServerRMProxy and ClientRMProxy setting RMProxy#INSTANCE
      +
      RMProxy#INSTANCE is a non-final static field and both ServerRMProxy and ClientRMProxy set it. This leads to races as witnessed on - YARN-1482. + +Sample trace: +{noformat} +java.lang.IllegalArgumentException: RM does not support this client protocol + at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88) + at org.apache.hadoop.yarn.client.ClientRMProxy.checkAllowedProtocols(ClientRMProxy.java:119) + at org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider.init(ConfiguredRMFailoverProxyProvider.java:58) + at org.apache.hadoop.yarn.client.RMProxy.createRMFailoverProxyProvider(RMProxy.java:158) + at org.apache.hadoop.yarn.client.RMProxy.createRMProxy(RMProxy.java:88) + at org.apache.hadoop.yarn.server.api.ServerRMProxy.createRMProxy(ServerRMProxy.java:56) +{noformat}
    • +
    • YARN-1549. + Major test reported by Ted Yu and fixed by haosdent
      + TestUnmanagedAMLauncher#testDSShell fails in trunk
      +
      The following error is reproducible: +{code} +testDSShell(org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher) Time elapsed: 14.911 sec <<< ERROR! +java.lang.RuntimeException: Failed to receive final expected state in ApplicationReport, CurrentState=RUNNING, ExpectedStates=FINISHED,FAILED,KILLED + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.monitorApplication(UnmanagedAMLauncher.java:447) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.run(UnmanagedAMLauncher.java:352) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher.testDSShell(TestUnmanagedAMLauncher.java:147) +{code} +See https://builds.apache.org/job/Hadoop-Yarn-trunk/435
    • +
    • YARN-1541. + Major bug reported by Jian He and fixed by Jian He
      + Invalidate AM Host/Port when app attempt is done so that in the mean-while client doesn’t get wrong information.
      +
    • +
    • YARN-1527. + Trivial bug reported by Jian He and fixed by Akira AJISAKA
      + yarn rmadmin command prints wrong usage info:
      +
      The usage should be: yarn rmadmin, instead of java RMAdmin, and the -refreshQueues should be in the second line. +{code} Usage: java RMAdmin -refreshQueues + -refreshNodes + -refreshSuperUserGroupsConfiguration + -refreshUserToGroupsMappings + -refreshAdminAcls + -refreshServiceAcl + -getGroups [username] + -help [cmd] + -transitionToActive <serviceId> + -transitionToStandby <serviceId> + -failover [--forcefence] [--forceactive] <serviceId> <serviceId> + -getServiceState <serviceId> + -checkHealth <serviceId> +{code}
    • +
    • YARN-1523. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Use StandbyException instead of RMNotYetReadyException
      +
    • +
    • YARN-1522. + Major bug reported by Liyin Liang and fixed by Liyin Liang
      + TestApplicationCleanup.testAppCleanup occasionally fails
      +
      TestApplicationCleanup is occasionally failing with the error: +{code} +------------------------------------------------------------------------------- +Test set: org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup +------------------------------------------------------------------------------- +Tests run: 1, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 6.215 sec <<< FAILURE! - in org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup +testAppCleanup(org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup) Time elapsed: 5.555 sec <<< FAILURE! +junit.framework.AssertionFailedError: expected:<1> but was:<0> +at org.apache.hadoop.yarn.server.resourcemanager.TestApplicationCleanup.testAppCleanup(TestApplicationCleanup.java:119) +{code}
    • +
    • YARN-1505. + Blocker bug reported by Xuan Gong and fixed by Xuan Gong
      + WebAppProxyServer should not set localhost as YarnConfiguration.PROXY_ADDRESS by itself
      +
      At WebAppProxyServer::startServer(), it will set up YarnConfiguration.PROXY_ADDRESS to localhost:9099 by itself. So, no matter what value we set for YarnConfiguration.PROXY_ADDRESS in the configuration, the proxy server will bind to localhost:9099
    • +
    • YARN-1491. + Trivial bug reported by Jonathan Eagles and fixed by Chen He
      + Upgrade JUnit3 TestCase to JUnit 4
      +
      There are still four references to test classes that extend from junit.framework.TestCase + +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestYarnVersionInfo.java +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestWindowsResourceCalculatorPlugin.java +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestLinuxResourceCalculatorPlugin.java +hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestWindowsBasedProcessTree.java +
    • +
    • YARN-1485. + Major sub-task reported by Xuan Gong and fixed by Xuan Gong
      + Enabling HA should verify the RM service addresses configurations have been set for every RM Ids defined in RM_HA_IDs
      +
      After YARN-1325, the YarnConfiguration.RM_HA_IDS will contain multiple RM_Ids. We need to verify that the RM service addresses configurations have been set for all of RM_Ids.
    • +
    • YARN-1482. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Xuan Gong
      + WebApplicationProxy should be always-on w.r.t HA even if it is embedded in the RM
      +
      This way, even if an RM goes to standby mode, we can effect a redirect to the active. And more importantly, users will not suddenly see all their links stop working.
    • +
    • YARN-1481. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Vinod Kumar Vavilapalli
      + Move internal services logic from AdminService to ResourceManager
      +
      This is something I found while reviewing YARN-1318, but didn't halt that patch as many cycles went there already. Some top level issues + - Not easy to follow RM's service life cycle + -- RM adds only AdminService as its service directly. + -- Other services are added to RM when AdminService's init calls RM.activeServices.init() + - Overall, AdminService shouldn't encompass all of RM's HA state management. It was originally supposed to be the implementation of just the RPC server.
    • +
    • YARN-1463. + Major test reported by Ted Yu and fixed by Vinod Kumar Vavilapalli
      + Tests should avoid starting http-server where possible or creates spnego keytab/principals
      +
      Here is stack trace: +{code} +testContainerManager[1](org.apache.hadoop.yarn.server.TestContainerManagerSecurity) Time elapsed: 1.756 sec <<< ERROR! +org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.io.IOException: ResourceManager failed to start. Final state is STOPPED + at org.apache.hadoop.yarn.server.MiniYARNCluster$ResourceManagerWrapper.serviceStart(MiniYARNCluster.java:253) + at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) + at org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:121) + at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) + at org.apache.hadoop.yarn.server.TestContainerManagerSecurity.testContainerManager(TestContainerManagerSecurity.java:110) +{code}
    • +
    • YARN-1454. + Critical bug reported by Jian He and fixed by Karthik Kambatla
      + TestRMRestart.testRMDelegationTokenRestoredOnRMRestart is failing intermittently
      +
    • +
    • YARN-1451. + Minor bug reported by Sandy Ryza and fixed by Sandy Ryza
      + TestResourceManager relies on the scheduler assigning multiple containers in a single node update
      +
      TestResourceManager relies on the capacity scheduler. + +It relies on a scheduler that assigns multiple containers in a single heartbeat, which not all schedulers do by default. It also relies on schedulers that don't consider CPU capacities. It would be simple to change the test to use multiple heartbeats and increase the vcore capacities of the nodes in the test.
    • +
    • YARN-1450. + Major bug reported by Akira AJISAKA and fixed by Binglin Chang (applications/distributed-shell)
      + TestUnmanagedAMLauncher#testDSShell fails on trunk
      +
      TestUnmanagedAMLauncher fails on trunk. The console output is +{code} +Running org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher +Tests run: 2, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 35.937 sec <<< FAILURE! - in org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher +testDSShell(org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher) Time elapsed: 14.558 sec <<< ERROR! +java.lang.RuntimeException: Failed to receive final expected state in ApplicationReport, CurrentState=ACCEPTED, ExpectedStates=FINISHED,FAILED,KILLED + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.monitorApplication(UnmanagedAMLauncher.java:447) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.UnmanagedAMLauncher.run(UnmanagedAMLauncher.java:352) + at org.apache.hadoop.yarn.applications.unmanagedamlauncher.TestUnmanagedAMLauncher.testDSShell(TestUnmanagedAMLauncher.java:145) +{code}
    • +
    • YARN-1448. + Major sub-task reported by Wangda Tan and fixed by Wangda Tan (api , resourcemanager)
      + AM-RM protocol changes to support container resizing
      +
      As described in YARN-1197, we need to add an API in RM to support +1) Add increase request in AllocateRequest +2) Can get successfully increased/decreased containers from RM in AllocateResponse
    • +
    • YARN-1447. + Major sub-task reported by Wangda Tan and fixed by Wangda Tan (api)
      + Common PB type definitions for container resizing
      +
      As described in YARN-1197, we need to add some common PB types for container resource change, like ResourceChangeContext, etc. These types will be used by both RM and NM protocols
    • +
    • YARN-1446. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + Change killing application to wait until state store is done
      +
      When a user kills an application, the RM should wait until the state store is done saving the killed status of the application. Otherwise, if the RM crashes between the user killing the application and the status being written to the store, the RM will relaunch this application after it restarts.
    • +
    • YARN-1435. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Distributed Shell should not run other commands except "sh", and run the custom script at the same time.
      +
      Currently, if we want to run a custom script in DS, we can do it like this: +--shell_command sh --shell_script custom_script.sh +But it may be better to separate running shell_command and shell_script
    • +
    • YARN-1425. + Major bug reported by Omkar Vinit Joshi and fixed by Omkar Vinit Joshi
      + TestRMRestart fails because MockRM.waitForState(AttemptId) uses current attempt instead of the attempt passed as argument
      +
      TestRMRestart is failing on trunk. Fixing it.
    • +
    • YARN-1423. + Major improvement reported by Sandy Ryza and fixed by Ted Malaska (scheduler)
      + Support queue placement by secondary group in the Fair Scheduler
      +
    • +
    • YARN-1419. + Minor bug reported by Jonathan Eagles and fixed by Jonathan Eagles (scheduler)
      + TestFifoScheduler.testAppAttemptMetrics fails intermittently under jdk7
      +
      QueueMetrics holds its data in a static variable, causing metrics to bleed over from test to test. clearQueueMetrics is to be called for tests that need to measure metrics correctly for a single test. jdk7 comes into play since tests are run out of order, and in that case makes the metrics unreliable.
    • +
    • YARN-1416. + Major bug reported by Omkar Vinit Joshi and fixed by Jian He
      + InvalidStateTransitions getting reported in multiple test cases even though they pass
      +
      It might be worth checking why they are reporting this. +Testcases: TestRMAppTransitions, TestRM +There are a large number of such errors. +can't handle RMAppEventType.APP_UPDATE_SAVED at RMAppState.FAILED +
    • +
    • YARN-1411. + Critical sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla
      + HA config shouldn't affect NodeManager RPC addresses
      +
      When HA is turned on, {{YarnConfiguration#getSoketAddress()}} fetches rpc-addresses corresponding to the specified rm-id. This should only be for RM rpc-addresses. Other confs, like NM rpc-addresses shouldn't be affected by this. + +Currently, the NM address settings in yarn-site.xml aren't reflected in the actual ports.
    • +
    • YARN-1409. + Major bug reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA
      + NonAggregatingLogHandler can throw RejectedExecutionException
      +
      This problem is caused by handling APPLICATION_FINISHED events after calling sched.shutdown() in NonAggregatingLogHandler#serviceStop(). org.apache.hadoop.mapred.TestJobCleanup can fail because of RejectedExecutionException by NonAggregatingLogHandler. + +{code} +2013-11-13 10:53:06,970 FATAL [AsyncDispatcher event handler] event.AsyncDispatcher (AsyncDispatcher.java:dispatch(166)) - Error in dispatcher thread +java.util.concurrent.RejectedExecutionException: Task java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask@d51df63 rejected from java.util.concurrent.ScheduledThreadPoolExecutor@7a20e369[Shutting down, pool size = 4, active threads = 0, queued tasks = 7, completed tasks = 0] + at java.util.concurrent.ThreadPoolExecutor$AbortPolicy.rejectedExecution(ThreadPoolExecutor.java:2048) + at java.util.concurrent.ThreadPoolExecutor.reject(ThreadPoolExecutor.java:821) + at java.util.concurrent.ScheduledThreadPoolExecutor.delayedExecute(ScheduledThreadPoolExecutor.java:325) + at java.util.concurrent.ScheduledThreadPoolExecutor.schedule(ScheduledThreadPoolExecutor.java:530) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.NonAggregatingLogHandler.handle(NonAggregatingLogHandler.java:121) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.NonAggregatingLogHandler.handle(NonAggregatingLogHandler.java:49) + at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:159) + at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:95) + at java.lang.Thread.run(Thread.java:724) +{code}
    • +
    • YARN-1407. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + RM Web UI and REST APIs should uniformly use YarnApplicationState
      +
      RMAppState isn't a public facing enum like YarnApplicationState, so we shouldn't return values or list filters that come from it. However, some Blocks and AppInfo are still using RMAppState. + +It is not 100% clear to me whether or not fixing this would be a backwards-incompatible change. The change would only reduce the set of possible strings that the API returns, so I think not. We have also been changing the contents of RMAppState since 2.2.0, e.g. in YARN-891. It would still be good to fix this ASAP (i.e. for 2.2.1).
    • +
    • YARN-1405. + Major sub-task reported by Yesha Vora and fixed by Jian He
      + RM hangs on shutdown if calling system.exit in serviceInit or serviceStart
      +
      Enable yarn.resourcemanager.recovery.enabled=true and Pass a local path to yarn.resourcemanager.fs.state-store.uri. such as "file:///tmp/MYTMP" + +if the directory /tmp/MYTMP is not readable or writable, RM should crash and should print "Permission denied Error" + +Currently, RM throws "java.io.FileNotFoundException: File file:/tmp/MYTMP/FSRMStateRoot/RMDTSecretManagerRoot does not exist" Error. RM returns Exiting status 1 but RM process does not shutdown. + +Snapshot of Resource manager log: + +2013-09-27 18:31:36,621 INFO security.NMTokenSecretManagerInRM (NMTokenSecretManagerInRM.java:rollMasterKey(97)) - Rolling master-key for nm-tokens +2013-09-27 18:31:36,694 ERROR resourcemanager.ResourceManager (ResourceManager.java:serviceStart(640)) - Failed to load/recover state +java.io.FileNotFoundException: File file:/tmp/MYTMP/FSRMStateRoot/RMDTSecretManagerRoot does not exist + at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:379) + at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1478) + at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1518) + at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:564) + at org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore.loadRMDTSecretManagerState(FileSystemRMStateStore.java:188) + at org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore.loadState(FileSystemRMStateStore.java:112) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceStart(ResourceManager.java:635) + at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:855) +2013-09-27 18:31:36,697 INFO util.ExitUtil (ExitUtil.java:terminate(124)) - Exiting with status 1
    • +
    • YARN-1403. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Separate out configuration loading from QueueManager in the Fair Scheduler
      +
    • +
    • YARN-1401. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (nodemanager)
      + With zero sleep-delay-before-sigkill.ms, no signal is ever sent
      +
      If you set in yarn-site.xml yarn.nodemanager.sleep-delay-before-sigkill.ms=0 then an unresponsive child JVM is never killed. In MRv1, TT used to immediately SIGKILL in this case.
    • +
    • YARN-1400. + Trivial bug reported by Raja Aluri and fixed by Raja Aluri (resourcemanager)
      + yarn.cmd uses HADOOP_RESOURCEMANAGER_OPTS. Should be YARN_RESOURCEMANAGER_OPTS.
      +
      yarn.cmd uses HADOOP_RESOURCEMANAGER_OPTS. Should be YARN_RESOURCEMANAGER_OPTS.
    • +
    • YARN-1395. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (applications/distributed-shell)
      + Distributed shell application master launched with debug flag can hang waiting for external ls process.
      +
      Distributed shell launched with the debug flag will run {{ApplicationMaster#dumpOutDebugInfo}}. This method launches an external process to run ls and print the contents of the current working directory. We've seen that this can cause the application master to hang on {{Process#waitFor}}.
    • +
    • YARN-1392. + Major new feature reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Allow sophisticated app-to-queue placement policies in the Fair Scheduler
      +
      Currently the Fair Scheduler supports app-to-queue placement by username. It would be beneficial to allow more sophisticated policies that rely on primary and secondary groups and fallbacks.
    • +
    • YARN-1388. + Trivial bug reported by Liyin Liang and fixed by Liyin Liang (resourcemanager)
      + Fair Scheduler page always displays blank fair share
      +
      YARN-1044 fixed the min/max/used resource display problem in the scheduler page. But the "Fair Share" display has the same problem and needs to be fixed.
    • +
    • YARN-1387. + Major improvement reported by Karthik Kambatla and fixed by Karthik Kambatla (api)
      + RMWebServices should use ClientRMService for filtering applications
      +
      YARN's REST API allows filtering applications; this should be moved to ClientRMService so that the Java API also supports the same functionality.
    • +
    • YARN-1386. + Critical bug reported by Jason Lowe and fixed by Jason Lowe (nodemanager)
      + NodeManager mistakenly loses resources and relocalizes them
      +
      When a local resource that should already be present is requested again, the nodemanager checks to see if it is still present. However, the method it uses to check for presence is via File.exists() as the user of the nodemanager process. If the resource was a private resource localized for another user, it will be localized to a location that is not accessible by the nodemanager user. Therefore File.exists() returns false, the nodemanager mistakenly believes the resource is no longer available, and it proceeds to localize it over and over.
    • +
    • YARN-1381. + Minor bug reported by Ted Yu and fixed by Ted Yu
      + Same relaxLocality appears twice in exception message of AMRMClientImpl#checkLocalityRelaxationConflict()
      +
      Here is related code: +{code} + throw new InvalidContainerRequestException("Cannot submit a " + + "ContainerRequest asking for location " + location + + " with locality relaxation " + relaxLocality + " when it has " + + "already been requested with locality relaxation " + relaxLocality); +{code} +The last relaxLocality should be reqs.values().iterator().next().remoteRequest.getRelaxLocality()
    • +
    • YARN-1378. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + Implement a RMStateStore cleaner for deleting application/attempt info
      +
      Now that we are storing the final state of application/attempt instead of removing application/attempt info on application/attempt completion(YARN-891), we need a separate RMStateStore cleaner for cleaning the application/attempt state.
    • +
    • YARN-1374. + Blocker bug reported by Devaraj K and fixed by Karthik Kambatla (resourcemanager)
      + Resource Manager fails to start due to ConcurrentModificationException
      +
      Resource Manager is failing to start with the below ConcurrentModificationException. + +{code:xml} +2013-10-30 20:22:42,371 INFO org.apache.hadoop.util.HostsFileReader: Refreshing hosts (include/exclude) list +2013-10-30 20:22:42,376 INFO org.apache.hadoop.service.AbstractService: Service ResourceManager failed in state INITED; cause: java.util.ConcurrentModificationException +java.util.ConcurrentModificationException + at java.util.AbstractList$Itr.checkForComodification(AbstractList.java:372) + at java.util.AbstractList$Itr.next(AbstractList.java:343) + at java.util.Collections$UnmodifiableCollection$1.next(Collections.java:1010) + at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:107) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:187) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:944) +2013-10-30 20:22:42,378 INFO org.apache.hadoop.yarn.server.resourcemanager.RMHAProtocolService: Transitioning to standby +2013-10-30 20:22:42,378 INFO org.apache.hadoop.yarn.server.resourcemanager.RMHAProtocolService: Transitioned to standby +2013-10-30 20:22:42,378 FATAL org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Error starting ResourceManager +java.util.ConcurrentModificationException + at java.util.AbstractList$Itr.checkForComodification(AbstractList.java:372) + at java.util.AbstractList$Itr.next(AbstractList.java:343) + at java.util.Collections$UnmodifiableCollection$1.next(Collections.java:1010) + at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:107) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:187) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:944) +2013-10-30 20:22:42,379 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: SHUTDOWN_MSG: +/************************************************************ +SHUTDOWN_MSG: Shutting down ResourceManager at HOST-10-18-40-24/10.18.40.24 +************************************************************/ +{code}
    • +
    • YARN-1358. + Minor test reported by Chuan Liu and fixed by Chuan Liu (client)
      + TestYarnCLI fails on Windows due to line endings
      +
      The unit test fails on Windows due to incorrect line endings was used for comparing the output from command line output. Error messages are as follows. +{noformat} +junit.framework.ComparisonFailure: expected:<...argument for options[] +usage: application +...> but was:<...argument for options[ +] +usage: application +...> + at junit.framework.Assert.assertEquals(Assert.java:85) + at junit.framework.Assert.assertEquals(Assert.java:91) + at org.apache.hadoop.yarn.client.cli.TestYarnCLI.testMissingArguments(TestYarnCLI.java:878) +{noformat}
    • +
    • YARN-1357. + Minor test reported by Chuan Liu and fixed by Chuan Liu (nodemanager)
      + TestContainerLaunch.testContainerEnvVariables fails on Windows
      +
      This test fails on Windows due to incorrect use of batch script command. Error messages are as follows. +{noformat} +junit.framework.AssertionFailedError: expected:<java.nio.HeapByteBuffer[pos=0 lim=19 cap=19]> but was:<java.nio.HeapByteBuffer[pos=0 lim=19 cap=19]> + at junit.framework.Assert.fail(Assert.java:50) + at junit.framework.Assert.failNotEquals(Assert.java:287) + at junit.framework.Assert.assertEquals(Assert.java:67) + at junit.framework.Assert.assertEquals(Assert.java:74) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch.testContainerEnvVariables(TestContainerLaunch.java:508) +{noformat}
    • +
    • YARN-1351. + Trivial bug reported by Konstantin Weitz and fixed by Konstantin Weitz (resourcemanager)
      + Invalid string format in Fair Scheduler log warn message
      +
      While trying to print a warning, two values of the wrong type (Resource instead of int) are passed into a String.format method call, leading to a runtime exception, in the file: + +_trunk/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java_. + +The warning was intended to be printed whenever the resources don't fit into each other, either because the number of virtual cores or the memory is too small. I changed the %d's into %s; this way the warning will contain both the cores and the memory. +
    • +
    • YARN-1349. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (client)
      + yarn.cmd does not support passthrough to any arbitrary class.
      +
      The yarn shell script supports passthrough to calling any arbitrary class if the first argument is not one of the pre-defined sub-commands. The equivalent cmd script does not implement this and instead fails trying to do a labeled goto to the first argument.
    • +
    • YARN-1343. + Critical bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (resourcemanager)
      + NodeManagers additions/restarts are not reported as node updates in AllocateResponse responses to AMs
      +
      If a NodeManager joins the cluster or gets restarted, running AMs never receive the node update indicating the Node is running.
    • +
    • YARN-1335. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Move duplicate code from FSSchedulerApp and FiCaSchedulerApp into SchedulerApplication
      +
      FSSchedulerApp and FiCaSchedulerApp use duplicate code in a lot of places. They both extend SchedulerApplication. We can move a lot of this duplicate code into SchedulerApplication.
    • +
    • YARN-1333. + Major improvement reported by Sandy Ryza and fixed by Tsuyoshi OZAWA (scheduler)
      + Support blacklisting in the Fair Scheduler
      +
    • +
    • YARN-1332. + Minor improvement reported by Sandy Ryza and fixed by Sebastian Wong
      + In TestAMRMClient, replace assertTrue with assertEquals where possible
      +
      TestAMRMClient uses a lot of "assertTrue(amClient.ask.size() == 0)" where "assertEquals(0, amClient.ask.size())" would make it easier to see why it's failing at a glance.
    • +
    • YARN-1331. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (client)
      + yarn.cmd exits with NoClassDefFoundError trying to run rmadmin or logs
      +
      The yarn shell script was updated so that the rmadmin and logs sub-commands launch {{org.apache.hadoop.yarn.client.cli.RMAdminCLI}} and {{org.apache.hadoop.yarn.client.cli.LogsCLI}}. The yarn.cmd script also needs to be updated so that the commands work on Windows.
    • +
    • YARN-1325. + Major sub-task reported by Tsuyoshi OZAWA and fixed by Xuan Gong (resourcemanager)
      + Enabling HA should check Configuration contains multiple RMs
      +
      Currently, we can enable the RM HA configuration without multiple RM ids (YarnConfiguration.RM_HA_IDS). This behaviour can lead to incorrect operation. The ResourceManager should verify that more than 1 RM id is specified in RM_HA_IDS. + +One idea is to support a "strict mode" to enforce this check as configuration (e.g. yarn.resourcemanager.ha.strict-mode.enabled).
    • +
    • YARN-1323. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla
      + Set HTTPS webapp address along with other RPC addresses in HAUtil
      +
      YARN-1232 adds the ability to configure multiple RMs, but missed out the https web app address. Need to add that in.
    • +
    • YARN-1321. + Blocker bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (client)
      + NMTokenCache is a singleton, prevents multiple AMs running in a single JVM to work correctly
      +
      NMTokenCache is a singleton. Because of this, when running multiple AMs in a single JVM, NMTokens for the same node from different AMs step on each other and starting containers fails due to mismatched tokens. + +The error observed on the client side is something like: + +{code} +ERROR org.apache.hadoop.security.UserGroupInformation: PriviledgedActionException as:llama (auth:PROXY) via llama (auth:SIMPLE) cause:org.apache.hadoop.yarn.exceptions.YarnException: Unauthorized request to start container. +NMToken for application attempt : appattempt_1382038445650_0002_000001 was used for starting container with container token issued for application attempt : appattempt_1382038445650_0001_000001 +{code} +
    • +
    • YARN-1320. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Custom log4j properties in Distributed shell does not work properly.
      +
      Distributed shell cannot pick up custom log4j properties (specified with -log_properties). It always uses default log4j properties.
    • +
    • YARN-1318. + Blocker sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Promote AdminService to an Always-On service and merge in RMHAProtocolService
      +
      Per discussion in YARN-1068, we want AdminService to handle HA-admin operations in addition to the regular non-HA admin operations. To facilitate this, we need to move AdminService to an Always-On service.
    • +
    • YARN-1315. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (resourcemanager , scheduler)
      + TestQueueACLs should also test FairScheduler
      +
    • +
    • YARN-1314. + Major bug reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Cannot pass more than 1 argument to shell command
      +
      Distributed shell cannot accept more than 1 parameter in the argument part. + +All of these commands are treated as 1 parameter: + +/usr/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar <distributed shell jar> -shell_command echo -shell_args "'"My name" "is Teddy"'" +/usr/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar <distributed shell jar> -shell_command echo -shell_args "''My name' 'is Teddy''" +/usr/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client -jar <distributed shell jar> -shell_command echo -shell_args "'My name' 'is Teddy'"
    • +
    • YARN-1311. + Trivial sub-task reported by Vinod Kumar Vavilapalli and fixed by Vinod Kumar Vavilapalli
      + Fix app specific scheduler-events' names to be app-attempt based
      +
      Today, APP_ADDED and APP_REMOVED are sent to the scheduler. They are misnomers as schedulers only deal with AppAttempts today. This JIRA is for fixing their names so that we can add App-level events in the near future, notably for work-preserving RM-restart.
    • +
    • YARN-1307. + Major sub-task reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA (resourcemanager)
      + Rethink znode structure for RM HA
      +
      Rethink for znode structure for RM HA is proposed in some JIRAs(YARN-659, YARN-1222). The motivation of this JIRA is quoted from Bikas' comment in YARN-1222: +{quote} +We should move to creating a node hierarchy for apps such that all znodes for an app are stored under an app znode instead of the app root znode. This will help in removeApplication and also in scaling better on ZK. The earlier code was written this way to ensure create/delete happens under a root znode for fencing. But given that we have moved to multi-operations globally, this isnt required anymore. +{quote}
    • +
    • YARN-1306. + Major bug reported by Wei Yan and fixed by Wei Yan
      + Clean up hadoop-sls sample-conf according to YARN-1228
      +
      Move the fair scheduler allocations configuration to fair-scheduler.xml, and move all scheduler stuff to yarn-site.xml
    • +
    • YARN-1305. + Major sub-task reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA (resourcemanager)
      + RMHAProtocolService#serviceInit should handle HAUtil's IllegalArgumentException
      +
      When yarn.resourcemanager.ha.enabled is true, RMHAProtocolService#serviceInit calls HAUtil.setAllRpcAddresses. If the configuration values are null, it just throws IllegalArgumentException. +It's messy to analyse which keys are null, so we should handle it and log the name of keys which are null. + +A current log dump is as follows: +{code} +2013-10-15 06:24:53,431 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: registered UNIX signal handlers for [TERM, HUP, INT] +2013-10-15 06:24:54,203 INFO org.apache.hadoop.service.AbstractService: Service RMHAProtocolService failed in state INITED; cause: java.lang.IllegalArgumentException: Property value must not be null +java.lang.IllegalArgumentException: Property value must not be null + at com.google.common.base.Preconditions.checkArgument(Preconditions.java:88) + at org.apache.hadoop.conf.Configuration.set(Configuration.java:816) + at org.apache.hadoop.conf.Configuration.set(Configuration.java:798) + at org.apache.hadoop.yarn.conf.HAUtil.setConfValue(HAUtil.java:100) + at org.apache.hadoop.yarn.conf.HAUtil.setAllRpcAddresses(HAUtil.java:105) + at org.apache.hadoop.yarn.server.resourcemanager.RMHAProtocolService.serviceInit(RMHAProtocolService.java:60) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:108) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.serviceInit(ResourceManager.java:187) + at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163) + at org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.main(ResourceManager.java:940) +{code}
    • +
    • YARN-1303. + Major improvement reported by Tassapol Athiapinya and fixed by Xuan Gong (applications/distributed-shell)
      + Allow multiple commands separating with ";" in distributed-shell
      +
      In shell, we can do "ls; ls" to run 2 commands at once. + +In distributed shell, this is not working. We should improve to allow this to occur. There are practical use cases that I know of to run multiple commands or to set environment variables before a command.
    • +
    • YARN-1300. + Major bug reported by Ted Yu and fixed by Ted Yu
      + SLS tests fail because conf puts yarn properties in fair-scheduler.xml
      +
      I was looking at https://builds.apache.org/job/PreCommit-YARN-Build/2165//testReport/org.apache.hadoop.yarn.sls/TestSLSRunner/testSimulatorRunning/ +I am able to reproduce the failure locally. + +I found that FairSchedulerConfiguration.getAllocationFile() doesn't read the yarn.scheduler.fair.allocation.file config entry from fair-scheduler.xml + +This leads to the following: +{code} +Caused by: org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AllocationConfigurationException: Bad fair scheduler config file: top-level element not <allocations> + at org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager.reloadAllocs(QueueManager.java:302) + at org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.QueueManager.initialize(QueueManager.java:108) + at org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.reinitialize(FairScheduler.java:1145) +{code}
    • +
    • YARN-1295. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (nodemanager)
      + In UnixLocalWrapperScriptBuilder, using bash -c can cause "Text file busy" errors
      +
      I missed this when working on YARN-1271.
    • +
    • YARN-1293. + Major bug reported by Tsuyoshi OZAWA and fixed by Tsuyoshi OZAWA
      + TestContainerLaunch.testInvalidEnvSyntaxDiagnostics fails on trunk
      +
      {quote} +------------------------------------------------------------------------------- +Test set: org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch +------------------------------------------------------------------------------- +Tests run: 8, Failures: 1, Errors: 0, Skipped: 0, Time elapsed: 12.655 sec <<< FAILURE! - in org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch +testInvalidEnvSyntaxDiagnostics(org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch) Time elapsed: 0.114 sec <<< FAILURE! +junit.framework.AssertionFailedError: null + at junit.framework.Assert.fail(Assert.java:48) + at junit.framework.Assert.assertTrue(Assert.java:20) + at junit.framework.Assert.assertTrue(Assert.java:27) + at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.TestContainerLaunch.testInvalidEnvSyntaxDiagnostics(TestContainerLaunch.java:273) +{quote}
    • +
    • YARN-1290. + Major improvement reported by Wei Yan and fixed by Wei Yan
      + Let continuous scheduling achieve more balanced task assignment
      +
      Currently, in continuous scheduling (YARN-1010), in each round, the thread iterates over pre-ordered nodes and assigns tasks. This mechanism may overload the first several nodes, while later nodes have no tasks. + +We should sort all nodes according to available resources. In each round, always assign tasks to nodes with larger capacity, which can balance the load distribution among all nodes.
    • +
    • YARN-1288. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Make Fair Scheduler ACLs more user friendly
      +
      The Fair Scheduler currently defaults the root queue's acl to empty and all other queues' acl to "*". Now that YARN-1258 enables configuring the root queue, we should reverse this. This will also bring the Fair Scheduler in line with the Capacity Scheduler. + +We should also not trim the acl strings, which makes it impossible to only specify groups in an acl.
    • +
    • YARN-1284. + Blocker bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (nodemanager)
      + LCE: Race condition leaves dangling cgroups entries for killed containers
      +
      When LCE & cgroups are enabled, when a container is is killed (in this case by its owning AM, an MRAM) it seems to be a race condition at OS level when doing a SIGTERM/SIGKILL and when the OS does all necessary cleanup. + +LCE code, after sending the SIGTERM/SIGKILL and getting the exitcode, immediately attempts to clean up the cgroups entry for the container. But this is failing with an error like: + +{code} +2013-10-07 15:21:24,359 WARN org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor: Exit code from container container_1381179532433_0016_01_000011 is : 143 +2013-10-07 15:21:24,359 DEBUG org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container: Processing container_1381179532433_0016_01_000011 of type UPDATE_DIAGNOSTICS_MSG +2013-10-07 15:21:24,359 DEBUG org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler: deleteCgroup: /run/cgroups/cpu/hadoop-yarn/container_1381179532433_0016_01_000011 +2013-10-07 15:21:24,359 WARN org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler: Unable to delete cgroup at: /run/cgroups/cpu/hadoop-yarn/container_1381179532433_0016_01_000011 +{code} + + +CgroupsLCEResourcesHandler.clearLimits() has logic to wait for 500 ms for AM containers to avoid this problem. it seems this should be done for all containers. + +Still, waiting for extra 500ms seems too expensive. + +We should look at a way of doing this in a more 'efficient way' from time perspective, may be spinning while the deleteCgroup() cannot be done with a minimal sleep and a timeout. +
    • +
    • YARN-1283. + Major sub-task reported by Yesha Vora and fixed by Omkar Vinit Joshi
      + Invalid 'url of job' mentioned in Job output with yarn.http.policy=HTTPS_ONLY
      +
      After setting yarn.http.policy=HTTPS_ONLY, the job output shows incorrect "The url to track the job". + +Currently, its printing http://RM:<httpsport>/proxy/application_1381162886563_0001/ instead https://RM:<httpsport>/proxy/application_1381162886563_0001/ + +http://hostname:8088/proxy/application_1381162886563_0001/ is invalid + +hadoop jar hadoop-mapreduce-client-jobclient-tests.jar sleep -m 1 -r 1 +13/10/07 18:39:39 INFO client.RMProxy: Connecting to ResourceManager at hostname/100.00.00.000:8032 +13/10/07 18:39:40 INFO mapreduce.JobSubmitter: number of splits:1 +13/10/07 18:39:40 INFO Configuration.deprecation: user.name is deprecated. Instead, use mapreduce.job.user.name +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.map.tasks.speculative.execution is deprecated. Instead, use mapreduce.map.speculative +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.partitioner.class is deprecated. Instead, use mapreduce.job.partitioner.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.mapoutput.value.class is deprecated. Instead, use mapreduce.map.output.value.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.map.class is deprecated. Instead, use mapreduce.job.map.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.reduce.class is deprecated. Instead, use mapreduce.job.reduce.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.inputformat.class is deprecated. Instead, use mapreduce.job.inputformat.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir +13/10/07 18:39:40 INFO Configuration.deprecation: mapreduce.outputformat.class is deprecated. Instead, use mapreduce.job.outputformat.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.mapoutput.key.class is deprecated. Instead, use mapreduce.map.output.key.class +13/10/07 18:39:40 INFO Configuration.deprecation: mapred.working.dir is deprecated. 
Instead, use mapreduce.job.working.dir +13/10/07 18:39:40 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1381162886563_0001 +13/10/07 18:39:40 INFO impl.YarnClientImpl: Submitted application application_1381162886563_0001 to ResourceManager at hostname/100.00.00.000:8032 +13/10/07 18:39:40 INFO mapreduce.Job: The url to track the job: http://hostname:8088/proxy/application_1381162886563_0001/ +13/10/07 18:39:40 INFO mapreduce.Job: Running job: job_1381162886563_0001 +13/10/07 18:39:46 INFO mapreduce.Job: Job job_1381162886563_0001 running in uber mode : false +13/10/07 18:39:46 INFO mapreduce.Job: map 0% reduce 0% +13/10/07 18:39:53 INFO mapreduce.Job: map 100% reduce 0% +13/10/07 18:39:58 INFO mapreduce.Job: map 100% reduce 100% +13/10/07 18:39:58 INFO mapreduce.Job: Job job_1381162886563_0001 completed successfully +13/10/07 18:39:58 INFO mapreduce.Job: Counters: 43 + File System Counters + FILE: Number of bytes read=26 + FILE: Number of bytes written=177279 + FILE: Number of read operations=0 + FILE: Number of large read operations=0 + FILE: Number of write operations=0 + HDFS: Number of bytes read=48 + HDFS: Number of bytes written=0 + HDFS: Number of read operations=1 + HDFS: Number of large read operations=0 + HDFS: Number of write operations=0 + Job Counters + Launched map tasks=1 + Launched reduce tasks=1 + Other local map tasks=1 + Total time spent by all maps in occupied slots (ms)=7136 + Total time spent by all reduces in occupied slots (ms)=6062 + Map-Reduce Framework + Map input records=1 + Map output records=1 + Map output bytes=4 + Map output materialized bytes=22 + Input split bytes=48 + Combine input records=0 + Combine output records=0 + Reduce input groups=1 + Reduce shuffle bytes=22 + Reduce input records=1 + Reduce output records=0 + Spilled Records=2 + Shuffled Maps =1 + Failed Shuffles=0 + Merged Map outputs=1 + GC time elapsed (ms)=60 + CPU time spent (ms)=1700 + Physical memory (bytes) snapshot=567582720 + Virtual memory (bytes) snapshot=4292997120 + Total committed heap usage (bytes)=846594048 + Shuffle Errors + BAD_ID=0 + CONNECTION=0 + IO_ERROR=0 + WRONG_LENGTH=0 + WRONG_MAP=0 + WRONG_REDUCE=0 + File Input Format Counters + Bytes Read=0 + File Output Format Counters + Bytes Written=0 + +
    • +
    • YARN-1268. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + TestFairScheduler.testContinuousScheduling is flaky
      +
      It looks like there's a timeout in it that's causing it to be flaky.
    • +
    • YARN-1265. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza (resourcemanager , scheduler)
      + Fair Scheduler chokes on unhealthy node reconnect
      +
      Only nodes in the RUNNING state are tracked by schedulers. When a node reconnects, RMNodeImpl.ReconnectNodeTransition tries to remove it from the scheduler even if it's not in the RUNNING state, and the FairScheduler doesn't guard against this. + +I think the best way to fix this is to check whether a node is RUNNING before telling the scheduler to remove it (see the sketch below).
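      A minimal sketch of that guard, using hypothetical TrackedNode/Scheduler interfaces instead of the real RMNodeImpl and scheduler event plumbing:
{code}
import org.apache.hadoop.yarn.api.records.NodeState;

public class ReconnectGuard {

  /** Illustrative stand-ins for the RM's internal node and scheduler types. */
  public interface TrackedNode {
    NodeState getState();
  }
  public interface Scheduler {
    void removeNode(TrackedNode node);
  }

  /**
   * Only asks the scheduler to forget a reconnecting node if the scheduler
   * could actually be tracking it, i.e. if the node was RUNNING. Nodes in
   * other states were never handed to the scheduler, so there is nothing
   * to remove.
   */
  public static void onReconnect(TrackedNode node, Scheduler scheduler) {
    if (node.getState() == NodeState.RUNNING) {
      scheduler.removeNode(node);
    }
  }
}
{code}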
    • +
    • YARN-1259. + Trivial bug reported by Sandy Ryza and fixed by Robert Kanter (scheduler)
      + In Fair Scheduler web UI, queue num pending and num active apps switched
      +
      The values returned in FairSchedulerLeafQueueInfo by numPendingApplications and numActiveApplications should be switched.
    • +
    • YARN-1258. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (scheduler)
      + Allow configuring the Fair Scheduler root queue
      +
      This would be useful for acls, maxRunningApps, scheduling modes, etc. + +The allocation file should be able to accept both: +* An implicit root queue +* A root queue at the top of the hierarchy with all queues under/inside of it
    • +
    • YARN-1253. + Blocker new feature reported by Alejandro Abdelnur and fixed by Roman Shaposhnik (nodemanager)
      + Changes to LinuxContainerExecutor to run containers as a single dedicated user in non-secure mode
      +
      When using cgroups we require LCE to be configured in the cluster to start containers. + +LCE starts containers as the user that submitted the job. While this works correctly in a secure setup, in an unsecure setup this presents a couple of issues: + +* LCE requires all Hadoop users submitting jobs to be Unix users on all nodes +* Because users can impersonate other users, any user would have access to any local file of other users + +In particular, the second issue is not desirable, as a user could get access to the ssh keys of other users on the nodes or, if there are NFS mounts, get to other users' data outside of the cluster.
    • +
    • YARN-1241. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + In Fair Scheduler, maxRunningApps does not work for non-leaf queues
      +
      Setting the maxRunningApps property on a parent queue should make it so that the sum of running apps in all of its subqueues can't exceed it.
    • +
    • YARN-1239. + Major sub-task reported by Bikas Saha and fixed by Jian He (resourcemanager)
      + Save version information in the state store
      +
      When creating root dir for the first time we should write version 1. If root dir exists then we should check that the version in the state store matches the version from config.
    • +
    • YARN-1232. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Configuration to support multiple RMs
      +
      We should augment the configuration to allow users to specify two RMs and the individual RPC addresses for them (an illustrative configuration sketch follows below).
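      An illustrative sketch of what such a configuration might look like from client code; the property names below are assumptions about the eventual shape (a list of RM ids plus per-id RPC addresses), so check yarn-default.xml of your release for the exact keys:
{code}
import org.apache.hadoop.conf.Configuration;

public class TwoResourceManagerConf {

  /** Builds a Configuration naming two RMs and their RPC addresses. */
  public static Configuration buildHaConf() {
    Configuration conf = new Configuration();
    // Hypothetical key names and hosts, for illustration only.
    conf.set("yarn.resourcemanager.ha.rm-ids", "rm1,rm2");
    conf.set("yarn.resourcemanager.address.rm1", "rm1.example.com:8032");
    conf.set("yarn.resourcemanager.address.rm2", "rm2.example.com:8032");
    return conf;
  }
}
{code}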
    • +
    • YARN-1222. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Make improvements in ZKRMStateStore for fencing
      +
      Use multi-operations for every ZK interaction. +In every operation, automatically create/delete a lock znode that is a child of the root znode. This achieves fencing by modifying the create/delete permissions on the root znode (a sketch of such a fenced write follows below).
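      A simplified sketch of such a fenced write using the ZooKeeper multi API; the lock znode name and ACL handling are illustrative, not the actual ZKRMStateStore implementation:
{code}
import java.util.Arrays;
import java.util.List;

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.Op;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

public class FencedZkWrite {

  /**
   * Wraps a state-store update in one multi-operation that creates and then
   * deletes a lock child under the root znode. If another RM has restricted
   * create/delete permissions on the root znode (the fencing step), the
   * create fails and the whole transaction is rejected atomically.
   */
  public static void fencedSetData(ZooKeeper zk, String rootPath,
      String nodePath, byte[] data)
      throws KeeperException, InterruptedException {
    String lockPath = rootPath + "/RM_FENCING_LOCK";  // illustrative name
    List<Op> ops = Arrays.asList(
        Op.create(lockPath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE,
            CreateMode.PERSISTENT),
        Op.setData(nodePath, data, -1),
        Op.delete(lockPath, -1));
    zk.multi(ops);
  }
}
{code}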
    • +
    • YARN-1210. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Omkar Vinit Joshi
      + During RM restart, RM should start a new attempt only when previous attempt exits for real
      +
      When the RM recovers, it can wait for existing AMs to contact the RM back and then kill them forcefully before even starting a new AM. Worst case, the RM will start a new AppAttempt after waiting for 10 mins (the expiry interval). This way we'll minimize multiple AMs racing with each other. This can help issues with downstream components like Pig, Hive and Oozie during RM restart. + +In the meanwhile, new apps will proceed as usual while existing apps wait for recovery. + +This can continue to be useful after work-preserving restart, so that AMs which can properly sync back up with the RM can continue to run and those that don't are guaranteed to be killed before starting a new attempt.
    • +
    • YARN-1199. + Major improvement reported by Mit Desai and fixed by Mit Desai
      + Make NM/RM Versions Available
      +
      Now that we have the NM and RM versions available, we can display the YARN version of nodes running in the cluster. + +
    • +
    • YARN-1188. + Trivial bug reported by Akira AJISAKA and fixed by Tsuyoshi OZAWA
      + The context of QueueMetrics becomes 'default' when using FairScheduler
      +
      I found that the context of QueueMetrics changed to 'default' from 'yarn' when I was using FairScheduler. +The context should always be 'yarn'; this can be fixed by adding an annotation to FSQueueMetrics as below: + +{code} ++ @Metrics(context="yarn") +public class FSQueueMetrics extends QueueMetrics { +{code}
    • +
    • YARN-1185. + Major sub-task reported by Jason Lowe and fixed by Omkar Vinit Joshi (resourcemanager)
      + FileSystemRMStateStore can leave partial files that prevent subsequent recovery
      +
      FileSystemRMStateStore writes directly to the destination file when storing state. However, if the RM were to crash in the middle of the write, the recovery method could encounter a partially-written file and either outright crash during recovery or silently load incomplete state. + +To avoid this, the data should be written to a temporary file and renamed to the destination file afterwards (a sketch of this pattern follows below).
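      A minimal sketch of the write-then-rename pattern against the Hadoop FileSystem API (class and method names here are illustrative, not the actual FileSystemRMStateStore change):
{code}
import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AtomicStateWrite {

  /**
   * Writes state to a ".tmp" sibling first and only renames it into place
   * once the data is fully flushed, so a crash mid-write never leaves a
   * partially written destination file behind.
   */
  public static void writeAtomically(FileSystem fs, Path dest, byte[] data)
      throws IOException {
    Path tmp = new Path(dest.getParent(), dest.getName() + ".tmp");
    FSDataOutputStream out = fs.create(tmp, true);
    try {
      out.write(data);
      out.hflush();
    } finally {
      out.close();
    }
    // Remove any stale destination, then move the temp file into place.
    fs.delete(dest, false);
    if (!fs.rename(tmp, dest)) {
      throw new IOException("Failed to rename " + tmp + " to " + dest);
    }
  }
}
{code}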
    • +
    • YARN-1183. + Major bug reported by Andrey Klochkov and fixed by Andrey Klochkov
      + MiniYARNCluster shutdown takes several minutes intermittently
      +
      As described in MAPREDUCE-5501, sometimes M/R tests leave MRAppMaster java processes living for several minutes after successful completion of the corresponding test. There is a concurrency issue in the MiniYARNCluster shutdown logic which leads to this. Sometimes the RM stops before an app master sends its last report, and then the app master keeps retrying for >6 minutes. In some cases this leads to failures in subsequent tests, and it affects the performance of tests as app masters eat resources.
    • +
    • YARN-1182. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla
      + MiniYARNCluster creates and inits the RM/NM only on start()
      +
      MiniYARNCluster creates and inits the RM/NM only on start(). It should create and init() during init() itself.
    • +
    • YARN-1181. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla
      + Augment MiniYARNCluster to support HA mode
      +
      MiniYARNHACluster, along the lines of MiniYARNCluster, is needed for end-to-end HA tests.
    • +
    • YARN-1180. + Trivial bug reported by Thomas Graves and fixed by Chen He (capacityscheduler)
      + Update capacity scheduler docs to include types on the configs
      +
      The capacity scheduler docs (http://hadoop.apache.org/docs/r2.1.0-beta/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html) don't include types for all the configs. For instance, minimum-user-limit-percent doesn't say it's an Int. It is also the only setting for the Resource Allocation configs that is an Int rather than a float.
    • +
    • YARN-1176. + Critical bug reported by Thomas Graves and fixed by Jonathan Eagles (resourcemanager)
      + RM web services ClusterMetricsInfo total nodes doesn't include unhealthy nodes
      +
      In the web services api for the cluster/metrics, the totalNodes reported doesn't include the unhealthy nodes. + +this.totalNodes = activeNodes + lostNodes + decommissionedNodes + + rebootedNodes;
    • +
    • YARN-1172. + Major sub-task reported by Karthik Kambatla and fixed by Tsuyoshi OZAWA (resourcemanager)
      + Convert *SecretManagers in the RM to services
      +
    • +
    • YARN-1145. + Major bug reported by Rohith and fixed by Rohith
      + Potential file handle leak in aggregated logs web ui
      +
      If there is any problem getting aggregated logs for rendering on the web UI, the LogReader is not closed. + +Because the reader is not closed, many connections are left in CLOSE_WAIT state (a sketch of the close-in-finally pattern follows below). + +hadoopuser@hadoopuser:> jps +*27909* JobHistoryServer + +DataNode port is 50010. When grepped for the DataNode port, many connections are in CLOSE_WAIT from JHS. +hadoopuser@hadoopuser:> netstat -tanlp |grep 50010 +tcp 0 0 10.18.40.48:50010 0.0.0.0:* LISTEN 21453/java +tcp 1 0 10.18.40.48:20596 10.18.40.48:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:19667 10.18.40.152:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:20593 10.18.40.48:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:12290 10.18.40.48:50010 CLOSE_WAIT *27909*/java +tcp 1 0 10.18.40.48:19662 10.18.40.152:50010 CLOSE_WAIT *27909*/java
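      A sketch of the close-in-finally pattern (a generic Closeable stands in for the actual aggregated-log reader):
{code}
import java.io.Closeable;
import java.io.IOException;

public class AggregatedLogRendering {

  /**
   * Renders aggregated logs but always closes the reader, even when
   * rendering fails, so the underlying connections do not pile up in
   * CLOSE_WAIT.
   */
  public static void renderWithCleanup(Closeable logReader, Runnable render) {
    try {
      render.run();
    } finally {
      try {
        logReader.close();
      } catch (IOException e) {
        // Best-effort close; real code would log the failure and move on.
      }
    }
  }
}
{code}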
    • +
    • YARN-1138. + Major bug reported by Yingda Chen and fixed by Chuan Liu (api)
      + yarn.application.classpath is set to point to $HADOOP_CONF_DIR etc., which does not work on Windows
      +
      yarn-default.xml has "yarn.application.classpath" entry set to $HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/,$HADOOP_COMMON_HOME/share/hadoop/common/lib/,$HADOOP_HDFS_HOME/share/hadoop/hdfs/,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib. It does not work on Windows which needs to be fixed.
    • +
    • YARN-1121. + Major sub-task reported by Bikas Saha and fixed by Jian He (resourcemanager)
      + RMStateStore should flush all pending store events before closing
      +
      on serviceStop it should wait for all internal pending events to drain before stopping.
    • +
    • YARN-1119. + Major test reported by Robert Parker and fixed by Mit Desai (resourcemanager)
      + Add ClusterMetrics checks to the TestRMNodeTransitions tests
      +
      YARN-1101 identified an issue where UNHEALTHY nodes could double decrement the active nodes. We should add checks for RUNNING node transitions.
    • +
    • YARN-1109. + Major improvement reported by Sandy Ryza and fixed by haosdent (nodemanager)
      + Demote NodeManager "Sending out status for container" logs to debug
      +
      Diagnosing NodeManager and container launch problems is made more difficult by the enormous number of logs like +{code} +Sending out status for container: container_id {, app_attempt_id {, application_id {, id: 18, cluster_timestamp: 1377559361179, }, attemptId: 1, }, id: 1337, }, state: C_RUNNING, diagnostics: "Container killed by the ApplicationMaster.\n", exit_status: -1000 +{code} + +On an NM with a few containers I am seeing tens of these per second.
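      The usual fix for this kind of chatty per-container logging is to demote the statement to DEBUG and guard it, along these lines (a generic sketch, not the exact NodeManager code):
{code}
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class ContainerStatusLogging {

  private static final Log LOG = LogFactory.getLog(ContainerStatusLogging.class);

  /**
   * Per-container status lines are only interesting when debugging, so log
   * them at DEBUG and skip the string concatenation entirely otherwise.
   */
  public static void logContainerStatus(Object status) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Sending out status for container: " + status);
    }
  }
}
{code}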
    • +
    • YARN-1101. + Major bug reported by Robert Parker and fixed by Robert Parker (resourcemanager)
      + Active nodes can be decremented below 0
      +
      The issue is in RMNodeImpl, where both the RUNNING and UNHEALTHY states transition to a deactivated state (LOST, DECOMMISSIONED, REBOOTED) using the same DeactivateNodeTransition class. The DeactivateNodeTransition class naturally decrements the active node count; however, in cases where the node has transitioned to UNHEALTHY, the active count has already been decremented.
    • +
    • YARN-1098. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Separate out RM services into "Always On" and "Active"
      +
      From discussion on YARN-1027, it makes sense to separate out services that are stateful and stateless. The stateless services can run perennially irrespective of whether the RM is in Active/Standby state, while the stateful services need to be started on transitionToActive() and completely shutdown on transitionToStandby(). + +The external-facing stateless services should respond to the client/AM/NM requests depending on whether the RM is Active/Standby. +
    • +
    • YARN-1068. + Major sub-task reported by Karthik Kambatla and fixed by Karthik Kambatla (resourcemanager)
      + Add admin support for HA operations
      +
      Support HA admin operations to facilitate transitioning the RM to Active and Standby states.
    • +
    • YARN-1060. + Major bug reported by Sandy Ryza and fixed by Niranjan Singh (scheduler)
      + Two tests in TestFairScheduler are missing @Test annotation
      +
      Amazingly, these tests appear to pass with the annotations added.
    • +
    • YARN-1053. + Blocker bug reported by Omkar Vinit Joshi and fixed by Omkar Vinit Joshi
      + Diagnostic message from ContainerExitEvent is ignored in ContainerImpl
      +
      If the container launch fails then we send ContainerExitEvent. This event contains exitCode and diagnostic message. Today we are ignoring diagnostic message while handling this event inside ContainerImpl. Fixing it as it is useful in diagnosing the failure.
    • +
    • YARN-1044. + Critical bug reported by Sangjin Lee and fixed by Sangjin Lee (resourcemanager , scheduler)
      + used/min/max resources do not display info in the scheduler page
      +
      Go to the scheduler page in the RM, and click any queue to display the detailed info. You'll find that none of the resource entries (used, min, or max) display values. + +This is because the values contain brackets ("<" and ">") and are not properly html-escaped.
    • +
    • YARN-1033. + Major sub-task reported by Nemon Lou and fixed by Karthik Kambatla
      + Expose RM active/standby state to Web UI and REST API
      +
      Both the active and standby RM shall expose their web server and show their current state (active or standby) on the web page. Users should be able to access this information through the REST API as well.
    • +
    • YARN-1029. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Allow embedding leader election into the RM
      +
      It should be possible to embed the common ActiveStandbyElector into the RM such that ZooKeeper-based leader election and notification is built in. In conjunction with a ZK state store, this configuration will be a simple deployment option.
    • +
    • YARN-1028. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Add FailoverProxyProvider like capability to RMProxy
      +
      RMProxy layer currently abstracts RM discovery and implements it by looking up service information from configuration. Motivated by HDFS and using existing classes from Common, we can add failover proxy providers that may provide RM discovery in extensible ways.
    • +
    • YARN-1027. + Major sub-task reported by Bikas Saha and fixed by Karthik Kambatla
      + Implement RMHAProtocolService
      +
      Implement existing HAServiceProtocol from Hadoop common. This protocol is the single point of interaction between the RM and HA clients/services.
    • +
    • YARN-1022. + Trivial bug reported by Bikas Saha and fixed by haosdent
      + Unnecessary INFO logs in AMRMClientAsync
      +
      Logs like the following should be debug or else every legitimate stop causes unnecessary exception traces in the logs. + +464 2013-08-03 20:01:34,459 INFO [AMRM Heartbeater thread] org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl: Heartbeater interrupted +465 java.lang.InterruptedException: sleep interrupted +466 at java.lang.Thread.sleep(Native Method) +467 at org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl$HeartbeatThread.run(AMRMClientAsyncImpl.java:249) +468 2013-08-03 20:01:34,460 INFO [AMRM Callback Handler Thread] org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl: Interrupted while waiting for queue +469 java.lang.InterruptedException +470 at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer. java:1961) +471 at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1996) +472 at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399) +473 at org.apache.hadoop.yarn.client.api.async.impl.AMRMClientAsyncImpl$CallbackHandlerThread.run(AMRMClientAsyncImpl.java:275)
    • +
    • YARN-1021. + Major new feature reported by Wei Yan and fixed by Wei Yan (scheduler)
      + Yarn Scheduler Load Simulator
      +
      The Yarn Scheduler is a fertile area of interest with different implementations, e.g., Fifo, Capacity and Fair schedulers. Meanwhile, several optimizations are also made to improve scheduler performance for different scenarios and workloads. Each scheduler algorithm has its own set of features, and drives scheduling decisions by many factors, such as fairness, capacity guarantee, resource availability, etc. It is very important to evaluate a scheduler algorithm very well before we deploy it in a production cluster. Unfortunately, it is currently non-trivial to evaluate a scheduling algorithm. Evaluating in a real cluster is always time and cost consuming, and it is also very hard to find a large-enough cluster. Hence, a simulator which can predict how well a scheduler algorithm works for some specific workload would be quite useful. + +We want to build a Scheduler Load Simulator to simulate large-scale Yarn clusters and application loads on a single machine. This would be invaluable in furthering Yarn by providing a tool for researchers and developers to prototype new scheduler features and predict their behavior and performance with a reasonable amount of confidence, thereby aiding rapid innovation. + +The simulator will exercise the real Yarn ResourceManager, removing the network factor by simulating NodeManagers and ApplicationMasters via handling and dispatching NM/AM heartbeat events from within the same JVM. + +To keep track of scheduler behavior and performance, a scheduler wrapper will wrap the real scheduler. + +The simulator will produce real-time metrics while executing, including: + +* Resource usage for the whole cluster and each queue, which can be utilized to configure the cluster and each queue's capacity. +* The detailed application execution trace (recorded in relation to simulated time), which can be analyzed to understand/validate the scheduler behavior (individual jobs' turnaround time, throughput, fairness, capacity guarantee, etc). +* Several key metrics of the scheduler algorithm, such as the time cost of each scheduler operation (allocate, handle, etc), which can be utilized by Hadoop developers to find code hot spots and scalability limits. + +The simulator will provide real-time charts showing the behavior of the scheduler and its performance. + +A short demo is available at http://www.youtube.com/watch?v=6thLi8q0qLE, showing how to use the simulator to simulate the Fair Scheduler and the Capacity Scheduler.
    • +
    • YARN-1010. + Critical improvement reported by Alejandro Abdelnur and fixed by Wei Yan (scheduler)
      + FairScheduler: decouple container scheduling from nodemanager heartbeats
      +
      Currently, scheduling for a node is done when that node heartbeats. + +For large clusters where the heartbeat interval is set to several seconds, this delays the scheduling of incoming allocations significantly. + +We could have a continuous loop scanning all nodes and doing scheduling. If there is availability, AMs will get the allocation in the next heartbeat after the one that placed the request.
    • +
    • YARN-985. + Major improvement reported by Ravi Prakash and fixed by Ravi Prakash (nodemanager)
      + Nodemanager should log where a resource was localized
      +
      When a resource is localized, we should log WHERE on the local disk it was localized. This helps in debugging afterwards (e.g. if the disk was to go bad).
    • +
    • YARN-976. + Major sub-task reported by Sandy Ryza and fixed by Sandy Ryza (documentation)
      + Document the meaning of a virtual core
      +
      As virtual cores are a somewhat novel concept, it would be helpful to have thorough documentation that clarifies their meaning.
    • +
    • YARN-895. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + RM crashes if it restarts while the state-store is down
      +
    • +
    • YARN-891. + Major sub-task reported by Bikas Saha and fixed by Jian He (resourcemanager)
      + Store completed application information in RM state store
      +
      Store completed application/attempt info in the RMStateStore when an application/attempt completes. This solves some problems, like finished applications getting lost after RM restart, and some other races like YARN-1195.
    • +
    • YARN-888. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur
      + clean up POM dependencies
      +
      Intermediate 'pom' modules define dependencies inherited by leaf modules. + +This is causing issues in the IntelliJ IDE. + +We should normalize the leaf modules as in common, hdfs and tools, where all dependencies are defined in each leaf module and the intermediate 'pom' modules do not define any dependencies.
    • +
    • YARN-879. + Major bug reported by Junping Du and fixed by Junping Du
      + Fix tests w.r.t o.a.h.y.server.resourcemanager.Application
      +
      getResources() should return a list of containers allocated by the RM. However, it now returns null directly. The worse thing is: if LOG.debug is enabled, it will definitely cause an NPE.
    • +
    • YARN-819. + Major sub-task reported by Robert Parker and fixed by Robert Parker (nodemanager , resourcemanager)
      + ResourceManager and NodeManager should check for a minimum allowed version
      +
      Our use case is that during an upgrade on a large cluster, several NodeManagers may not restart with the new version. Once the RM comes back up, the NodeManager will re-register without issue to the RM. + +The NM should report its version to the RM. The RM should have a configuration to disallow the check (default), require the version to be equal to the RM (to prevent config changes for each release), equal to or greater than the RM (to allow NM upgrades), and finally an explicit version or version range. + +The RM should also have a configuration on how to treat a mismatch: REJECT, or REBOOT the NM.
    • +
    • YARN-807. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + When querying apps by queue, iterating over all apps is inefficient and limiting
      +
      The question "which apps are in queue x" can be asked via the RM REST APIs, through the ClientRMService, and through the command line. In all these cases, the question is answered by scanning through every RMApp and filtering by the app's queue name. + +All schedulers maintain a mapping of queues to applications. I think it would make more sense to ask the schedulers which applications are in a given queue. This is what was done in MR1. This would also have the advantage of allowing a parent queue to return all the applications on leaf queues under it, and allow queue name aliases, as in the way that "root.default" and "default" refer to the same queue in the fair scheduler. + +
    • +
    • YARN-786. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Expose application resource usage in RM REST API
      +
      It might be good to require users to explicitly ask for this information, as it's a little more expensive to collect than the other fields in AppInfo.
    • +
    • YARN-764. + Major bug reported by Nemon Lou and fixed by Nemon Lou (resourcemanager)
      + blank Used Resources on Capacity Scheduler page
      +
      Even when there are jobs running, used resources is empty on the Capacity Scheduler page for leaf queues. (I use google-chrome on windows 7.) +After changing Resource.java's toString() method by replacing "<>" with "{}", this bug gets fixed.
    • +
    • YARN-709. + Major sub-task reported by Jian He and fixed by Jian He (resourcemanager)
      + verify that new jobs submitted with old RM delegation tokens after RM restart are accepted
      +
      More elaborate test for restoring RM delegation tokens on RM restart. +New jobs with old RM delegation tokens should be accepted by new RM as long as the token is still valid
    • +
    • YARN-674. + Major sub-task reported by Vinod Kumar Vavilapalli and fixed by Omkar Vinit Joshi (resourcemanager)
      + Slow or failing DelegationToken renewals on submission itself make RM unavailable
      +
      This was caused by YARN-280. A slow or down NameNode will make it look like the RM is unavailable, as it may run out of RPC handlers due to blocked client submissions.
    • +
    • YARN-649. + Major sub-task reported by Sandy Ryza and fixed by Sandy Ryza (nodemanager)
      + Make container logs available over HTTP in plain text
      +
      It would be good to make container logs available over the REST API for MAPREDUCE-4362 and so that they can be accessed programmatically in general.
    • +
    • YARN-584. + Major bug reported by Sandy Ryza and fixed by Harshit Daga (scheduler)
      + In scheduler web UIs, queues unexpand on refresh
      +
      In the fair scheduler web UI, you can expand queue information. Refreshing the page causes the expansions to go away, which is annoying for someone who wants to monitor the scheduler page and needs to reopen all the queues they care about each time.
    • +
    • YARN-546. + Major bug reported by Lohit Vijayarenu and fixed by Sandy Ryza (scheduler)
      + Allow disabling the Fair Scheduler event log
      +
      Hadoop 1.0 supported an option to turn on/off FairScheduler event logging using mapred.fairscheduler.eventlog.enabled. In Hadoop 2.0, it looks like this option has been removed (or not ported?) which causes event logging to be enabled by default and there is no way to turn it off.
    • +
    • YARN-478. + Major sub-task reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + fix coverage org.apache.hadoop.yarn.webapp.log
      +
      fix coverage org.apache.hadoop.yarn.webapp.log +one patch for trunk, branch-2, branch-0.23
    • +
    • YARN-465. + Major sub-task reported by Aleksey Gorshkov and fixed by Andrey Klochkov
      + fix coverage org.apache.hadoop.yarn.server.webproxy
      +
      fix coverage org.apache.hadoop.yarn.server.webproxy +patch YARN-465-trunk.patch for trunk +patch YARN-465-branch-2.patch for branch-2 +patch YARN-465-branch-0.23.patch for branch-0.23 + +There is an issue in branch-0.23: the patch does not create the .keep file. +To fix it, run these commands: + +mkdir yhadoop-common/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/proxy +touch yhadoop-common/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/proxy/.keep +
    • +
    • YARN-461. + Major bug reported by Sandy Ryza and fixed by Wei Yan (resourcemanager)
      + Fair scheduler should not accept apps with empty string queue name
      +
      When an app is submitted with "" for the queue, the RMAppManager passes it on like it does with any other string. +
    • +
    • YARN-427. + Major sub-task reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + Coverage fix for org.apache.hadoop.yarn.server.api.*
      +
      Coverage fix for org.apache.hadoop.yarn.server.api.* + +patch YARN-427-trunk.patch for trunk +patch YARN-427-branch-2.patch for branch-2 and branch-0.23
    • +
    • YARN-425. + Major sub-task reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + coverage fix for yarn api
      +
      coverage fix for yarn api +patch YARN-425-trunk-a.patch for trunk +patch YARN-425-branch-2.patch for branch-2 +patch YARN-425-branch-0.23.patch for branch-0.23
    • +
    • YARN-408. + Minor bug reported by Mayank Bansal and fixed by Mayank Bansal (scheduler)
      + Capacity Scheduler delay scheduling should not be disabled by default
      +
      Capacity Scheduler delay scheduling should not be disabled by default. +Enabling it to number of nodes in one rack. + +Thanks, +Mayank
    • +
    • YARN-353. + Major sub-task reported by Hitesh Shah and fixed by Karthik Kambatla (resourcemanager)
      + Add Zookeeper-based store implementation for RMStateStore
      +
      Add a store that writes RM state data to ZK. +
    • +
    • YARN-312. + Major sub-task reported by Junping Du and fixed by Junping Du (api)
      + Add updateNodeResource in ResourceManagerAdministrationProtocol
      +
      Add fundamental RPC (ResourceManagerAdministrationProtocol) to support node's resource change. For design detail, please refer parent JIRA: YARN-291.
    • +
    • YARN-311. + Major sub-task reported by Junping Du and fixed by Junping Du (resourcemanager , scheduler)
      + Dynamic node resource configuration: core scheduler changes
      +
      As the first step, we go for resource change on the RM side and expose admin APIs (admin protocol, CLI, REST and JMX API) later. This jira will only contain changes in the scheduler. +The flow to update a node's resource and the awareness in resource scheduling is: +1. Resource update is through the admin API to the RM and takes effect on RMNodeImpl. +2. When the next NM heartbeat for updating status comes, the RMNode's resource change will be noticed and the delta resource is added to the schedulerNode's availableResource before actual scheduling happens. +3. The scheduler does resource allocation according to the new availableResource in SchedulerNode. +For more design details, please refer to the proposal and discussions in the parent JIRA: YARN-291.
    • +
    • YARN-305. + Critical bug reported by Lohit Vijayarenu and fixed by Lohit Vijayarenu (resourcemanager)
      + Fair scheduler logs too many "Node offered to app:..." messages
      +
      Running fair scheduler YARN shows that RM has lots of messages like the below. +{noformat} +INFO org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.AppSchedulable: Node offered to app: application_1357147147433_0002 reserved: false +{noformat} + +They dont seem to tell much and same line is dumped many times in RM log. It would be good to have it improved with node information or moved to some other logging level with enough debug information
    • +
    • YARN-7. + Major sub-task reported by Arun C Murthy and fixed by Junping Du
      + Add support for DistributedShell to ask for CPUs along with memory
      +
    • +
    • MAPREDUCE-5744. + Blocker bug reported by Sangjin Lee and fixed by Gera Shegalov
      + Job hangs because RMContainerAllocator$AssignedRequests.preemptReduce() violates the comparator contract
      +
    • +
    • MAPREDUCE-5743. + Major bug reported by Ted Yu and fixed by Ted Yu
      + TestRMContainerAllocator is failing
      +
    • +
    • MAPREDUCE-5729. + Critical bug reported by Karthik Kambatla and fixed by Karthik Kambatla (mrv2)
      + mapred job -list throws NPE
      +
    • +
    • MAPREDUCE-5725. + Major bug reported by Sandy Ryza and fixed by Sandy Ryza
      + TestNetworkedJob relies on the Capacity Scheduler
      +
    • +
    • MAPREDUCE-5724. + Critical bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (jobhistoryserver)
      + JobHistoryServer does not start if HDFS is not running
      +
    • +
    • MAPREDUCE-5723. + Blocker bug reported by Mohammad Kamrul Islam and fixed by Mohammad Kamrul Islam (applicationmaster)
      + MR AM container log can be truncated or empty
      +
    • +
    • MAPREDUCE-5694. + Major bug reported by Mohammad Kamrul Islam and fixed by Mohammad Kamrul Islam
      + MR AM container syslog is empty
      +
    • +
    • MAPREDUCE-5693. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (mrv2)
      + Restore MRv1 behavior for log flush
      +
    • +
    • MAPREDUCE-5692. + Major improvement reported by Gera Shegalov and fixed by Gera Shegalov (mrv2)
      + Add explicit diagnostics when a task attempt is killed due to speculative execution
      +
    • +
    • MAPREDUCE-5689. + Critical bug reported by Lohit Vijayarenu and fixed by Lohit Vijayarenu
      + MRAppMaster does not preempt reducers when scheduled maps cannot be fulfilled
      +
    • +
    • MAPREDUCE-5687. + Major test reported by Ted Yu and fixed by Jian He
      + TestYARNRunner#testResourceMgrDelegate fails with NPE after YARN-1446
      +
    • +
    • MAPREDUCE-5685. + Blocker bug reported by Yi Song and fixed by Yi Song (client)
      + getCacheFiles() api doesn't work in WrappedReducer.java due to typo
      +
    • +
    • MAPREDUCE-5679. + Major bug reported by Liyin Liang and fixed by Liyin Liang
      + TestJobHistoryParsing has race condition
      +
    • +
    • MAPREDUCE-5674. + Major bug reported by Chuan Liu and fixed by Chuan Liu (client)
      + Missing start and finish time in mapred.JobStatus
      +
    • +
    • MAPREDUCE-5672. + Major improvement reported by Gera Shegalov and fixed by Gera Shegalov (mr-am , mrv2)
      + Provide optional RollingFileAppender for container log4j (syslog)
      +
    • +
    • MAPREDUCE-5656. + Critical bug reported by Jason Lowe and fixed by Jason Lowe
      + bzip2 codec can drop records when reading data in splits
      +
    • +
    • MAPREDUCE-5650. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (mrv2)
      + Job fails when hprof mapreduce.task.profile.map/reduce.params is specified
      +
    • +
    • MAPREDUCE-5645. + Major bug reported by Jonathan Eagles and fixed by Mit Desai
      + TestFixedLengthInputFormat fails with native libs
      +
    • +
    • MAPREDUCE-5640. + Trivial improvement reported by Jason Lowe and fixed by Jason Lowe (test)
      + Rename TestLineRecordReader in jobclient module
      +
    • +
    • MAPREDUCE-5632. + Major test reported by Ted Yu and fixed by Jonathan Eagles
      + TestRMContainerAllocator#testUpdatedNodes fails
      +
    • +
    • MAPREDUCE-5631. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestJobEndNotifier.testNotifyRetries fails with Should have taken more than 5 seconds in jdk7
      +
    • +
    • MAPREDUCE-5625. + Major test reported by Jonathan Eagles and fixed by Mariappan Asokan
      + TestFixedLengthInputFormat fails in jdk7 environment
      +
    • +
    • MAPREDUCE-5623. + Major bug reported by Tsuyoshi OZAWA and fixed by Jason Lowe
      + TestJobCleanup fails because of RejectedExecutionException and NPE.
      +
    • +
    • MAPREDUCE-5616. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (client)
      + MR Client-AppMaster RPC max retries on socket timeout is too high.
      +
    • +
    • MAPREDUCE-5613. + Major bug reported by Gera Shegalov and fixed by Gera Shegalov (applicationmaster)
      + DefaultSpeculator holds and checks hashmap that is always empty
      +
    • +
    • MAPREDUCE-5610. + Major test reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestSleepJob fails in jdk7
      +
    • +
    • MAPREDUCE-5604. + Minor bug reported by Chris Nauroth and fixed by Chris Nauroth (test)
      + TestMRAMWithNonNormalizedCapabilities fails on Windows due to exceeding max path length
      +
    • +
    • MAPREDUCE-5601. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + ShuffleHandler fadvises file regions as DONTNEED even when fetch fails
      +
    • +
    • MAPREDUCE-5598. + Major bug reported by Robert Kanter and fixed by Robert Kanter (test)
      + TestUserDefinedCounters.testMapReduceJob is flakey
      +
    • +
    • MAPREDUCE-5596. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Allow configuring the number of threads used to serve shuffle connections
      +
    • +
    • MAPREDUCE-5587. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestTextOutputFormat fails on JDK7
      +
    • +
    • MAPREDUCE-5586. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestCopyMapper#testCopyFailOnBlockSizeDifference fails when run from hadoop-tools/hadoop-distcp directory
      +
    • +
    • MAPREDUCE-5585. + Major bug reported by Jonathan Eagles and fixed by Jonathan Eagles
      + TestCopyCommitter#testNoCommitAction Fails on JDK7
      +
    • +
    • MAPREDUCE-5569. + Major bug reported by Nathan Roberts and fixed by Nathan Roberts
      + FloatSplitter is not generating correct splits
      +
    • +
    • MAPREDUCE-5561. + Critical bug reported by Cindy Li and fixed by Karthik Kambatla
      + org.apache.hadoop.mapreduce.v2.app.job.impl.TestJobImpl testcase failing on trunk
      +
    • +
    • MAPREDUCE-5550. + Major bug reported by Vrushali C and fixed by Gera Shegalov
      + Task Status message (reporter.setStatus) not shown in UI with Hadoop 2.0
      +
    • +
    • MAPREDUCE-5546. + Major bug reported by Chuan Liu and fixed by Chuan Liu
      + mapred.cmd on Windows set HADOOP_OPTS incorrectly
      +
    • +
    • MAPREDUCE-5522. + Minor bug reported by Jinghui Wang and fixed by Jinghui Wang (test)
      + Incorrectly expect the array of JobQueueInfo returned by o.a.h.mapred.QueueManager#getJobQueueInfos to have a specific order.
      +
    • +
    • MAPREDUCE-5518. + Trivial bug reported by Albert Chu and fixed by Albert Chu (examples)
      + Fix typo "can't read paritions file"
      +
    • +
    • MAPREDUCE-5514. + Blocker bug reported by Zhijie Shen and fixed by Zhijie Shen
      + TestRMContainerAllocator fails on trunk
      +
    • +
    • MAPREDUCE-5504. + Major bug reported by Thomas Graves and fixed by Kousuke Saruta (client)
      + mapred queue -info inconsistent with types
      +
    • +
    • MAPREDUCE-5487. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (performance , task)
      + In task processes, JobConf is unnecessarily loaded again in Limits
      +
    • +
    • MAPREDUCE-5484. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza (task)
      + YarnChild unnecessarily loads job conf twice
      +
    • +
    • MAPREDUCE-5481. + Blocker bug reported by Jason Lowe and fixed by Sandy Ryza (mrv2 , test)
      + Enable uber jobs to have multiple reducers
      +
    • +
    • MAPREDUCE-5464. + Major task reported by Sandy Ryza and fixed by Sandy Ryza
      + Add analogs of the SLOTS_MILLIS counters that jive with the YARN resource model
      +
    • +
    • MAPREDUCE-5463. + Major task reported by Sandy Ryza and fixed by Tsuyoshi OZAWA
      + Deprecate SLOTS_MILLIS counters
      +
    • +
    • MAPREDUCE-5457. + Major improvement reported by Sandy Ryza and fixed by Sandy Ryza
      + Add a KeyOnlyTextOutputReader to enable streaming to write out text files without separators
      +
    • +
    • MAPREDUCE-5451. + Major bug reported by Mostafa Elhemali and fixed by Yingda Chen
      + MR uses LD_LIBRARY_PATH which doesn't mean anything in Windows
      +
    • +
    • MAPREDUCE-5431. + Major bug reported by Timothy St. Clair and fixed by Timothy St. Clair (build)
      + Missing pom dependency in MR-client
      +
    • +
    • MAPREDUCE-5411. + Major sub-task reported by Ashwin Shankar and fixed by Ashwin Shankar (jobhistoryserver)
      + Refresh size of loaded job cache on history server
      +
    • +
    • MAPREDUCE-5409. + Major sub-task reported by Devaraj K and fixed by Gera Shegalov
      + MRAppMaster throws InvalidStateTransitonException: Invalid event: TA_TOO_MANY_FETCH_FAILURE at KILLED for TaskAttemptImpl
      +
    • +
    • MAPREDUCE-5404. + Major bug reported by Ted Yu and fixed by Ted Yu (jobhistoryserver)
      + HSAdminServer does not use ephemeral ports in minicluster mode
      +
    • +
    • MAPREDUCE-5386. + Major sub-task reported by Ashwin Shankar and fixed by Ashwin Shankar (jobhistoryserver)
      + Ability to refresh history server job retention and job cleaner settings
      +
    • +
    • MAPREDUCE-5380. + Major bug reported by Stephen Chu and fixed by Stephen Chu
      + Invalid mapred command should return non-zero exit code
      +
    • +
    • MAPREDUCE-5373. + Major bug reported by Chuan Liu and fixed by Jonathan Eagles
      + TestFetchFailure.testFetchFailureMultipleReduces could fail intermittently
      +
    • +
    • MAPREDUCE-5356. + Major sub-task reported by Ashwin Shankar and fixed by Ashwin Shankar (jobhistoryserver)
      + Ability to refresh aggregated log retention period and check interval
      +
    • +
    • MAPREDUCE-5332. + Major new feature reported by Jason Lowe and fixed by Jason Lowe (jobhistoryserver)
      + Support token-preserving restart of history server
      +
    • +
    • MAPREDUCE-5329. + Major bug reported by Avner BenHanoch and fixed by Avner BenHanoch (mr-am)
      + APPLICATION_INIT is never sent to AuxServices other than the builtin ShuffleHandler
      +
    • +
    • MAPREDUCE-5316. + Major bug reported by Ashwin Shankar and fixed by Ashwin Shankar (client)
      + job -list-attempt-ids command does not handle illegal task-state
      +
    • +
    • MAPREDUCE-5266. + Major new feature reported by Jason Lowe and fixed by Ashwin Shankar (jobhistoryserver)
      + Ability to refresh retention settings on history server
      +
    • +
    • MAPREDUCE-5265. + Major new feature reported by Jason Lowe and fixed by Ashwin Shankar (jobhistoryserver)
      + History server admin service to refresh user and superuser group mappings
      +
    • +
    • MAPREDUCE-5186. + Critical bug reported by Sangjin Lee and fixed by Robert Parker (job submission)
      + mapreduce.job.max.split.locations causes some splits created by CombineFileInputFormat to fail
      +
    • +
    • MAPREDUCE-5102. + Major test reported by Aleksey Gorshkov and fixed by Andrey Klochkov
      + fix coverage org.apache.hadoop.mapreduce.lib.db and org.apache.hadoop.mapred.lib.db
      +
    • +
    • MAPREDUCE-5084. + Major test reported by Aleksey Gorshkov and fixed by Aleksey Gorshkov
      + fix coverage org.apache.hadoop.mapreduce.v2.app.webapp and org.apache.hadoop.mapreduce.v2.hs.webapp
      +
    • +
    • MAPREDUCE-5052. + Critical bug reported by Kendall Thrapp and fixed by Chen He (jobhistoryserver , webapps)
      + Job History UI and web services confusing job start time and job submit time
      +
    • +
    • MAPREDUCE-5020. + Major bug reported by Trevor Robinson and fixed by Trevor Robinson (client)
      + Compile failure with JDK8
      +
    • +
    • MAPREDUCE-4680. + Major bug reported by Sandy Ryza and fixed by Robert Kanter (jobhistoryserver)
      + Job history cleaner should only check timestamps of files in old enough directories
      +
    • +
    • MAPREDUCE-4421. + Major improvement reported by Arun C Murthy and fixed by Jason Lowe
      + Run MapReduce framework via the distributed cache
      +
    • +
    • MAPREDUCE-3310. + Major improvement reported by Mathias Herberts and fixed by Alejandro Abdelnur (client)
      + Custom grouping comparator cannot be set for Combiners
      +
    • +
    • MAPREDUCE-1176. + Major new feature reported by BitsOfInfo and fixed by Mariappan Asokan
      + FixedLengthInputFormat and FixedLengthRecordReader
      +
      Addition of FixedLengthInputFormat and FixedLengthRecordReader in the org.apache.hadoop.mapreduce.lib.input package. These two classes can be used when you need to read data from files containing fixed length (fixed width) records. Such files have no CR/LF (or any combination thereof), no delimiters, etc., but each record is a fixed length, and extra data is padded with spaces. The data is one gigantic line within a file. When creating a job that specifies this input format, the job must have the "mapreduce.input.fixedlengthinputformat.record.length" property set, as follows: myJobConf.setInt("mapreduce.input.fixedlengthinputformat.record.length", [myFixedRecordLength]); (a fuller job-setup sketch follows below). + +Please see the javadoc for more details.
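      A short job-setup sketch under the stated contract; the input path and the 100-byte record length are example values, and the record reader hands each record to the mapper as a LongWritable byte offset and a BytesWritable of the raw record bytes:
{code}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthJobSetup {

  public static Job createJob(Configuration conf) throws Exception {
    // The record length must be set before the job runs.
    conf.setInt("mapreduce.input.fixedlengthinputformat.record.length", 100);

    Job job = Job.getInstance(conf, "fixed-length-example");
    job.setInputFormatClass(FixedLengthInputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/data/fixed-width-input"));
    // Mapper, reducer and output settings go here as for any other job.
    return job;
  }
}
{code}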
    • +
    • MAPREDUCE-434. + Minor improvement reported by Yoram Arnon and fixed by Aaron Kimball
      + LocalJobRunner limited to single reducer
      +
    • +
    • HDFS-5921. + Critical bug reported by Aaron T. Myers and fixed by Aaron T. Myers (namenode)
      + Cannot browse file system via NN web UI if any directory has the sticky bit set
      +
    • +
    • HDFS-5876. + Major bug reported by Haohui Mai and fixed by Haohui Mai (datanode)
      + SecureDataNodeStarter does not pick up configuration in hdfs-site.xml
      +
    • +
    • HDFS-5873. + Major bug reported by Yesha Vora and fixed by Haohui Mai
      + dfs.http.policy should have higher precedence over dfs.https.enable
      +
    • +
    • HDFS-5845. + Blocker bug reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + SecondaryNameNode dies when checkpointing with cache pools
      +
    • +
    • HDFS-5844. + Minor bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix broken link in WebHDFS.apt.vm
      +
    • +
    • HDFS-5842. + Major bug reported by Arpit Gupta and fixed by Jing Zhao (security)
      + Cannot create hftp filesystem when using a proxy user ugi and a doAs on a secure cluster
      +
    • +
    • HDFS-5841. + Major improvement reported by Andrew Wang and fixed by Andrew Wang
      + Update HDFS caching documentation with new changes
      +
    • +
    • HDFS-5837. + Major bug reported by Bryan Beaudreault and fixed by Tao Luo (namenode)
      + dfs.namenode.replication.considerLoad does not consider decommissioned nodes
      +
    • +
    • HDFS-5833. + Trivial improvement reported by Bangtao Zhou and fixed by (namenode)
      + SecondaryNameNode have an incorrect java doc
      +
    • +
    • HDFS-5830. + Blocker bug reported by Yongjun Zhang and fixed by Yongjun Zhang (caching , hdfs-client)
      + WebHdfsFileSystem.getFileBlockLocations throws IllegalArgumentException when accessing another cluster.
      +
    • +
    • HDFS-5825. + Minor improvement reported by Haohui Mai and fixed by Haohui Mai
      + Use FileUtils.copyFile() to implement DFSTestUtils.copyFile()
      +
    • +
    • HDFS-5806. + Major bug reported by Nathan Roberts and fixed by Nathan Roberts (balancer)
      + balancer should set SoTimeout to avoid indefinite hangs
      +
    • +
    • HDFS-5800. + Trivial bug reported by Kousuke Saruta and fixed by Kousuke Saruta (hdfs-client)
      + Typo: soft-limit for hard-limit in DFSClient
      +
    • +
    • HDFS-5789. + Major bug reported by Uma Maheswara Rao G and fixed by Uma Maheswara Rao G (namenode)
      + Some of snapshot APIs missing checkOperation double check in fsn
      +
    • +
    • HDFS-5788. + Major improvement reported by Nathan Roberts and fixed by Nathan Roberts (namenode)
      + listLocatedStatus response can be very large
      +
    • +
    • HDFS-5784. + Major sub-task reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (namenode)
      + reserve space in edit log header and fsimage header for feature flag section
      +
    • +
    • HDFS-5777. + Major bug reported by Jing Zhao and fixed by Jing Zhao (namenode)
      + Update LayoutVersion for the new editlog op OP_ADD_BLOCK
      +
    • +
    • HDFS-5766. + Major bug reported by Liang Xie and fixed by Liang Xie (hdfs-client)
      + In DFSInputStream, do not add datanode to deadNodes after InvalidEncryptionKeyException in fetchBlockByteRange
      +
    • +
    • HDFS-5762. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + BlockReaderLocal doesn't return -1 on EOF when doing zero-length reads
      +
    • +
    • HDFS-5756. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (libhdfs)
      + hadoopRzOptionsSetByteBufferPool does not accept NULL argument, contrary to docs
      +
    • +
    • HDFS-5748. + Major improvement reported by Kihwal Lee and fixed by Haohui Mai
      + Too much information shown in the dfs health page.
      +
    • +
    • HDFS-5747. + Minor bug reported by Tsz Wo (Nicholas), SZE and fixed by Arpit Agarwal (namenode)
      + BlocksMap.getStoredBlock(..) and BlockInfoUnderConstruction.addReplicaIfNotPresent(..) may throw NullPointerException
      +
    • +
    • HDFS-5728. + Critical bug reported by Vinayakumar B and fixed by Vinayakumar B (datanode)
      + [Diskfull] Block recovery will fail if the metafile does not have crc for all chunks of the block
      +
    • +
    • HDFS-5721. + Minor improvement reported by Ted Yu and fixed by Ted Yu
      + sharedEditsImage in Namenode#initializeSharedEdits() should be closed before method returns
      +
    • +
    • HDFS-5719. + Minor bug reported by Ted Yu and fixed by Ted Yu (namenode)
      + FSImage#doRollback() should close prevState before return
      +
    • +
    • HDFS-5710. + Major bug reported by Ted Yu and fixed by Uma Maheswara Rao G
      + FSDirectory#getFullPathName should check inodes against null
      +
    • +
    • HDFS-5704. + Major bug reported by Suresh Srinivas and fixed by Jing Zhao (namenode)
      + Change OP_UPDATE_BLOCKS with a new OP_ADD_BLOCK
      +
      Add a new editlog record (OP_ADD_BLOCK) that only records allocation of the new block instead of the entire block list, on every block allocation.
    • +
    • HDFS-5703. + Major new feature reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (webhdfs)
      + Add support for HTTPS and swebhdfs to HttpFS
      +
    • +
    • HDFS-5695. + Major improvement reported by Haohui Mai and fixed by Haohui Mai (test)
      + Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper
      +
    • +
    • HDFS-5691. + Minor bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix typo in ShortCircuitLocalRead document
      +
    • +
    • HDFS-5690. + Blocker bug reported by Haohui Mai and fixed by Haohui Mai
      + DataNode fails to start in secure mode when dfs.http.policy equals to HTTP_ONLY
      +
    • +
    • HDFS-5681. + Major bug reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + renewLease should not hold fsn write lock
      +
    • +
    • HDFS-5677. + Minor improvement reported by Vincent Sheffer and fixed by Vincent Sheffer (datanode , ha)
      + Need error checking for HA cluster configuration
      +
    • +
    • HDFS-5676. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (hdfs-client)
      + fix inconsistent synchronization of CachingStrategy
      +
    • +
    • HDFS-5675. + Minor bug reported by Plamen Jeliazkov and fixed by Plamen Jeliazkov (benchmarks)
      + Add Mkdirs operation to NNThroughputBenchmark
      +
    • +
    • HDFS-5674. + Minor improvement reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (namenode)
      + Editlog code cleanup
      +
    • +
    • HDFS-5671. + Critical bug reported by JamesLi and fixed by JamesLi (hdfs-client)
      + Fix socket leak in DFSInputStream#getBlockReader
      +
    • +
    • HDFS-5667. + Major sub-task reported by Eric Sirianni and fixed by Arpit Agarwal (datanode)
      + Include DatanodeStorage in StorageReport
      +
    • +
    • HDFS-5666. + Minor bug reported by Colin Patrick McCabe and fixed by Jimmy Xiang (namenode)
      + Fix inconsistent synchronization in BPOfferService
      +
    • +
    • HDFS-5663. + Major improvement reported by Liang Xie and fixed by Liang Xie (hdfs-client)
      + make the retry time and interval value configurable in openInfo()
      +
      Makes the number of retries and the time between retries when getting the length of the last block on a file configurable. Below are the new configurations (a usage sketch follows below). + +dfs.client.retry.times.get-last-block-length +dfs.client.retry.interval-ms.get-last-block-length + +They default to 3 and 4000 respectively, these being the values that were previously hardcoded. + +
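      A small usage sketch; the values 5 and 6000 are arbitrary examples, with the defaults (3 and 4000) applying when the keys are left unset:
{code}
import org.apache.hadoop.conf.Configuration;

public class LastBlockLengthRetryConfig {

  /** Raises the retry count and interval for slow-to-settle last blocks. */
  public static Configuration tune(Configuration conf) {
    conf.setInt("dfs.client.retry.times.get-last-block-length", 5);
    conf.setInt("dfs.client.retry.interval-ms.get-last-block-length", 6000);
    return conf;
  }
}
{code}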
    • +
    • HDFS-5662. + Major improvement reported by Brandon Li and fixed by Brandon Li (namenode)
      + Can't decommission a DataNode due to file's replication factor larger than the rest of the cluster size
      +
    • +
    • HDFS-5661. + Major bug reported by Benoy Antony and fixed by Benoy Antony
      + Browsing FileSystem via web ui, should use datanode's fqdn instead of ip address
      +
    • +
    • HDFS-5657. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + race condition causes writeback state error in NFS gateway
      +
    • +
    • HDFS-5652. + Minor improvement reported by Liang Xie and fixed by Liang Xie (hdfs-client)
      + refactoring/uniforming invalid block token exception handling in DFSInputStream
      +
    • +
    • HDFS-5649. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + Unregister NFS and Mount service when NFS gateway is shutting down
      +
    • +
    • HDFS-5637. + Major improvement reported by Liang Xie and fixed by Liang Xie (hdfs-client , security)
      + try to refetchToken while local read InvalidToken occurred
      +
    • +
    • HDFS-5634. + Major sub-task reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (hdfs-client)
      + allow BlockReaderLocal to switch between checksumming and not
      +
    • +
    • HDFS-5633. + Minor improvement reported by Jing Zhao and fixed by Jing Zhao
      + Improve OfflineImageViewer to use less memory
      +
    • +
    • HDFS-5629. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Support HTTPS in JournalNode and SecondaryNameNode
      +
    • +
    • HDFS-5592. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B
      + "DIR* completeFile: /file is closed by DFSClient_" should be logged only for successful closure of the file.
      +
    • +
    • HDFS-5590. + Major bug reported by Jing Zhao and fixed by Jing Zhao
      + Block ID and generation stamp may be reused when persistBlocks is set to false
      +
    • +
    • HDFS-5587. + Minor improvement reported by Brandon Li and fixed by Brandon Li (nfs)
      + add debug information when NFS fails to start with duplicate user or group names
      +
    • +
    • HDFS-5582. + Minor bug reported by Henry Hung and fixed by sathish
      + hdfs getconf -excludeFile or -includeFile always failed
      +
    • +
    • HDFS-5581. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (namenode)
      + NameNodeFsck should use only one instance of BlockPlacementPolicy
      +
    • +
    • HDFS-5580. + Major bug reported by Binglin Chang and fixed by Binglin Chang
      + Infinite loop in Balancer.waitForMoveCompletion
      +
    • +
    • HDFS-5579. + Major bug reported by zhaoyunjiong and fixed by zhaoyunjiong (namenode)
      + Under construction files make DataNode decommission take very long hours
      +
    • +
    • HDFS-5577. + Trivial improvement reported by Brandon Li and fixed by Brandon Li (documentation)
      + NFS user guide update
      +
    • +
    • HDFS-5568. + Major improvement reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + Support inclusion of snapshot paths in Namenode fsck
      +
    • +
    • HDFS-5563. + Major improvement reported by Brandon Li and fixed by Brandon Li (nfs)
      + NFS gateway should commit the buffered data when read request comes after write to the same file
      +
    • +
    • HDFS-5561. + Minor improvement reported by Fengdong Yu and fixed by Haohui Mai (namenode)
      + FSNameSystem#getNameJournalStatus() in JMX should return plain text instead of HTML
      +
    • +
    • HDFS-5560. + Major bug reported by Josh Elser and fixed by Josh Elser
      + Trash configuration log statements prints incorrect units
      +
    • +
    • HDFS-5558. + Major bug reported by Kihwal Lee and fixed by Kihwal Lee
      + LeaseManager monitor thread can crash if the last block is complete but another block is not.
      +
    • +
    • HDFS-5557. + Critical bug reported by Kihwal Lee and fixed by Kihwal Lee
      + Write pipeline recovery for the last packet in the block may cause rejection of valid replicas
      +
    • +
    • HDFS-5552. + Major bug reported by Shinichi Yamashita and fixed by Haohui Mai (namenode)
      + Fix wrong information of "Cluster summay" in dfshealth.html
      +
    • +
    • HDFS-5548. + Major improvement reported by Haohui Mai and fixed by Haohui Mai (nfs)
      + Use ConcurrentHashMap in portmap
      +
    • +
    • HDFS-5545. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Allow specifying endpoints for listeners in HttpServer
      +
    • +
    • HDFS-5544. + Minor bug reported by sathish and fixed by sathish (hdfs-client)
      + Adding Test case For Checking dfs.checksum type as NULL value
      +
    • +
    • HDFS-5540. + Minor bug reported by Binglin Chang and fixed by Binglin Chang
      + Fix intermittent failure in TestBlocksWithNotEnoughRacks
      +
    • +
    • HDFS-5538. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + URLConnectionFactory should pick up the SSL related configuration by default
      +
    • +
    • HDFS-5536. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Implement HTTP policy for Namenode and DataNode
      +
      Adds a new HTTP policy configuration. Users can use "dfs.http.policy" to control the HTTP endpoints of the NameNode and DataNode. The following values are supported:
      - HTTP_ONLY : service is provided only over HTTP
      - HTTPS_ONLY : service is provided only over HTTPS
      - HTTP_AND_HTTPS : service is provided over both HTTP and HTTPS

      hadoop.ssl.enabled and dfs.https.enable are deprecated. When the deprecated properties are still configured, the HTTP policy is decided by the following rules:
      1. If dfs.http.policy is set to HTTPS_ONLY or HTTP_AND_HTTPS, that policy is used; otherwise proceed to rules 2-4.
      2. HTTPS_ONLY is picked if hadoop.ssl.enabled is true.
      3. HTTP_AND_HTTPS is picked if dfs.https.enable is true.
      4. HTTP_ONLY is picked otherwise.
      A sketch of this precedence follows this entry.
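      As a minimal sketch, the documented precedence can be restated in client-side code. This is a toy re-statement of the four rules quoted above using plain Configuration lookups, not the actual NameNode/DataNode code path.

      import org.apache.hadoop.conf.Configuration;

      public class HttpPolicyResolutionSketch {

        static String resolvePolicy(Configuration conf) {
          String policy = conf.get("dfs.http.policy");
          // Rule 1: an explicit HTTPS_ONLY / HTTP_AND_HTTPS wins outright.
          if ("HTTPS_ONLY".equals(policy) || "HTTP_AND_HTTPS".equals(policy)) {
            return policy;
          }
          // Rule 2: the deprecated hadoop.ssl.enabled implies HTTPS_ONLY.
          if (conf.getBoolean("hadoop.ssl.enabled", false)) {
            return "HTTPS_ONLY";
          }
          // Rule 3: the deprecated dfs.https.enable implies HTTP_AND_HTTPS.
          if (conf.getBoolean("dfs.https.enable", false)) {
            return "HTTP_AND_HTTPS";
          }
          // Rule 4: everything else falls back to HTTP_ONLY.
          return "HTTP_ONLY";
        }

        public static void main(String[] args) {
          Configuration conf = new Configuration();
          conf.setBoolean("dfs.https.enable", true); // deprecated knob, set only for the demo
          System.out.println(resolvePolicy(conf));   // prints HTTP_AND_HTTPS
        }
      }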
    • +
    • HDFS-5533. + Minor bug reported by Binglin Chang and fixed by Binglin Chang (snapshots)
      + Symlink delete/create should be treated as DELETE/CREATE in snapshot diff report
      +
    • +
    • HDFS-5532. + Major improvement reported by Vinayakumar B and fixed by Vinayakumar B (webhdfs)
      + Enable the webhdfs by default to support new HDFS web UI
      +
    • +
    • HDFS-5526. + Blocker bug reported by Tsz Wo (Nicholas), SZE and fixed by Kihwal Lee (datanode)
      + Datanode cannot roll back to previous layout version
      +
    • +
    • HDFS-5525. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Inline dust templates
      +
    • +
    • HDFS-5519. + Minor sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + COMMIT handler should update the commit status after sync
      +
    • +
    • HDFS-5514. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + FSNamesystem's fsLock should allow custom implementation
      +
    • +
    • HDFS-5506. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Use URLConnectionFactory in DelegationTokenFetcher
      +
    • +
    • HDFS-5504. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + In HA mode, OP_DELETE_SNAPSHOT is not decrementing the safemode threshold, leads to NN safemode.
      +
    • +
    • HDFS-5502. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Fix HTTPS support in HsftpFileSystem
      +
      Fixes the HTTPS support in HsftpFileSystem. With this change, the client now verifies the server certificate; in particular, it verifies the certificate's Common Name using the strategy specified by the configuration property "hadoop.ssl.hostname.verifier".
    • +
    • HDFS-5495. + Major improvement reported by Andrew Wang and fixed by Jarek Jarcec Cecho
      + Remove further JUnit3 usages from HDFS
      +
    • +
    • HDFS-5489. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Use TokenAspect in WebHDFSFileSystem
      +
    • +
    • HDFS-5488. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Clean up TestHftpURLTimeout
      +
    • +
    • HDFS-5487. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Introduce unit test for TokenAspect
      +
    • +
    • HDFS-5476. + Major bug reported by Jing Zhao and fixed by Jing Zhao
      + Snapshot: clean the blocks/files/directories under a renamed file/directory while deletion
      +
    • +
    • HDFS-5474. + Major bug reported by Uma Maheswara Rao G and fixed by sathish (snapshots)
      + Deletesnapshot can make Namenode in safemode on NN restarts.
      +
    • +
    • HDFS-5469. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Add configuration property for the sub-directory export path
      +
    • +
    • HDFS-5467. + Trivial improvement reported by Andrew Wang and fixed by Shinichi Yamashita
      + Remove tab characters in hdfs-default.xml
      +
    • +
    • HDFS-5458. + Major bug reported by Andrew Wang and fixed by Mike Mellenthin (datanode)
      + Datanode failed volume threshold ignored if exception is thrown in getDataDirsFromURIs
      +
    • +
    • HDFS-5456. + Critical bug reported by Chris Nauroth and fixed by Chris Nauroth (namenode)
      + NameNode startup progress creates new steps if caller attempts to create a counter for a step that doesn't already exist.
      +
    • +
    • HDFS-5454. + Minor sub-task reported by Eric Sirianni and fixed by Arpit Agarwal (datanode)
      + DataNode UUID should be assigned prior to FsDataset initialization
      +
    • +
    • HDFS-5449. + Blocker bug reported by Kihwal Lee and fixed by Kihwal Lee
      + WebHdfs compatibility broken between 2.2 and 1.x / 23.x
      +
    • +
    • HDFS-5444. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Choose default web UI based on browser capabilities
      +
    • +
    • HDFS-5443. + Major bug reported by Uma Maheswara Rao G and fixed by Jing Zhao (snapshots)
      + Delete 0-sized block when deleting an under-construction file that is included in snapshot
      +
    • +
    • HDFS-5440. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Extract the logic of handling delegation tokens in HftpFileSystem to the TokenAspect class
      +
    • +
    • HDFS-5438. + Critical bug reported by Kihwal Lee and fixed by Kihwal Lee (namenode)
      + Flaws in block report processing can cause data loss
      +
    • +
    • HDFS-5436. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Move HsFtpFileSystem and HFtpFileSystem into org.apache.hdfs.web
      +
    • +
    • HDFS-5434. + Minor bug reported by Buddy and fixed by (namenode)
      + Write resiliency for replica count 1
      +
    • +
    • HDFS-5433. + Critical bug reported by Aaron T. Myers and fixed by Aaron T. Myers (snapshots)
      + When reloading fsimage during checkpointing, we should clear existing snapshottable directories
      +
    • +
    • HDFS-5432. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (datanode , test)
      + TestDatanodeJsp fails on Windows due to assumption that loopback address resolves to host name localhost.
      +
    • +
    • HDFS-5428. + Major bug reported by Vinayakumar B and fixed by Jing Zhao (snapshots)
      + under construction files deletion after snapshot+checkpoint+nn restart leads nn safemode
      +
    • +
    • HDFS-5427. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + not able to read deleted files from snapshot directly under snapshottable dir after checkpoint and NN restart
      +
    • +
    • HDFS-5425. + Major bug reported by sathish and fixed by Jing Zhao (namenode , snapshots)
      + Renaming underconstruction file with snapshots can make NN failure on restart
      +
    • +
    • HDFS-5413. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (scripts)
      + hdfs.cmd does not support passthrough to any arbitrary class.
      +
    • +
    • HDFS-5407. + Trivial bug reported by Haohui Mai and fixed by Haohui Mai
      + Fix typos in DFSClientCache
      +
    • +
    • HDFS-5406. + Major sub-task reported by Arpit Agarwal and fixed by Arpit Agarwal (datanode)
      + Send incremental block reports for all storages in a single call
      +
    • +
    • HDFS-5403. + Major bug reported by Aaron T. Myers and fixed by Aaron T. Myers (webhdfs)
      + WebHdfs client cannot communicate with older WebHdfs servers post HDFS-5306
      +
    • +
    • HDFS-5400. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT constant is set to the wrong value
      +
    • +
    • HDFS-5399. + Major improvement reported by Jing Zhao and fixed by Jing Zhao
      + Revisit SafeModeException and corresponding retry policies
      +
    • +
    • HDFS-5393. + Minor sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Serve bootstrap and jQuery locally
      +
    • +
    • HDFS-5382. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Implement the UI of browsing filesystems in HTML 5 page
      +
    • +
    • HDFS-5379. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Update links to datanode information in dfshealth.html
      +
    • +
    • HDFS-5375. + Minor bug reported by Chris Nauroth and fixed by Chris Nauroth (tools)
      + hdfs.cmd does not expose several snapshot commands.
      +
    • +
    • HDFS-5374. + Trivial bug reported by Suresh Srinivas and fixed by Suresh Srinivas
      + Remove deadcode in DFSOutputStream
      +
    • +
    • HDFS-5372. + Major bug reported by Tsz Wo (Nicholas), SZE and fixed by Vinayakumar B (namenode)
      + In FSNamesystem, hasReadLock() returns false if the current thread holds the write lock
      +
    • +
    • HDFS-5371. + Minor improvement reported by Jing Zhao and fixed by Jing Zhao (ha , test)
      + Let client retry the same NN when "dfs.client.test.drop.namenode.response.number" is enabled
      +
    • +
    • HDFS-5370. + Trivial bug reported by Kousuke Saruta and fixed by Kousuke Saruta (hdfs-client)
      + Typo in Error Message: different between range in condition and range in error message
      +
    • +
    • HDFS-5365. + Major bug reported by Radim Kolar and fixed by Radim Kolar (build , libhdfs)
      + Fix libhdfs compile error on FreeBSD9
      +
    • +
    • HDFS-5364. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Add OpenFileCtx cache
      +
    • +
    • HDFS-5363. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Refactor WebHdfsFileSystem: move SPNEGO-authenticated connection creation to URLConnectionFactory
      +
    • +
    • HDFS-5360. + Minor improvement reported by Shinichi Yamashita and fixed by Shinichi Yamashita (snapshots)
      + Improvement of usage message of renameSnapshot and deleteSnapshot
      +
    • +
    • HDFS-5353. + Blocker bug reported by Haohui Mai and fixed by Colin Patrick McCabe
      + Short circuit reads fail when dfs.encrypt.data.transfer is enabled
      +
    • +
    • HDFS-5352. + Minor bug reported by Ted Yu and fixed by Ted Yu
      + Server#initLog() doesn't close InputStream in httpfs
      +
    • +
    • HDFS-5350. + Minor improvement reported by Rob Weltman and fixed by Jimmy Xiang (namenode)
      + Name Node should report fsimage transfer time as a metric
      +
    • +
    • HDFS-5347. + Major sub-task reported by Brandon Li and fixed by Brandon Li (documentation)
      + add HDFS NFS user guide
      +
    • +
    • HDFS-5346. + Major bug reported by Kihwal Lee and fixed by Ravi Prakash (namenode , performance)
      + Avoid unnecessary call to getNumLiveDataNodes() for each block during IBR processing
      +
    • +
    • HDFS-5344. + Minor improvement reported by sathish and fixed by sathish (snapshots , tools)
      + Make LsSnapshottableDir as Tool interface implementation
      +
    • +
    • HDFS-5343. + Major bug reported by sathish and fixed by sathish (hdfs-client)
      + When cat command is issued on snapshot files getting unexpected result
      +
    • +
    • HDFS-5342. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Provide more information in the FSNamesystem JMX interfaces
      +
    • +
    • HDFS-5341. + Major bug reported by qus-jiawei and fixed by qus-jiawei (datanode)
      + Reduce fsdataset lock duration during directory scanning.
      +
    • +
    • HDFS-5338. + Major improvement reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (namenode)
      + Add a conf to disable hostname check in DN registration
      +
    • +
    • HDFS-5337. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + should do hsync for a commit request even there is no pending writes
      +
    • +
    • HDFS-5336. + Minor bug reported by Akira AJISAKA and fixed by Akira AJISAKA (namenode)
      + DataNode should not output 'StartupProgress' metrics
      +
    • +
    • HDFS-5335. + Major bug reported by Arpit Gupta and fixed by Haohui Mai
      + DFSOutputStream#close() keeps throwing exceptions when it is called multiple times
      +
    • +
    • HDFS-5334. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Implement dfshealth.jsp in HTML pages
      +
    • +
    • HDFS-5331. + Major improvement reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + make SnapshotDiff.java to a o.a.h.util.Tool interface implementation
      +
    • +
    • HDFS-5330. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + fix readdir and readdirplus for large directories
      +
    • +
    • HDFS-5329. + Major bug reported by Brandon Li and fixed by Brandon Li (namenode , nfs)
      + Update FSNamesystem#getListing() to handle inode path in startAfter token
      +
    • +
    • HDFS-5325. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Remove WebHdfsFileSystem#ConnRunner
      +
    • +
    • HDFS-5323. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (namenode)
      + Remove some deadcode in BlockManager
      +
    • +
    • HDFS-5322. + Major bug reported by Arpit Gupta and fixed by Jing Zhao (ha)
      + HDFS delegation token not found in cache errors seen on secure HA clusters
      +
    • +
    • HDFS-5317. + Critical sub-task reported by Suresh Srinivas and fixed by Haohui Mai
      + Go back to DFS Home link does not work on datanode webUI
      +
    • +
    • HDFS-5316. + Critical sub-task reported by Suresh Srinivas and fixed by Haohui Mai
      + Namenode ignores the default https port
      +
    • +
    • HDFS-5312. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Generate HTTP / HTTPS URL in DFSUtil#getInfoServer() based on the configured http policy
      +
    • +
    • HDFS-5307. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai
      + Support both HTTP and HTTPS in jsp pages
      +
    • +
    • HDFS-5305. + Major bug reported by Suresh Srinivas and fixed by Suresh Srinivas
      + Add https support in HDFS
      +
    • +
    • HDFS-5297. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix dead links in HDFS site documents
      +
    • +
    • HDFS-5291. + Critical bug reported by Arpit Gupta and fixed by Jing Zhao (ha)
      + Clients need to retry when Active NN is in SafeMode
      +
    • +
    • HDFS-5288. + Major sub-task reported by Haohui Mai and fixed by Haohui Mai (nfs)
      + Close idle connections in portmap
      +
    • +
    • HDFS-5283. + Critical bug reported by Vinayakumar B and fixed by Vinayakumar B (snapshots)
      + NN not coming out of startup safemode due to under construction blocks only inside snapshots also counted in safemode threshold
      +
    • +
    • HDFS-5281. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + COMMIT request should not block
      +
    • +
    • HDFS-5276. + Major bug reported by Chengxiang Li and fixed by Colin Patrick McCabe
      + FileSystem.Statistics got performance issue on multi-thread read/write.
      +
    • +
    • HDFS-5267. + Minor improvement reported by Junping Du and fixed by Junping Du
      + Remove volatile from LightWeightHashSet
      +
    • +
    • HDFS-5260. + Major new feature reported by Chris Nauroth and fixed by Chris Nauroth (hdfs-client , libhdfs)
      + Merge zero-copy memory-mapped HDFS client reads to trunk and branch-2.
      +
    • +
    • HDFS-5257. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (hdfs-client , namenode)
      + addBlock() retry should return LocatedBlock with locations else client will get AIOBE
      +
    • +
    • HDFS-5252. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Stable write is not handled correctly in someplace
      +
    • +
    • HDFS-5240. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + Separate formatting from logging in the audit logger API
      +
    • +
    • HDFS-5239. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (namenode)
      + Allow FSNamesystem lock fairness to be configurable
      +
    • +
    • HDFS-5220. + Major improvement reported by Rob Weltman and fixed by Jimmy Xiang (namenode)
      + Expose group resolution time as metric
      +
    • +
    • HDFS-5207. + Major improvement reported by Junping Du and fixed by Junping Du (namenode)
      + In BlockPlacementPolicy, update 2 parameters of chooseTarget()
      +
    • +
    • HDFS-5188. + Major improvement reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (namenode)
      + Clean up BlockPlacementPolicy and its implementations
      +
    • +
    • HDFS-5171. + Major sub-task reported by Brandon Li and fixed by Haohui Mai (nfs)
      + NFS should create input stream for a file and try to share it with multiple read requests
      +
    • +
    • HDFS-5170. + Trivial bug reported by Andrew Wang and fixed by Andrew Wang
      + BlockPlacementPolicyDefault uses the wrong classname when alerting to enable debug logging
      +
    • +
    • HDFS-5164. + Minor bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (namenode)
      + deleteSnapshot should check if OperationCategory.WRITE is possible before taking write lock
      +
    • +
    • HDFS-5144. + Minor improvement reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Document time unit to NameNodeMetrics.java
      +
    • +
    • HDFS-5136. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + MNT EXPORT should give the full group list which can mount the exports
      +
    • +
    • HDFS-5130. + Minor test reported by Binglin Chang and fixed by Binglin Chang (test)
      + Add test for snapshot related FsShell and DFSAdmin commands
      +
    • +
    • HDFS-5122. + Major bug reported by Arpit Gupta and fixed by Haohui Mai (ha , webhdfs)
      + Support failover and retry in WebHdfsFileSystem for NN HA
      +
    • +
    • HDFS-5110. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Change FSDataOutputStream to HdfsDataOutputStream for opened streams to fix type cast error
      +
    • +
    • HDFS-5107. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Fix array copy error in Readdir and Readdirplus responses
      +
    • +
    • HDFS-5104. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Support dotdot name in NFS LOOKUP operation
      +
    • +
    • HDFS-5093. + Minor bug reported by Chuan Liu and fixed by Chuan Liu (test)
      + TestGlobPaths should re-use the MiniDFSCluster to avoid failure on Windows
      +
    • +
    • HDFS-5078. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Support file append in NFSv3 gateway to enable data streaming to HDFS
      +
    • +
    • HDFS-5075. + Major bug reported by Timothy St. Clair and fixed by Timothy St. Clair
      + httpfs-config.sh calls out incorrect env script name
      +
    • +
    • HDFS-5074. + Major bug reported by Todd Lipcon and fixed by Todd Lipcon (ha , namenode)
      + Allow starting up from an fsimage checkpoint in the middle of a segment
      +
    • +
    • HDFS-5073. + Minor bug reported by Kihwal Lee and fixed by Arpit Agarwal (test)
      + TestListCorruptFileBlocks fails intermittently
      +
    • +
    • HDFS-5071. + Major sub-task reported by Kihwal Lee and fixed by Brandon Li (nfs)
      + Change hdfs-nfs parent project to hadoop-project
      +
    • +
    • HDFS-5069. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Include hadoop-nfs and hadoop-hdfs-nfs into hadoop dist for NFS deployment
      +
    • +
    • HDFS-5068. + Major improvement reported by Konstantin Shvachko and fixed by Konstantin Shvachko (benchmarks)
      + Convert NNThroughputBenchmark to a Tool to allow generic options.
      +
    • +
    • HDFS-5065. + Major bug reported by Ivan Mitic and fixed by Ivan Mitic (hdfs-client , test)
      + TestSymlinkHdfsDisable fails on Windows
      +
    • +
    • HDFS-5043. + Major bug reported by Brandon Li and fixed by Brandon Li
      + For HdfsFileStatus, set default value of childrenNum to -1 instead of 0 to avoid confusing applications
      +
    • +
    • HDFS-5037. + Critical improvement reported by Todd Lipcon and fixed by Andrew Wang (ha , namenode)
      + Active NN should trigger its own edit log rolls
      +
    • +
    • HDFS-5035. + Major bug reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + getFileLinkStatus and rename do not correctly check permissions of symlinks
      +
    • +
    • HDFS-5034. + Trivial improvement reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + Remove debug prints from getFileLinkInfo
      +
    • +
    • HDFS-5023. + Major bug reported by Ravi Prakash and fixed by Mit Desai (snapshots , test)
      + TestSnapshotPathINodes.testAllowSnapshot is failing with jdk7
      +
    • +
    • HDFS-5014. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B (datanode , ha)
      + BPOfferService#processCommandFromActor() synchronization on namenode RPC call delays IBR to Active NN, if Standby NN is unstable
      +
    • +
    • HDFS-5004. + Major improvement reported by Trevor Lorimer and fixed by Trevor Lorimer (namenode)
      + Add additional JMX bean for NameNode status data
      +
    • +
    • HDFS-4997. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (libhdfs)
      + libhdfs doesn't return correct error codes in most cases
      +
      libhdfs now returns correct codes in errno. Previously, due to a bug, many functions set errno to 255 instead of the more specific error code.
    • +
    • HDFS-4995. + Major bug reported by Kihwal Lee and fixed by Kihwal Lee (namenode)
      + Make getContentSummary() less expensive
      +
    • +
    • HDFS-4994. + Minor bug reported by Kihwal Lee and fixed by Robert Parker (namenode)
      + Audit log getContentSummary() calls
      +
    • +
    • HDFS-4983. + Major improvement reported by Harsh J and fixed by Yongjun Zhang (webhdfs)
      + Numeric usernames do not work with WebHDFS FS
      +
      Add a new configuration property "dfs.webhdfs.user.provider.user.pattern" for specifying user name filters for WebHDFS.
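      A minimal sketch of using the new property. The property name is taken from this note; the regex shown is only an illustrative pattern that also admits usernames beginning with a digit, not the shipped default.

      import org.apache.hadoop.conf.Configuration;

      public class WebHdfsUserPatternSketch {
        public static void main(String[] args) {
          Configuration conf = new Configuration();
          // Illustrative pattern: allow user names that start with a digit as well.
          conf.set("dfs.webhdfs.user.provider.user.pattern",
                   "^[A-Za-z0-9_][A-Za-z0-9._-]*[$]?$");
          System.out.println(conf.get("dfs.webhdfs.user.provider.user.pattern"));
        }
      }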
    • +
    • HDFS-4962. + Minor sub-task reported by Tsz Wo (Nicholas), SZE and fixed by Tsz Wo (Nicholas), SZE (nfs)
      + Use enum for nfs constants
      +
    • +
    • HDFS-4949. + Major new feature reported by Andrew Wang and fixed by Andrew Wang (datanode , namenode)
      + Centralized cache management in HDFS
      +
    • +
    • HDFS-4948. + Major bug reported by Robert Joseph Evans and fixed by Brandon Li
      + mvn site for hadoop-hdfs-nfs fails
      +
    • +
    • HDFS-4947. + Major sub-task reported by Brandon Li and fixed by Jing Zhao (nfs)
      + Add NFS server export table to control export by hostname or IP range
      +
    • +
    • HDFS-4885. + Major sub-task reported by Junping Du and fixed by Junping Du
      + Update verifyBlockPlacement() API in BlockPlacementPolicy
      +
    • +
    • HDFS-4879. + Major improvement reported by Todd Lipcon and fixed by Todd Lipcon (namenode)
      + Add "blocked ArrayList" collection to avoid CMS full GCs
      +
    • +
    • HDFS-4860. + Major improvement reported by Trevor Lorimer and fixed by Trevor Lorimer (namenode)
      + Add additional attributes to JMX beans
      +
    • +
    • HDFS-4816. + Major bug reported by Andrew Wang and fixed by Andrew Wang (namenode)
      + transitionToActive blocks if the SBN is doing checkpoint image transfer
      +
    • +
    • HDFS-4772. + Minor improvement reported by Brandon Li and fixed by Brandon Li (namenode)
      + Add number of children in HdfsFileStatus
      +
    • +
    • HDFS-4763. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Add script changes/utility for starting NFS gateway
      +
    • +
    • HDFS-4762. + Major sub-task reported by Brandon Li and fixed by Brandon Li (nfs)
      + Provide HDFS based NFSv3 and Mountd implementation
      +
    • +
    • HDFS-4657. + Major bug reported by Aaron T. Myers and fixed by Aaron T. Myers (namenode)
      + Limit the number of blocks logged by the NN after a block report to a configurable value.
      +
    • +
    • HDFS-4633. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (hdfs-client , test)
      + TestDFSClientExcludedNodes fails sporadically if excluded nodes cache expires too quickly
      +
    • +
    • HDFS-4517. + Major test reported by Vadim Bondarev and fixed by Ivan A. Veselovsky
      + Cover class RemoteBlockReader with unit tests
      +
    • +
    • HDFS-4516. + Critical bug reported by Uma Maheswara Rao G and fixed by Vinayakumar B (namenode)
      + Client crash after block allocation and NN switch before lease recovery for the same file can cause readers to fail forever
      +
    • +
    • HDFS-4512. + Major test reported by Vadim Bondarev and fixed by Vadim Bondarev
      + Cover package org.apache.hadoop.hdfs.server.common with tests
      +
    • +
    • HDFS-4511. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover package org.apache.hadoop.hdfs.tools with unit test
      +
    • +
    • HDFS-4510. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover classes ClusterJspHelper/NamenodeJspHelper with unit tests
      +
    • +
    • HDFS-4491. + Major test reported by Tsuyoshi OZAWA and fixed by Andrey Klochkov (test)
      + Parallel testing HDFS
      +
    • +
    • HDFS-4376. + Major bug reported by Aaron T. Myers and fixed by Junping Du (balancer)
      + Fix several race conditions in Balancer and resolve intermittent timeout of TestBalancerWithNodeGroup
      +
    • +
    • HDFS-4329. + Major bug reported by Andy Isaacson and fixed by Cristina L. Abad (hdfs-client)
      + DFSShell issues with directories with spaces in name
      +
    • +
    • HDFS-4278. + Major improvement reported by Harsh J and fixed by Kousuke Saruta (datanode , namenode)
      + Log an ERROR when DFS_BLOCK_ACCESS_TOKEN_ENABLE config is disabled but security is turned on.
      +
    • +
    • HDFS-4201. + Critical bug reported by Eli Collins and fixed by Jimmy Xiang (namenode)
      + NPE in BPServiceActor#sendHeartBeat
      +
    • +
    • HDFS-4096. + Major sub-task reported by Jing Zhao and fixed by Haohui Mai (datanode , namenode)
      + Add snapshot information to namenode WebUI
      +
    • +
    • HDFS-3987. + Major sub-task reported by Alejandro Abdelnur and fixed by Haohui Mai
      + Support webhdfs over HTTPS
      +
    • +
    • HDFS-3981. + Major bug reported by Xiaobo Peng and fixed by Xiaobo Peng (namenode)
      + access time is set without holding FSNamesystem write lock
      +
    • +
    • HDFS-3934. + Minor bug reported by Andy Isaacson and fixed by Colin Patrick McCabe
      + duplicative dfs_hosts entries handled wrong
      +
    • +
    • HDFS-2933. + Major improvement reported by Philip Zeyliger and fixed by Vivek Ganesan (datanode)
      + Improve DataNode Web UI Index Page
      +
    • +
    • HADOOP-10317. + Major bug reported by Andrew Wang and fixed by Andrew Wang
      + Rename branch-2.3 release version from 2.4.0-SNAPSHOT to 2.3.0-SNAPSHOT
      +
    • +
    • HADOOP-10313. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (build)
      + Script and jenkins job to produce Hadoop release artifacts
      +
    • +
    • HADOOP-10311. + Blocker bug reported by Suresh Srinivas and fixed by Alejandro Abdelnur
      + Cleanup vendor names from the code base
      +
    • +
    • HADOOP-10310. + Blocker bug reported by Aaron T. Myers and fixed by Aaron T. Myers (security)
      + SaslRpcServer should be initialized even when no secret manager present
      +
    • +
    • HADOOP-10305. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (metrics)
      + Add "rpc.metrics.quantile.enable" and "rpc.metrics.percentiles.intervals" to core-default.xml
      +
    • +
    • HADOOP-10292. + Major bug reported by Haohui Mai and fixed by Haohui Mai
      + Restore HttpServer from branch-2.2 in branch-2
      +
    • +
    • HADOOP-10291. + Major bug reported by Mit Desai and fixed by Mit Desai
      + TestSecurityUtil#testSocketAddrWithIP fails
      +
    • +
    • HADOOP-10288. + Major bug reported by Todd Lipcon and fixed by Todd Lipcon (util)
      + Explicit reference to Log4JLogger breaks non-log4j users
      +
    • +
    • HADOOP-10274. + Minor improvement reported by takeshi.miao and fixed by takeshi.miao (security)
      + Lower the logging level from ERROR to WARN for UGI.doAs method
      +
    • +
    • HADOOP-10273. + Major bug reported by Arpit Agarwal and fixed by Arpit Agarwal (build)
      + Fix 'mvn site'
      +
    • +
    • HADOOP-10255. + Blocker bug reported by Haohui Mai and fixed by Haohui Mai
      + Rename HttpServer to HttpServer2 to retain older HttpServer in branch-2 for compatibility
      +
    • +
    • HADOOP-10252. + Major bug reported by Jimmy Xiang and fixed by Jimmy Xiang
      + HttpServer can't start if hostname is not specified
      +
    • +
    • HADOOP-10250. + Major bug reported by Yongjun Zhang and fixed by Yongjun Zhang
      + VersionUtil returns wrong value when comparing two versions
      +
    • +
    • HADOOP-10248. + Major improvement reported by Ted Yu and fixed by Akira AJISAKA
      + Property name should be included in the exception where property value is null
      +
    • +
    • HADOOP-10240. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (documentation)
      + Windows build instructions incorrectly state requirement of protoc 2.4.1 instead of 2.5.0
      +
    • +
    • HADOOP-10236. + Trivial bug reported by Akira AJISAKA and fixed by Akira AJISAKA
      + Fix typo in o.a.h.ipc.Client#checkResponse
      +
    • +
    • HADOOP-10235. + Major bug reported by Alejandro Abdelnur and fixed by Alejandro Abdelnur (build)
      + Hadoop tarball has 2 versions of stax-api JARs
      +
    • +
    • HADOOP-10234. + Major bug reported by Chris Nauroth and fixed by Chris Nauroth (scripts)
      + "hadoop.cmd jar" does not propagate exit code.
      +
    • +
    • HADOOP-10228. + Minor improvement reported by Haohui Mai and fixed by Haohui Mai (fs)
      + FsPermission#fromShort() should cache FsAction.values()
      +
    • +
    • HADOOP-10223. + Minor bug reported by Ted Yu and fixed by Ted Yu
      + MiniKdc#main() should close the FileReader it creates
      +
    • +
    • HADOOP-10214. + Major bug reported by Liang Xie and fixed by Liang Xie (ha)
      + Fix multithreaded correctness warnings in ActiveStandbyElector
      +
    • +
    • HADOOP-10212. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Incorrect compile command in Native Library document
      +
    • +
    • HADOOP-10208. + Trivial improvement reported by Benoy Antony and fixed by Benoy Antony
      + Remove duplicate initialization in StringUtils.getStringCollection
      +
    • +
    • HADOOP-10207. + Minor test reported by Jimmy Xiang and fixed by Jimmy Xiang
      + TestUserGroupInformation#testLogin is flaky
      +
    • +
    • HADOOP-10203. + Major bug reported by Andrei Savu and fixed by Andrei Savu (fs/s3)
      + Connection leak in Jets3tNativeFileSystemStore#retrieveMetadata
      +
    • +
    • HADOOP-10198. + Minor improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (native)
      + DomainSocket: add support for socketpair
      +
    • +
    • HADOOP-10193. + Minor bug reported by Gregory Chanan and fixed by Gregory Chanan (security)
      + hadoop-auth's PseudoAuthenticationHandler can consume getInputStream
      +
    • +
    • HADOOP-10178. + Major bug reported by shanyu zhao and fixed by shanyu zhao (conf)
      + Configuration deprecation always emit "deprecated" warnings when a new key is used
      +
    • +
    • HADOOP-10175. + Major bug reported by Chuan Liu and fixed by Chuan Liu (fs)
      + Har files system authority should preserve userinfo
      +
    • +
    • HADOOP-10173. + Critical improvement reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + Remove UGI from DIGEST-MD5 SASL server creation
      +
    • +
    • HADOOP-10172. + Critical improvement reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + Cache SASL server factories
      +
    • +
    • HADOOP-10171. + Major bug reported by Mit Desai and fixed by Mit Desai
      + TestRPC fails intermittently on jdk7
      +
    • +
    • HADOOP-10169. + Minor improvement reported by Liang Xie and fixed by Liang Xie (metrics)
      + remove the unnecessary synchronized in JvmMetrics class
      +
    • +
    • HADOOP-10168. + Major bug reported by Thejas M Nair and fixed by Thejas M Nair
      + fix javadoc of ReflectionUtils.copy
      +
    • +
    • HADOOP-10167. + Major improvement reported by Mikhail Antonov and fixed by (build)
      + Mark hadoop-common source as UTF-8 in Maven pom files / refactoring
      +
    • +
    • HADOOP-10164. + Major improvement reported by Robert Joseph Evans and fixed by Robert Joseph Evans
      + Allow UGI to login with a known Subject
      +
    • +
    • HADOOP-10162. + Major bug reported by Mit Desai and fixed by Mit Desai
      + Fix symlink-related test failures in TestFileContextResolveAfs and TestStat in branch-2
      +
    • +
    • HADOOP-10147. + Minor bug reported by Eric Sirianni and fixed by Steve Loughran (build)
      + Upgrade to commons-logging 1.1.3 to avoid potential deadlock in MiniDFSCluster
      +
    • +
    • HADOOP-10146. + Critical bug reported by Daryn Sharp and fixed by Daryn Sharp (util)
      + Workaround JDK7 Process fd close bug
      +
    • +
    • HADOOP-10143. + Major improvement reported by Liang Xie and fixed by Liang Xie (io)
      + replace WritableFactories's hashmap with ConcurrentHashMap
      +
    • +
    • HADOOP-10142. + Major bug reported by Vinayakumar B and fixed by Vinayakumar B
      + Avoid groups lookup for unprivileged users such as "dr.who"
      +
    • +
    • HADOOP-10135. + Major bug reported by David Dobbins and fixed by David Dobbins (fs)
      + writes to swift fs over partition size leave temp files and empty output file
      +
    • +
    • HADOOP-10132. + Minor improvement reported by Ted Yu and fixed by Ted Yu
      + RPC#stopProxy() should log the class of proxy when IllegalArgumentException is encountered
      +
    • +
    • HADOOP-10130. + Minor bug reported by Binglin Chang and fixed by Binglin Chang
      + RawLocalFS::LocalFSFileInputStream.pread does not track FS::Statistics
      +
    • +
    • HADOOP-10129. + Critical bug reported by Daryn Sharp and fixed by Daryn Sharp (tools/distcp)
      + Distcp may succeed when it fails
      +
    • +
    • HADOOP-10127. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla (ipc)
      + Add ipc.client.connect.retry.interval to control the frequency of connection retries
      +
    • +
    • HADOOP-10126. + Minor bug reported by Vinayakumar B and fixed by Vinayakumar B (util)
      + LightWeightGSet log message is confusing : "2.0% max memory = 2.0 GB"
      +
    • +
    • HADOOP-10125. + Major bug reported by Ming Ma and fixed by Ming Ma (ipc)
      + no need to process RPC request if the client connection has been dropped
      +
    • +
    • HADOOP-10112. + Major bug reported by Brandon Li and fixed by Brandon Li (tools)
      + har file listing doesn't work with wild card
      +
    • +
    • HADOOP-10111. + Major improvement reported by Kihwal Lee and fixed by Kihwal Lee
      + Allow DU to be initialized with an initial value
      +
    • +
    • HADOOP-10110. + Blocker bug reported by Chuan Liu and fixed by Chuan Liu (build)
      + hadoop-auth has a build break due to missing dependency
      +
    • +
    • HADOOP-10109. + Major sub-task reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe (test)
      + Fix test failure in TestOfflineEditsViewer introduced by HADOOP-10052
      +
    • +
    • HADOOP-10107. + Major sub-task reported by Tsz Wo (Nicholas), SZE and fixed by Kihwal Lee (ipc)
      + Server.getNumOpenConnections may throw NPE
      +
    • +
    • HADOOP-10106. + Minor bug reported by Ming Ma and fixed by Ming Ma
      + Incorrect thread name in RPC log messages
      +
    • +
    • HADOOP-10103. + Minor sub-task reported by Steve Loughran and fixed by Akira AJISAKA (build)
      + update commons-lang to 2.6
      +
    • +
    • HADOOP-10102. + Minor sub-task reported by Steve Loughran and fixed by Akira AJISAKA (build)
      + update commons IO from 2.1 to 2.4
      +
    • +
    • HADOOP-10100. + Major bug reported by Robert Kanter and fixed by Robert Kanter
      + MiniKDC shouldn't use apacheds-all artifact
      +
    • +
    • HADOOP-10095. + Minor improvement reported by Nicolas Liochon and fixed by Nicolas Liochon (io)
      + Performance improvement in CodecPool
      +
    • +
    • HADOOP-10094. + Trivial bug reported by Enis Soztutar and fixed by Enis Soztutar (util)
      + NPE in GenericOptionsParser#preProcessForWindows()
      +
    • +
    • HADOOP-10093. + Major bug reported by shanyu zhao and fixed by shanyu zhao (conf)
      + hadoop-env.cmd sets HADOOP_CLIENT_OPTS with a max heap size that is too small.
      +
    • +
    • HADOOP-10090. + Major bug reported by Ivan Mitic and fixed by Ivan Mitic (metrics)
      + Jobtracker metrics not updated properly after execution of a mapreduce job
      +
    • +
    • HADOOP-10088. + Major bug reported by Raja Aluri and fixed by Raja Aluri (build)
      + copy-nativedistlibs.sh needs to quote snappy lib dir
      +
    • +
    • HADOOP-10087. + Major bug reported by Yu Gao and fixed by Colin Patrick McCabe (security)
      + UserGroupInformation.getGroupNames() fails to return primary group first when JniBasedUnixGroupsMappingWithFallback is used
      +
    • +
    • HADOOP-10086. + Minor improvement reported by Masatake Iwasaki and fixed by Masatake Iwasaki (documentation)
      + User document for authentication in secure cluster
      +
    • +
    • HADOOP-10081. + Critical bug reported by Jason Lowe and fixed by Tsuyoshi OZAWA (ipc)
      + Client.setupIOStreams can leak socket resources on exception or error
      +
    • +
    • HADOOP-10079. + Major improvement reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + log a warning message if group resolution takes too long.
      +
    • +
    • HADOOP-10078. + Minor bug reported by Robert Kanter and fixed by Robert Kanter (security)
      + KerberosAuthenticator always does SPNEGO
      +
    • +
    • HADOOP-10072. + Trivial bug reported by Chris Nauroth and fixed by Chris Nauroth (nfs , test)
      + TestNfsExports#testMultiMatchers fails due to non-deterministic timing around cache expiry check.
      +
    • +
    • HADOOP-10067. + Minor improvement reported by Robert Rati and fixed by Robert Rati
      + Missing POM dependency on jsr305
      +
    • +
    • HADOOP-10064. + Major improvement reported by Arpit Agarwal and fixed by Arpit Agarwal (build)
      + Upgrade to maven antrun plugin version 1.7
      +
    • +
    • HADOOP-10058. + Minor bug reported by Akira AJISAKA and fixed by Chen He (metrics)
      + TestMetricsSystemImpl#testInitFirstVerifyStopInvokedImmediately fails on trunk
      +
    • +
    • HADOOP-10055. + Trivial bug reported by Eli Collins and fixed by Akira AJISAKA (documentation)
      + FileSystemShell.apt.vm doc has typo "numRepicas"
      +
    • +
    • HADOOP-10052. + Major sub-task reported by Andrew Wang and fixed by Andrew Wang (fs)
      + Temporarily disable client-side symlink resolution
      +
    • +
    • HADOOP-10047. + Major new feature reported by Gopal V and fixed by Gopal V (io)
      + Add a directbuffer Decompressor API to hadoop
      +
      Direct Bytebuffer decompressors for Zlib (Deflate & Gzip) and Snappy
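      A hedged sketch of how the direct-buffer path might be exercised, assuming the DirectDecompressionCodec / DirectDecompressor types and the createDirectDecompressor() / decompress(ByteBuffer, ByteBuffer) calls associated with this API; treat these names as assumptions, and note that the native codec libraries must be loadable for the codec to work.

      import java.io.IOException;
      import java.nio.ByteBuffer;
      import org.apache.hadoop.conf.Configuration;
      import org.apache.hadoop.io.compress.CompressionCodec;
      import org.apache.hadoop.io.compress.DirectDecompressionCodec;
      import org.apache.hadoop.io.compress.DirectDecompressor;
      import org.apache.hadoop.io.compress.SnappyCodec;
      import org.apache.hadoop.util.ReflectionUtils;

      public class DirectDecompressSketch {
        public static void main(String[] args) throws IOException {
          Configuration conf = new Configuration();
          CompressionCodec codec = ReflectionUtils.newInstance(SnappyCodec.class, conf);

          // Codecs supporting the zero-copy path expose a decompressor that works
          // on ByteBuffers instead of byte[]-based streams.
          if (codec instanceof DirectDecompressionCodec) {
            DirectDecompressor dd =
                ((DirectDecompressionCodec) codec).createDirectDecompressor();

            ByteBuffer compressed = ByteBuffer.allocateDirect(64 * 1024);
            ByteBuffer uncompressed = ByteBuffer.allocateDirect(256 * 1024);

            // ... fill 'compressed' with snappy-compressed bytes, then flip() ...
            compressed.flip();
            if (compressed.hasRemaining()) {
              dd.decompress(compressed, uncompressed);
            }
          }
        }
      }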
    • +
    • HADOOP-10046. + Trivial improvement reported by David S. Wang and fixed by David S. Wang
      + Print a log message when SSL is enabled
      +
    • +
    • HADOOP-10040. + Major bug reported by Yingda Chen and fixed by Chris Nauroth
      + hadoop.cmd in UNIX format and would not run by default on Windows
      +
    • +
    • HADOOP-10039. + Major bug reported by Suresh Srinivas and fixed by Haohui Mai (security)
      + Add Hive to the list of projects using AbstractDelegationTokenSecretManager
      +
    • +
    • HADOOP-10031. + Major bug reported by Chuan Liu and fixed by Chuan Liu (fs)
      + FsShell -get/copyToLocal/moveFromLocal should support Windows local path
      +
    • +
    • HADOOP-10030. + Major bug reported by Chuan Liu and fixed by Chuan Liu
      + FsShell -put/copyFromLocal should support Windows local path
      +
    • +
    • HADOOP-10029. + Major bug reported by Suresh Srinivas and fixed by Suresh Srinivas (fs)
      + Specifying har file to MR job fails in secure cluster
      +
    • +
    • HADOOP-10028. + Minor bug reported by Jing Zhao and fixed by Haohui Mai
      + Malformed ssl-server.xml.example
      +
    • +
    • HADOOP-10006. + Blocker bug reported by Junping Du and fixed by Junping Du (fs , util)
      + Compilation failure in trunk for o.a.h.fs.swift.util.JSONUtil
      +
    • +
    • HADOOP-10005. + Trivial improvement reported by Jackie Chang and fixed by Jackie Chang
      + No need to check INFO severity level is enabled or not
      +
    • +
    • HADOOP-9998. + Major improvement reported by Junping Du and fixed by Junping Du (net)
      + Provide methods to clear only part of the DNSToSwitchMapping
      +
    • +
    • HADOOP-9982. + Major bug reported by Akira AJISAKA and fixed by Akira AJISAKA (documentation)
      + Fix dead links in hadoop site docs
      +
    • +
    • HADOOP-9981. + Critical bug reported by Kihwal Lee and fixed by Colin Patrick McCabe
      + globStatus should minimize its listStatus and getFileStatus calls
      +
    • +
    • HADOOP-9964. + Major bug reported by Junping Du and fixed by Junping Du (util)
      + O.A.H.U.ReflectionUtils.printThreadInfo() is not thread-safe which cause TestHttpServer pending 10 minutes or longer.
      +
    • +
    • HADOOP-9956. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + RPC listener inefficiently assigns connections to readers
      +
    • +
    • HADOOP-9955. + Major sub-task reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + RPC idle connection closing is extremely inefficient
      +
    • +
    • HADOOP-9929. + Major bug reported by Jason Lowe and fixed by Colin Patrick McCabe (fs)
      + Insufficient permissions for a path reported as file not found
      +
    • +
    • HADOOP-9915. + Trivial improvement reported by Binglin Chang and fixed by Binglin Chang
      + o.a.h.fs.Stat support on Macosx
      +
    • +
    • HADOOP-9909. + Major improvement reported by Shinichi Yamashita and fixed by (fs)
      + org.apache.hadoop.fs.Stat should permit other LANG
      +
    • +
    • HADOOP-9908. + Major bug reported by Todd Lipcon and fixed by Todd Lipcon (util)
      + Fix NPE when versioninfo properties file is missing
      +
    • +
    • HADOOP-9898. + Minor bug reported by Todd Lipcon and fixed by Todd Lipcon (ipc , net)
      + Set SO_KEEPALIVE on all our sockets
      +
    • +
    • HADOOP-9897. + Trivial improvement reported by Binglin Chang and fixed by Binglin Chang (fs)
      + Add method to get path start position without drive specifier in o.a.h.fs.Path
      +
    • +
    • HADOOP-9889. + Major bug reported by Wei Yan and fixed by Wei Yan
      + Refresh the Krb5 configuration when creating a new kdc in Hadoop-MiniKDC
      +
    • +
    • HADOOP-9887. + Major bug reported by Chris Nauroth and fixed by Chuan Liu (fs)
      + globStatus does not correctly handle paths starting with a drive spec on Windows
      +
    • +
    • HADOOP-9875. + Minor bug reported by Aaron T. Myers and fixed by Aaron T. Myers (test)
      + TestDoAsEffectiveUser can fail on JDK 7
      +
    • +
    • HADOOP-9871. + Minor bug reported by Luke Lu and fixed by Junping Du
      + Fix intermittent findbug warnings in DefaultMetricsSystem
      +
    • +
    • HADOOP-9866. + Major test reported by Alejandro Abdelnur and fixed by Wei Yan (test)
      + convert hadoop-auth testcases requiring kerberos to use minikdc
      +
    • +
    • HADOOP-9865. + Major bug reported by Chuan Liu and fixed by Chuan Liu
      + FileContext.globStatus() has a regression with respect to relative path
      +
    • +
    • HADOOP-9860. + Major improvement reported by Wei Yan and fixed by Wei Yan
      + Remove class HackedKeytab and HackedKeytabEncoder from hadoop-minikdc once jira DIRSERVER-1882 solved
      +
    • +
    • HADOOP-9848. + Major new feature reported by Wei Yan and fixed by Wei Yan (security , test)
      + Create a MiniKDC for use with security testing
      +
    • +
    • HADOOP-9847. + Minor bug reported by Andrew Wang and fixed by Colin Patrick McCabe
      + TestGlobPath symlink tests fail to cleanup properly
      +
    • +
    • HADOOP-9833. + Minor improvement reported by Steve Loughran and fixed by Kousuke Saruta (build)
      + move slf4j to version 1.7.5
      +
    • +
    • HADOOP-9830. + Trivial bug reported by Dmitry Lysnichenko and fixed by Kousuke Saruta (documentation)
      + Typo at http://hadoop.apache.org/docs/current/
      +
    • +
    • HADOOP-9820. + Blocker bug reported by Daryn Sharp and fixed by Daryn Sharp (ipc , security)
      + RPCv9 wire protocol is insufficient to support multiplexing
      +
    • +
    • HADOOP-9817. + Major bug reported by Colin Patrick McCabe and fixed by Colin Patrick McCabe
      + FileSystem#globStatus and FileContext#globStatus need to work with symlinks
      +
    • +
    • HADOOP-9806. + Major bug reported by Brandon Li and fixed by Brandon Li (nfs)
      + PortmapInterface should check if the procedure is out-of-range
      +
    • +
    • HADOOP-9791. + Major bug reported by Ivan Mitic and fixed by Ivan Mitic
      + Add a test case covering long paths for new FileUtil access check methods
      +
    • +
    • HADOOP-9787. + Major bug reported by Karthik Kambatla and fixed by Karthik Kambatla (util)
      + ShutdownHelper util to shutdown threads and threadpools
      +
    • +
    • HADOOP-9784. + Major improvement reported by Junping Du and fixed by Junping Du
      + Add a builder for HttpServer
      +
    • +
    • HADOOP-9748. + Critical sub-task reported by Daryn Sharp and fixed by Daryn Sharp (security)
      + Reduce blocking on UGI.ensureInitialized
      +
    • +
    • HADOOP-9703. + Minor bug reported by Mark Miller and fixed by Tsuyoshi OZAWA
      + org.apache.hadoop.ipc.Client leaks threads on stop.
      +
    • +
    • HADOOP-9698. + Blocker sub-task reported by Daryn Sharp and fixed by Daryn Sharp (ipc)
      + RPCv9 client must honor server's SASL negotiate response
      +
      The RPC client now waits for the Server's SASL negotiate response before instantiating its SASL client.
    • +
    • HADOOP-9693. + Trivial improvement reported by Steve Loughran and fixed by
      + Shell should add a probe for OSX
      +
    • +
    • HADOOP-9686. + Major improvement reported by Jason Lowe and fixed by Jason Lowe (conf)
      + Easy access to final parameters in Configuration
      +
    • +
    • HADOOP-9683. + Blocker sub-task reported by Luke Lu and fixed by Daryn Sharp (ipc)
      + Wrap IpcConnectionContext in RPC headers
      +
      Connection context is now sent as an RPC-header-wrapped protobuf.
    • +
    • HADOOP-9660. + Major bug reported by Enis Soztutar and fixed by Enis Soztutar (scripts , util)
      + [WINDOWS] Powershell / cmd parses -Dkey=value from command line as [-Dkey, value] which breaks GenericsOptionParser
      +
    • +
    • HADOOP-9652. + Major improvement reported by Colin Patrick McCabe and fixed by Andrew Wang
      + Allow RawLocalFs#getFileLinkStatus to fill in the link owner and mode if requested
      +
    • +
    • HADOOP-9635. + Major bug reported by V. Karthik Kumar and fixed by (native)
      + Fix Potential Stack Overflow in DomainSocket.c
      +
    • +
    • HADOOP-9623. + Major improvement reported by Timothy St. Clair and fixed by Amandeep Khurana (fs/s3)
      + Update jets3t dependency to 0.9.0
      +
    • +
    • HADOOP-9618. + Major new feature reported by Todd Lipcon and fixed by Todd Lipcon (util)
      + Add thread which detects JVM pauses
      +
    • +
    • HADOOP-9611. + Major improvement reported by Timothy St. Clair and fixed by Timothy St. Clair (build)
      + mvn-rpmbuild against google-guice > 3.0 yields missing cglib dependency
      +
    • +
    • HADOOP-9598. + Major test reported by Aleksey Gorshkov and fixed by Andrey Klochkov
      + Improve code coverage of RMAdminCLI
      +
    • +
    • HADOOP-9594. + Major improvement reported by Timothy St. Clair and fixed by Timothy St. Clair (build)
      + Update apache commons math dependency
      +
    • +
    • HADOOP-9582. + Major bug reported by Ashwin Shankar and fixed by Ashwin Shankar (conf)
      + Non-existent file to "hadoop fs -conf" doesn't throw error
      +
    • +
    • HADOOP-9527. + Major bug reported by Arpit Agarwal and fixed by Arpit Agarwal (fs , test)
      + Add symlink support to LocalFileSystem on Windows
      +
    • +
    • HADOOP-9515. + Major new feature reported by Brandon Li and fixed by Brandon Li
      + Add general interface for NFS and Mount
      +
    • +
    • HADOOP-9509. + Major new feature reported by Brandon Li and fixed by Brandon Li
      + Implement ONCRPC and XDR
      +
    • +
    • HADOOP-9494. + Major improvement reported by Dennis Y and fixed by Andrey Klochkov
      + Excluded auto-generated and examples code from clover reports
      +
    • +
    • HADOOP-9487. + Major improvement reported by Steve Loughran and fixed by (conf)
      + Deprecation warnings in Configuration should go to their own log or otherwise be suppressible
      +
    • +
    • HADOOP-9470. + Major improvement reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky (test)
      + eliminate duplicate FQN tests in different Hadoop modules
      +
    • +
    • HADOOP-9432. + Minor new feature reported by Steve Loughran and fixed by (build , documentation)
      + Add support for markdown .md files in site documentation
      +
    • +
    • HADOOP-9421. + Blocker sub-task reported by Sanjay Radia and fixed by Daryn Sharp
      + Convert SASL to use ProtoBuf and provide negotiation capabilities
      +
      Raw SASL protocol now uses protobufs wrapped with RPC headers. The negotiation sequence incorporates the state of the exchange. The server now has the ability to advertise its supported auth types.
    • +
    • HADOOP-9420. + Major bug reported by Todd Lipcon and fixed by Liang Xie (ipc , metrics)
      + Add percentile or max metric for rpcQueueTime, processing time
      +
    • +
    • HADOOP-9417. + Major sub-task reported by Andrew Wang and fixed by Andrew Wang (fs)
      + Support for symlink resolution in LocalFileSystem / RawLocalFileSystem
      +
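      A small, hedged example of exercising local symlink support through FileContext; the paths are placeholders and error handling is omitted.

        import org.apache.hadoop.fs.FileContext;
        import org.apache.hadoop.fs.FileStatus;
        import org.apache.hadoop.fs.Path;

        public class LocalSymlinkExample {
          public static void main(String[] args) throws Exception {
            FileContext fc = FileContext.getLocalFSFileContext();
            Path target = new Path("/tmp/real-file");          // placeholder
            Path link = new Path("/tmp/link-to-real-file");    // placeholder
            fc.createSymlink(target, link, true);
            // getFileLinkStatus() describes the link itself rather than its target.
            FileStatus linkStatus = fc.getFileLinkStatus(link);
            System.out.println(link + " isSymlink=" + linkStatus.isSymlink()
                + " target=" + linkStatus.getSymlink());
          }
        }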
    • +
    • HADOOP-9350. + Minor bug reported by Steve Loughran and fixed by Robert Kanter (build)
      + Hadoop not building against Java7 on OSX
      +
    • +
    • HADOOP-9319. + Major improvement reported by Arpit Agarwal and fixed by Binglin Chang
      + Update bundled lz4 source to latest version
      +
    • +
    • HADOOP-9291. + Major test reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      + enhance unit-test coverage of package o.a.h.metrics2
      +
    • +
    • HADOOP-9254. + Major test reported by Vadim Bondarev and fixed by Vadim Bondarev
      + Cover packages org.apache.hadoop.util.bloom, org.apache.hadoop.util.hash
      +
    • +
    • HADOOP-9241. + Trivial improvement reported by Harsh J and fixed by Harsh J
      + DU refresh interval is not configurable
      +
      The 'du' (disk usage command from Unix) refresh monitor is now configurable in the same way as its 'df' counterpart, via the property 'fs.du.interval', which defaults to 10 minutes (the value is given in milliseconds).
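      A minimal sketch of setting the interval programmatically, assuming a one-minute refresh is wanted; the same key can equally be set in core-site.xml.

        import org.apache.hadoop.conf.Configuration;

        public class DuIntervalExample {
          public static void main(String[] args) {
            Configuration conf = new Configuration();
            // fs.du.interval is in milliseconds; 60000 ms = 1 minute (illustrative value).
            conf.setLong("fs.du.interval", 60000L);
            System.out.println("fs.du.interval = " + conf.getLong("fs.du.interval", -1));
          }
        }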
    • +
    • HADOOP-9225. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover package org.apache.hadoop.compress.Snappy
      +
    • +
    • HADOOP-9199. + Major test reported by Vadim Bondarev and fixed by Andrey Klochkov
      + Cover package org.apache.hadoop.io with unit tests
      +
    • +
    • HADOOP-9114. + Minor bug reported by liuyang and fixed by sathish
      + After defining dfs.checksum.type as NULL, writing a file and calling hflush will throw java.lang.ArrayIndexOutOfBoundsException
      +
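      A minimal sketch of the reported scenario, assuming a reachable default file system and a writable placeholder path /tmp/checksum-null-test; before the fix, this write-then-hflush sequence could fail with the ArrayIndexOutOfBoundsException.

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FSDataOutputStream;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;

        public class ChecksumNullHflush {
          public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("dfs.checksum.type", "NULL");   // disable checksums, as in the report
            FileSystem fs = FileSystem.get(conf);
            FSDataOutputStream out = fs.create(new Path("/tmp/checksum-null-test"));
            out.write("hello".getBytes("UTF-8"));
            out.hflush();                            // the call that used to throw
            out.close();
            fs.close();
          }
        }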
    • +
    • HADOOP-9078. + Major test reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      + enhance unit-test coverage of class org.apache.hadoop.fs.FileContext
      +
    • +
    • HADOOP-9063. + Minor test reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      + enhance unit-test coverage of class org.apache.hadoop.fs.FileUtil
      +
    • +
    • HADOOP-9016. + Minor bug reported by Ivan A. Veselovsky and fixed by Ivan A. Veselovsky
      org.apache.hadoop.fs.HarFileSystem.HarFSDataInputStream.HarFsInputStream.skip(long) must never return a negative value.
      +
    • +
    • HADOOP-8814. + Minor improvement reported by Brandon Li and fixed by Brandon Li (conf , fs , fs/s3 , ha , io , metrics , performance , record , security , util)
      + Inefficient comparison with the empty string. Use isEmpty() instead
      +
    • +
    • HADOOP-8753. + Minor bug reported by Nishan Shetty, Huawei and fixed by Benoy Antony
      + LocalDirAllocator throws "ArithmeticException: / by zero" when there is no available space on configured local dir
      +
    • +
    • HADOOP-8704. + Major improvement reported by Thomas Graves and fixed by Jonathan Eagles
      + add request logging to jetty/httpserver
      +
    • +
    • HADOOP-8545. + Major new feature reported by Tim Miller and fixed by Dmitry Mezhensky (fs)
      + Filesystem Implementation for OpenStack Swift
      +
      Added a file system implementation for OpenStack Swift. There are two implementations: block and native (similar to the Amazon S3 integration). The data locality issue is solved by a patch in Swift; the commit procedure to OpenStack is in progress.

      To use the implementation, add the following to core-site.xml:
      ...
        <property>
          <name>fs.swift.impl</name>
          <value>com.mirantis.fs.SwiftFileSystem</value>
        </property>
        <property>
          <name>fs.swift.block.impl</name>
          <value>com.mirantis.fs.block.SwiftBlockFileSystem</value>
        </property>
      ...

      In a MapReduce job, specify the following configs for OpenStack Keystone authentication:
        conf.set("swift.auth.url", "http://172.18.66.117:5000/v2.0/tokens");
        conf.set("swift.tenant", "superuser");
        conf.set("swift.username", "admin1");
        conf.set("swift.password", "password");
        conf.setInt("swift.http.port", 8080);
        conf.setInt("swift.https.port", 443);

      Additional information is available on GitHub: https://github.com/DmitryMezhensky/Hadoop-and-Swift-integration
    • +
    • HADOOP-7344. + Major bug reported by Daryn Sharp and fixed by Colin Patrick McCabe (fs)
      + globStatus doesn't grok groupings with a slash
      +
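      A hedged illustration of the kind of pattern the fix enables: a brace grouping whose alternatives each contain a slash. The paths are placeholders.

        import org.apache.hadoop.conf.Configuration;
        import org.apache.hadoop.fs.FileStatus;
        import org.apache.hadoop.fs.FileSystem;
        import org.apache.hadoop.fs.Path;

        public class GlobWithSlashExample {
          public static void main(String[] args) throws Exception {
            FileSystem fs = FileSystem.get(new Configuration());
            // Each brace alternative contains a '/', which previously broke glob parsing.
            FileStatus[] matches = fs.globStatus(new Path("/logs/{2013/06,2013/07}/*"));
            if (matches != null) {
              for (FileStatus st : matches) {
                System.out.println(st.getPath());
              }
            }
          }
        }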
    • +
    + - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js index 3ab21548f58..39450043cda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.js @@ -28,7 +28,7 @@ {"name": "nn", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo"}, {"name": "nnstat", "url": "/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus"}, {"name": "fs", "url": "/jmx?qry=Hadoop:service=NameNode,name=FSNamesystemState"}, - {"name": "mem", "url": "/jmx?qry=java.lang:type=Memory"}, + {"name": "mem", "url": "/jmx?qry=java.lang:type=Memory"} ]; var HELPERS = { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html index 50c7dfe46b4..f9c339748f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/explorer.html @@ -1,3 +1,5 @@ + - From 84ae8a3ca8d31b537ff13e5ad79272eaf6531401 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Fri, 14 Feb 2014 08:32:55 +0000 Subject: [PATCH 46/47] Move Flatten INode hierarchy jiras (HDFS-5531, HDFS-5285, HDFS-5286, HDFS-5537, HDFS-5554, HDFS-5647, HDFS-5632, HDFS-5715, HDFS-5726) to 2.4.0 section in CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1568218 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 54 ++++++++++----------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 31451c8c3de..3f9b95f4e49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -120,31 +120,6 @@ Trunk (Unreleased) HDFS-5041. Add the time of last heartbeat to dead server Web UI (Shinichi Yamashita via brandonli) - HDFS-5531. Combine the getNsQuota() and getDsQuota() methods in INode. - (szetszwo) - - HDFS-5285. Flatten INodeFile hierarchy: Replace INodeFileUnderConstruction - and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature. - (jing9 via szetszwo) - - HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota - with DirectoryWithQuotaFeature. (szetszwo) - - HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo) - - HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with - FileWithSnapshotFeature. (jing9 via szetszwo) - - HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai - via jing9) - - HDFS-5632. Flatten INodeDirectory hierarchy: Replace - INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature. - (jing9 via szetszwo) - - HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a - FileDiff/DirectoryDiff. (jing9) - HDFS-5721. sharedEditsImage in Namenode#initializeSharedEdits() should be closed before method returns. (Ted Yu via junping_du) @@ -275,8 +250,6 @@ Trunk (Unreleased) HDFS-5719. FSImage#doRollback() should close prevState before return (Ted Yu via brandonli) - HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9) - HDFS-5768. Consolidate the serialization code in DelegationTokenSecretManager (Haohui Mai via brandonli) @@ -376,6 +349,33 @@ Release 2.4.0 - UNRELEASED HDFS-5940. 
Minor cleanups to ShortCircuitReplica, FsDatasetCache, and DomainSocketWatcher (cmccabe) + HDFS-5531. Combine the getNsQuota() and getDsQuota() methods in INode. + (szetszwo) + + HDFS-5285. Flatten INodeFile hierarchy: Replace INodeFileUnderConstruction + and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature. + (jing9 via szetszwo) + + HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota + with DirectoryWithQuotaFeature. (szetszwo) + + HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo) + + HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with + FileWithSnapshotFeature. (jing9 via szetszwo) + + HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai + via jing9) + + HDFS-5632. Flatten INodeDirectory hierarchy: Replace + INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature. + (jing9 via szetszwo) + + HDFS-5715. Use Snapshot ID to indicate the corresponding Snapshot for a + FileDiff/DirectoryDiff. (jing9) + + HDFS-5726. Fix compilation error in AbstractINodeDiff for JDK7. (jing9) + OPTIMIZATIONS HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery From 72f63c8957880ed5e432fef0a1612b8cb44bd1e4 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 14 Feb 2014 18:07:04 +0000 Subject: [PATCH 47/47] HDFS-5943. 'dfs.namenode.https-address' property is not loaded from configuration in federation setup. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1568412 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hadoop/hdfs/server/namenode/NameNode.java | 65 ++++++++----------- 2 files changed, 30 insertions(+), 38 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 3f9b95f4e49..8cc4ddd4a50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -448,6 +448,9 @@ Release 2.4.0 - UNRELEASED HDFS-5901. NameNode new UI doesn't support IE8 and IE9 on windows 7 (Vinayakumar B via brandonli) + HDFS-5943. 'dfs.namenode.https-address' property is not loaded from + configuration in federation setup. 
(suresh) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 726ddd211c4..6df82cbd3f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -17,39 +17,22 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; - -import java.io.IOException; -import java.io.PrintStream; -import java.net.InetSocketAddress; -import java.net.URI; -import java.security.PrivilegedExceptionAction; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; - -import javax.management.ObjectName; - +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Trash; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.ha.HAServiceStatus; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.ServiceFailedException; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Trash; - -import static org.apache.hadoop.hdfs.DFSConfigKeys.*; -import static org.apache.hadoop.util.ExitUtil.terminate; -import static org.apache.hadoop.util.ToolRunner.confirmPrompt; - import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; @@ -58,20 +41,11 @@ import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; -import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; -import org.apache.hadoop.hdfs.server.namenode.ha.BootstrapStandby; -import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; -import org.apache.hadoop.hdfs.server.namenode.ha.HAState; -import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; +import org.apache.hadoop.hdfs.server.namenode.ha.*; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgressMetrics; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; -import 
org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.hdfs.server.protocol.*; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -89,10 +63,23 @@ import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.StringUtils; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; +import javax.management.ObjectName; +import java.io.IOException; +import java.io.PrintStream; +import java.net.InetSocketAddress; +import java.net.URI; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import static org.apache.hadoop.util.ExitUtil.terminate; +import static org.apache.hadoop.util.ToolRunner.confirmPrompt; /********************************************************** * NameNode serves as both directory namespace manager and @@ -183,8 +170,10 @@ public class NameNode implements NameNodeStatusMXBean { DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_SERVICE_RPC_BIND_HOST_KEY, DFS_NAMENODE_HTTP_ADDRESS_KEY, + DFS_NAMENODE_HTTPS_ADDRESS_KEY, DFS_NAMENODE_KEYTAB_FILE_KEY, DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, + DFS_NAMENODE_SECONDARY_HTTPS_ADDRESS_KEY, DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY, DFS_NAMENODE_BACKUP_ADDRESS_KEY, DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY,