From f6c28639005f46bc171a9a990e2ad4d7afb4ce73 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 9 Jan 2013 18:12:27 +0000 Subject: [PATCH 01/52] Branch for HDFS-347 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1430995 13f79535-47bb-0310-9956-ffa450edef68 From c9db06f2e4d1c1f71f021d5070323f9fc194cdd7 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 9 Jan 2013 21:34:13 +0000 Subject: [PATCH 02/52] HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1431097 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/net/SocketInputStream.java | 4 +- .../apache/hadoop/net/SocketOutputStream.java | 4 + .../hadoop-hdfs/CHANGES.HDFS-347.txt | 6 + .../org/apache/hadoop/hdfs/BlockReader.java | 31 +-- .../hadoop/hdfs/BlockReaderFactory.java | 69 ++---- .../apache/hadoop/hdfs/BlockReaderLocal.java | 17 +- .../org/apache/hadoop/hdfs/DFSClient.java | 4 +- .../apache/hadoop/hdfs/DFSInputStream.java | 93 ++++---- .../hdfs/{SocketCache.java => PeerCache.java} | 129 +++++------ .../apache/hadoop/hdfs/RemoteBlockReader.java | 80 +++---- .../hadoop/hdfs/RemoteBlockReader2.java | 108 ++++----- .../apache/hadoop/hdfs/net/BasicInetPeer.java | 121 ++++++++++ .../apache/hadoop/hdfs/net/EncryptedPeer.java | 136 +++++++++++ .../apache/hadoop/hdfs/net/NioInetPeer.java | 125 ++++++++++ .../java/org/apache/hadoop/hdfs/net/Peer.java | 108 +++++++++ .../apache/hadoop/hdfs/net/PeerServer.java | 60 +++++ .../apache/hadoop/hdfs/net/TcpPeerServer.java | 156 +++++++++++++ .../hadoop/hdfs/server/common/JspHelper.java | 9 +- .../hadoop/hdfs/server/datanode/DataNode.java | 22 +- .../hdfs/server/datanode/DataXceiver.java | 92 ++++---- .../server/datanode/DataXceiverServer.java | 60 +++-- .../hdfs/server/namenode/NamenodeFsck.java | 8 +- .../hadoop/hdfs/BlockReaderTestUtil.java | 9 +- .../hdfs/TestClientBlockVerification.java | 8 +- .../org/apache/hadoop/hdfs/TestConnCache.java | 189 +++------------ .../hdfs/TestDataTransferKeepalive.java | 17 +- .../hadoop/hdfs/TestDisableConnCache.java | 62 +++++ .../org/apache/hadoop/hdfs/TestPeerCache.java | 218 ++++++++++++++++++ .../apache/hadoop/hdfs/TestSocketCache.java | 171 -------------- .../TestBlockTokenWithDFS.java | 6 +- .../datanode/TestDataNodeVolumeFailure.java | 6 +- 31 files changed, 1348 insertions(+), 780 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt rename hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/{SocketCache.java => PeerCache.java} (61%) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java delete mode 100644 
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSocketCache.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java index 46039a5506e..cfa7b01e813 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketInputStream.java @@ -19,6 +19,7 @@ package org.apache.hadoop.net; import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience; import java.io.InputStream; import java.net.Socket; import java.net.SocketTimeoutException; @@ -37,7 +38,8 @@ import java.nio.channels.SelectionKey; * IllegalBlockingModeException. * Please use {@link SocketOutputStream} for writing. */ -class SocketInputStream extends InputStream +@InterfaceAudience.LimitedPrivate("HDFS") +public class SocketInputStream extends InputStream implements ReadableByteChannel { private Reader reader; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java index 091c684059b..ead1d7b2b05 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/SocketOutputStream.java @@ -260,4 +260,8 @@ public class SocketOutputStream extends OutputStream throws IOException { transferToFully(fileCh, position, count, null, null); } + + public void setTimeout(int timeoutMs) { + writer.setTimeout(timeoutMs); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt new file mode 100644 index 00000000000..7c509ee7e80 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -0,0 +1,6 @@ +CHANGES for HDFS-347 branch. +These will be integrated to trunk CHANGES.txt after merge + + +HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. +(Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java index cd6dc2d25ed..2bbae525898 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java @@ -18,10 +18,8 @@ package org.apache.hadoop.hdfs; import java.io.IOException; -import java.net.Socket; import org.apache.hadoop.fs.ByteBufferReadable; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; /** * A BlockReader is responsible for reading a single block @@ -43,7 +41,18 @@ public interface BlockReader extends ByteBufferReadable { */ long skip(long n) throws IOException; - void close() throws IOException; + /** + * Close the block reader. + * + * @param peerCache The PeerCache to put the Peer we're using back + * into, or null if we should simply close the Peer + * we're using (along with its Socket). + * Some block readers, like BlockReaderLocal, may + * not make use of this parameter. 
+ * + * @throws IOException + */ + void close(PeerCache peerCache) throws IOException; /** * Read exactly the given amount of data, throwing an exception @@ -60,20 +69,4 @@ public interface BlockReader extends ByteBufferReadable { * filled or the next call will return EOF. */ int readAll(byte[] buf, int offset, int len) throws IOException; - - /** - * Take the socket used to talk to the DN. - */ - Socket takeSocket(); - - /** - * Whether the BlockReader has reached the end of its input stream - * and successfully sent a status code back to the datanode. - */ - boolean hasSentStatusCode(); - - /** - * @return a reference to the streams this block reader is using. - */ - IOStreamPair getStreams(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index c71f1ced6a6..d6d39304966 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -24,6 +24,8 @@ import java.net.Socket; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSClient.Conf; +import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; @@ -39,72 +41,51 @@ import org.apache.hadoop.security.token.Token; */ @InterfaceAudience.Private public class BlockReaderFactory { - /** - * @see #newBlockReader(Conf, Socket, String, ExtendedBlock, Token, long, long, int, boolean, String) - */ - public static BlockReader newBlockReader( - Configuration conf, - Socket sock, String file, - ExtendedBlock block, Token blockToken, - long startOffset, long len, DataEncryptionKey encryptionKey) - throws IOException { - int bufferSize = conf.getInt(DFSConfigKeys.IO_FILE_BUFFER_SIZE_KEY, - DFSConfigKeys.IO_FILE_BUFFER_SIZE_DEFAULT); - return newBlockReader(new Conf(conf), - sock, file, block, blockToken, startOffset, - len, bufferSize, true, "", encryptionKey, null); - } - /** * Create a new BlockReader specifically to satisfy a read. * This method also sends the OP_READ_BLOCK request. * * @param conf the DFSClient configuration - * @param sock An established Socket to the DN. The BlockReader will not close it normally * @param file File location * @param block The block object * @param blockToken The block token for security * @param startOffset The read offset, relative to block head - * @param len The number of bytes to read + * @param len The number of bytes to read, or -1 to read as many as + * possible. * @param bufferSize The IO buffer size (not the client buffer size) + * Ignored except on the legacy BlockReader. * @param verifyChecksum Whether to verify checksum - * @param clientName Client name + * @param clientName Client name. Used for log messages. + * @param peer The peer + * @param datanodeID The datanode that the Peer is connected to * @return New BlockReader instance, or null on error. 
*/ @SuppressWarnings("deprecation") public static BlockReader newBlockReader( - Conf conf, - Socket sock, String file, + Configuration conf, + String file, ExtendedBlock block, Token blockToken, long startOffset, long len, - int bufferSize, boolean verifyChecksum, + boolean verifyChecksum, String clientName, - DataEncryptionKey encryptionKey, - IOStreamPair ioStreams) + Peer peer, + DatanodeID datanodeID) throws IOException { - - if (conf.useLegacyBlockReader) { - if (encryptionKey != null) { - throw new RuntimeException("Encryption is not supported with the legacy block reader."); - } - return RemoteBlockReader.newBlockReader( - sock, file, block, blockToken, startOffset, len, bufferSize, verifyChecksum, clientName); + peer.setReadTimeout(conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, + HdfsServerConstants.READ_TIMEOUT)); + peer.setWriteTimeout(HdfsServerConstants.WRITE_TIMEOUT); + if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER, + DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT)) { + return RemoteBlockReader.newBlockReader(file, + block, blockToken, startOffset, len, + conf.getInt(DFSConfigKeys.IO_FILE_BUFFER_SIZE_KEY, + DFSConfigKeys.IO_FILE_BUFFER_SIZE_DEFAULT), + verifyChecksum, clientName, peer, datanodeID); } else { - if (ioStreams == null) { - ioStreams = new IOStreamPair(NetUtils.getInputStream(sock), - NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT)); - if (encryptionKey != null) { - IOStreamPair encryptedStreams = - DataTransferEncryptor.getEncryptedStreams( - ioStreams.out, ioStreams.in, encryptionKey); - ioStreams = encryptedStreams; - } - } - return RemoteBlockReader2.newBlockReader( - sock, file, block, blockToken, startOffset, len, bufferSize, - verifyChecksum, clientName, encryptionKey, ioStreams); + file, block, blockToken, startOffset, len, + verifyChecksum, clientName, peer, datanodeID); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index 8b1f0bdc0a6..eacd902aa2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -649,7 +649,7 @@ class BlockReaderLocal implements BlockReader { } @Override - public synchronized void close() throws IOException { + public synchronized void close(PeerCache peerCache) throws IOException { dataIn.close(); if (checksumIn != null) { checksumIn.close(); @@ -675,19 +675,4 @@ class BlockReaderLocal implements BlockReader { public void readFully(byte[] buf, int off, int len) throws IOException { BlockReaderUtil.readFully(this, buf, off, len); } - - @Override - public Socket takeSocket() { - return null; - } - - @Override - public boolean hasSentStatusCode() { - return false; - } - - @Override - public IOStreamPair getStreams() { - return null; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 14bb1d22b64..8230b992cff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -191,7 +191,7 @@ public class DFSClient implements java.io.Closeable { final FileSystem.Statistics stats; final int hdfsTimeout; // timeout value 
for a DFS operation. private final String authority; - final SocketCache socketCache; + final PeerCache peerCache; final Conf dfsClientConf; private Random r = new Random(); private SocketAddress[] localInterfaceAddrs; @@ -432,7 +432,7 @@ public class DFSClient implements java.io.Closeable { Joiner.on(',').join(localInterfaceAddrs) + "]"); } - this.socketCache = SocketCache.getInstance(dfsClientConf.socketCacheCapacity, dfsClientConf.socketCacheExpiry); + this.peerCache = PeerCache.getInstance(dfsClientConf.socketCacheCapacity, dfsClientConf.socketCacheExpiry); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 1e986cd1350..aec1726a3ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -32,12 +32,15 @@ import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.io.IOUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.UnresolvedLinkException; -import org.apache.hadoop.hdfs.SocketCache.SocketAndStreams; +import org.apache.hadoop.hdfs.net.EncryptedPeer; +import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -46,6 +49,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException; import org.apache.hadoop.ipc.RPC; @@ -60,7 +64,7 @@ import org.apache.hadoop.security.token.Token; ****************************************************************/ @InterfaceAudience.Private public class DFSInputStream extends FSInputStream implements ByteBufferReadable { - private final SocketCache socketCache; + private final PeerCache peerCache; private final DFSClient dfsClient; private boolean closed = false; @@ -110,7 +114,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable this.verifyChecksum = verifyChecksum; this.buffersize = buffersize; this.src = src; - this.socketCache = dfsClient.socketCache; + this.peerCache = dfsClient.peerCache; prefetchSize = dfsClient.getConf().prefetchSize; timeWindow = dfsClient.getConf().timeWindow; nCachedConnRetry = dfsClient.getConf().nCachedConnRetry; @@ -424,7 +428,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable // Will be getting a new BlockReader. 
if (blockReader != null) { - closeBlockReader(blockReader); + blockReader.close(peerCache); blockReader = null; } @@ -506,7 +510,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable dfsClient.checkOpen(); if (blockReader != null) { - closeBlockReader(blockReader); + blockReader.close(peerCache); blockReader = null; } super.close(); @@ -833,7 +837,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable } } finally { if (reader != null) { - closeBlockReader(reader); + reader.close(peerCache); } } // Put chosen node into dead list, continue @@ -841,16 +845,25 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable } } - /** - * Close the given BlockReader and cache its socket. - */ - private void closeBlockReader(BlockReader reader) throws IOException { - if (reader.hasSentStatusCode()) { - IOStreamPair ioStreams = reader.getStreams(); - Socket oldSock = reader.takeSocket(); - socketCache.put(oldSock, ioStreams); + private Peer newPeer(InetSocketAddress addr) throws IOException { + Peer peer = null; + boolean success = false; + Socket sock = null; + try { + sock = dfsClient.socketFactory.createSocket(); + NetUtils.connect(sock, addr, + dfsClient.getRandomLocalInterfaceAddr(), + dfsClient.getConf().socketTimeout); + peer = TcpPeerServer.peerFromSocketAndKey(sock, + dfsClient.getDataEncryptionKey()); + success = true; + return peer; + } finally { + if (!success) { + IOUtils.closeQuietly(peer); + IOUtils.closeQuietly(sock); + } } - reader.close(); } /** @@ -896,62 +909,34 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable // Allow retry since there is no way of knowing whether the cached socket // is good until we actually use it. for (int retries = 0; retries <= nCachedConnRetry && fromCache; ++retries) { - SocketAndStreams sockAndStreams = null; + Peer peer = null; // Don't use the cache on the last attempt - it's possible that there // are arbitrarily many unusable sockets in the cache, but we don't // want to fail the read. if (retries < nCachedConnRetry) { - sockAndStreams = socketCache.get(dnAddr); + peer = peerCache.get(chosenNode); } - Socket sock; - if (sockAndStreams == null) { + if (peer == null) { + peer = newPeer(dnAddr); fromCache = false; - - sock = dfsClient.socketFactory.createSocket(); - - // TCP_NODELAY is crucial here because of bad interactions between - // Nagle's Algorithm and Delayed ACKs. With connection keepalive - // between the client and DN, the conversation looks like: - // 1. Client -> DN: Read block X - // 2. DN -> Client: data for block X - // 3. Client -> DN: Status OK (successful read) - // 4. Client -> DN: Read block Y - // The fact that step #3 and #4 are both in the client->DN direction - // triggers Nagling. If the DN is using delayed ACKs, this results - // in a delay of 40ms or more. - // - // TCP_NODELAY disables nagling and thus avoids this performance - // disaster. 
- sock.setTcpNoDelay(true); - - NetUtils.connect(sock, dnAddr, - dfsClient.getRandomLocalInterfaceAddr(), - dfsClient.getConf().socketTimeout); - sock.setSoTimeout(dfsClient.getConf().socketTimeout); - } else { - sock = sockAndStreams.sock; } try { // The OP_READ_BLOCK request is sent as we make the BlockReader BlockReader reader = - BlockReaderFactory.newBlockReader(dfsClient.getConf(), - sock, file, block, + BlockReaderFactory.newBlockReader(dfsClient.conf, + file, block, blockToken, startOffset, len, - bufferSize, verifyChecksum, + verifyChecksum, clientName, - dfsClient.getDataEncryptionKey(), - sockAndStreams == null ? null : sockAndStreams.ioStreams); + peer, + chosenNode); return reader; } catch (IOException ex) { // Our socket is no good. - DFSClient.LOG.debug("Error making BlockReader. Closing stale " + sock, ex); - if (sockAndStreams != null) { - sockAndStreams.close(); - } else { - sock.close(); - } + DFSClient.LOG.debug("Error making BlockReader. Closing stale " + peer, ex); + IOUtils.closeQuietly(peer); err = ex; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java similarity index 61% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java rename to hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java index 596b0176c40..09b2ef70b14 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java @@ -18,69 +18,55 @@ package org.apache.hadoop.hdfs; -import java.io.Closeable; -import java.net.Socket; -import java.net.SocketAddress; - import java.util.Iterator; import java.util.List; import java.util.Map.Entry; -import java.io.IOException; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.LinkedListMultimap; import org.apache.commons.logging.Log; -import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; /** * A cache of input stream sockets to Data Node. 
*/ -class SocketCache { - private static final Log LOG = LogFactory.getLog(SocketCache.class); +class PeerCache { + private static final Log LOG = LogFactory.getLog(PeerCache.class); + + private static class Value { + private final Peer peer; + private final long time; - @InterfaceAudience.Private - static class SocketAndStreams implements Closeable { - public final Socket sock; - public final IOStreamPair ioStreams; - long createTime; - - public SocketAndStreams(Socket s, IOStreamPair ioStreams) { - this.sock = s; - this.ioStreams = ioStreams; - this.createTime = Time.monotonicNow(); - } - - @Override - public void close() { - if (ioStreams != null) { - IOUtils.closeStream(ioStreams.in); - IOUtils.closeStream(ioStreams.out); - } - IOUtils.closeSocket(sock); + Value(Peer peer, long time) { + this.peer = peer; + this.time = time; } - public long getCreateTime() { - return this.createTime; + Peer getPeer() { + return peer; + } + + long getTime() { + return time; } } private Daemon daemon; /** A map for per user per datanode. */ - private static LinkedListMultimap multimap = + private static LinkedListMultimap multimap = LinkedListMultimap.create(); private static int capacity; private static long expiryPeriod; - private static SocketCache scInstance = new SocketCache(); + private static PeerCache instance = new PeerCache(); private static boolean isInitedOnce = false; - public static synchronized SocketCache getInstance(int c, long e) { + public static synchronized PeerCache getInstance(int c, long e) { // capacity is only initialized once if (isInitedOnce == false) { capacity = c; @@ -102,7 +88,7 @@ class SocketCache { } } - return scInstance; + return instance; } private boolean isDaemonStarted() { @@ -119,44 +105,45 @@ class SocketCache { @Override public void run() { try { - SocketCache.this.run(); + PeerCache.this.run(); } catch(InterruptedException e) { //noop } finally { - SocketCache.this.clear(); + PeerCache.this.clear(); } } @Override public String toString() { - return String.valueOf(SocketCache.this); + return String.valueOf(PeerCache.this); } }); daemon.start(); } /** - * Get a cached socket to the given address. - * @param remote Remote address the socket is connected to. - * @return A socket with unknown state, possibly closed underneath. Or null. + * Get a cached peer connected to the given DataNode. + * @param dnId The DataNode to get a Peer for. + * @return An open Peer connected to the DN, or null if none + * was found. */ - public synchronized SocketAndStreams get(SocketAddress remote) { + public synchronized Peer get(DatanodeID dnId) { if (capacity <= 0) { // disabled return null; } - List sockStreamList = multimap.get(remote); + List sockStreamList = multimap.get(dnId); if (sockStreamList == null) { return null; } - Iterator iter = sockStreamList.iterator(); + Iterator iter = sockStreamList.iterator(); while (iter.hasNext()) { - SocketAndStreams candidate = iter.next(); + Value candidate = iter.next(); iter.remove(); - if (!candidate.sock.isClosed()) { - return candidate; + if (!candidate.getPeer().isClosed()) { + return candidate.getPeer(); } } return null; @@ -166,30 +153,22 @@ class SocketCache { * Give an unused socket to the cache. * @param sock socket not used by anyone. 
*/ - public synchronized void put(Socket sock, IOStreamPair ioStreams) { - - Preconditions.checkNotNull(sock); - SocketAndStreams s = new SocketAndStreams(sock, ioStreams); + public synchronized void put(DatanodeID dnId, Peer peer) { + Preconditions.checkNotNull(dnId); + Preconditions.checkNotNull(peer); + if (peer.isClosed()) return; if (capacity <= 0) { // Cache disabled. - s.close(); + IOUtils.cleanup(LOG, peer); return; } startExpiryDaemon(); - SocketAddress remoteAddr = sock.getRemoteSocketAddress(); - if (remoteAddr == null) { - LOG.warn("Cannot cache (unconnected) socket with no remote address: " + - sock); - IOUtils.closeSocket(sock); - return; - } - if (capacity == multimap.size()) { evictOldest(); } - multimap.put(remoteAddr, s); + multimap.put(dnId, new Value(peer, Time.monotonicNow())); } public synchronized int size() { @@ -201,18 +180,17 @@ class SocketCache { */ private synchronized void evictExpired(long expiryPeriod) { while (multimap.size() != 0) { - Iterator> iter = + Iterator> iter = multimap.entries().iterator(); - Entry entry = iter.next(); + Entry entry = iter.next(); // if oldest socket expired, remove it if (entry == null || - Time.monotonicNow() - entry.getValue().getCreateTime() < + Time.monotonicNow() - entry.getValue().getTime() < expiryPeriod) { break; } + IOUtils.cleanup(LOG, entry.getValue().getPeer()); iter.remove(); - SocketAndStreams s = entry.getValue(); - s.close(); } } @@ -220,16 +198,18 @@ class SocketCache { * Evict the oldest entry in the cache. */ private synchronized void evictOldest() { - Iterator> iter = + // We can get the oldest element immediately, because of an interesting + // property of LinkedListMultimap: its iterator traverses entries in the + // order that they were added. + Iterator> iter = multimap.entries().iterator(); if (!iter.hasNext()) { throw new IllegalStateException("Cannot evict from empty cache! " + "capacity: " + capacity); } - Entry entry = iter.next(); + Entry entry = iter.next(); + IOUtils.cleanup(LOG, entry.getValue().getPeer()); iter.remove(); - SocketAndStreams s = entry.getValue(); - s.close(); } /** @@ -253,9 +233,10 @@ class SocketCache { /** * Empty the cache, and close all sockets. 
*/ - private synchronized void clear() { - for (SocketAndStreams sockAndStream : multimap.values()) { - sockAndStream.close(); + @VisibleForTesting + synchronized void clear() { + for (Value value : multimap.values()) { + IOUtils.cleanup(LOG, value.getPeer()); } multimap.clear(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java index 2bcd96e7644..2a9523351c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java @@ -31,6 +31,8 @@ import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.FSInputChecker; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; @@ -55,8 +57,8 @@ import org.apache.hadoop.util.DataChecksum; @InterfaceAudience.Private @Deprecated public class RemoteBlockReader extends FSInputChecker implements BlockReader { - - Socket dnSock; //for now just sending the status code (e.g. checksumOk) after the read. + private final Peer peer; + private final DatanodeID datanodeID; private final DataInputStream in; private DataChecksum checksum; @@ -126,9 +128,9 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { // if eos was set in the previous read, send a status code to the DN if (eos && !eosBefore && nRead >= 0) { if (needChecksum()) { - sendReadResult(dnSock, Status.CHECKSUM_OK); + sendReadResult(peer, Status.CHECKSUM_OK); } else { - sendReadResult(dnSock, Status.SUCCESS); + sendReadResult(peer, Status.SUCCESS); } } return nRead; @@ -322,7 +324,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { private RemoteBlockReader(String file, String bpid, long blockId, DataInputStream in, DataChecksum checksum, boolean verifyChecksum, - long startOffset, long firstChunkOffset, long bytesToRead, Socket dnSock) { + long startOffset, long firstChunkOffset, long bytesToRead, Peer peer, + DatanodeID datanodeID) { // Path is used only for printing block and file information in debug super(new Path("/blk_" + blockId + ":" + bpid + ":of:"+ file)/*too non path-like?*/, 1, verifyChecksum, @@ -330,7 +333,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { checksum.getBytesPerChecksum(), checksum.getChecksumSize()); - this.dnSock = dnSock; + this.peer = peer; + this.datanodeID = datanodeID; this.in = in; this.checksum = checksum; this.startOffset = Math.max( startOffset, 0 ); @@ -349,13 +353,6 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { checksumSize = this.checksum.getChecksumSize(); } - public static RemoteBlockReader newBlockReader(Socket sock, String file, - ExtendedBlock block, Token blockToken, - long startOffset, long len, int bufferSize) throws IOException { - return newBlockReader(sock, file, block, blockToken, startOffset, - len, bufferSize, true, ""); - } - /** * Create a new BlockReader specifically to satisfy a read. * This method also sends the OP_READ_BLOCK request. 
@@ -371,16 +368,17 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { * @param clientName Client name * @return New BlockReader instance, or null on error. */ - public static RemoteBlockReader newBlockReader( Socket sock, String file, + public static RemoteBlockReader newBlockReader(String file, ExtendedBlock block, Token blockToken, long startOffset, long len, int bufferSize, boolean verifyChecksum, - String clientName) + String clientName, Peer peer, + DatanodeID datanodeID) throws IOException { // in and out will be closed when sock is closed (by the caller) - final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( - NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT))); + final DataOutputStream out = + new DataOutputStream(new BufferedOutputStream(peer.getOutputStream())); new Sender(out).readBlock(block, blockToken, clientName, startOffset, len); // @@ -388,12 +386,11 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { // DataInputStream in = new DataInputStream( - new BufferedInputStream(NetUtils.getInputStream(sock), - bufferSize)); + new BufferedInputStream(peer.getInputStream(), bufferSize)); BlockOpResponseProto status = BlockOpResponseProto.parseFrom( vintPrefixed(in)); - RemoteBlockReader2.checkSuccess(status, sock, block, file); + RemoteBlockReader2.checkSuccess(status, peer, block, file); ReadOpChecksumInfoProto checksumInfo = status.getReadOpChecksumInfo(); DataChecksum checksum = DataTransferProtoUtil.fromProto( @@ -411,15 +408,18 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { } return new RemoteBlockReader(file, block.getBlockPoolId(), block.getBlockId(), - in, checksum, verifyChecksum, startOffset, firstChunkOffset, len, sock); + in, checksum, verifyChecksum, startOffset, firstChunkOffset, len, + peer, datanodeID); } @Override - public synchronized void close() throws IOException { + public synchronized void close(PeerCache peerCache) throws IOException { startOffset = -1; checksum = null; - if (dnSock != null) { - dnSock.close(); + if (peerCache != null & sentStatusCode) { + peerCache.put(datanodeID, peer); + } else { + peer.close(); } // in will be closed when its Socket is closed. @@ -436,37 +436,21 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { return readFully(this, buf, offset, len); } - @Override - public Socket takeSocket() { - assert hasSentStatusCode() : - "BlockReader shouldn't give back sockets mid-read"; - Socket res = dnSock; - dnSock = null; - return res; - } - - @Override - public boolean hasSentStatusCode() { - return sentStatusCode; - } - /** * When the reader reaches end of the read, it sends a status response * (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN * closing our connection (which we will re-open), but won't affect * data correctness. */ - void sendReadResult(Socket sock, Status statusCode) { - assert !sentStatusCode : "already sent status code to " + sock; + void sendReadResult(Peer peer, Status statusCode) { + assert !sentStatusCode : "already sent status code to " + peer; try { - RemoteBlockReader2.writeReadResult( - NetUtils.getOutputStream(sock, HdfsServerConstants.WRITE_TIMEOUT), - statusCode); + RemoteBlockReader2.writeReadResult(peer.getOutputStream(), statusCode); sentStatusCode = true; } catch (IOException e) { // It's ok not to be able to send this. But something is probably wrong. 
LOG.info("Could not send read status (" + statusCode + ") to datanode " + - sock.getInetAddress() + ": " + e.getMessage()); + peer.getRemoteAddressString() + ": " + e.getMessage()); } } @@ -486,12 +470,4 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { public int read(ByteBuffer buf) throws IOException { throw new UnsupportedOperationException("readDirect unsupported in RemoteBlockReader"); } - - @Override - public IOStreamPair getStreams() { - // This class doesn't support encryption, which is the only thing this - // method is used for. See HDFS-3637. - return null; - } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java index b9a5c76ec31..007249b7922 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java @@ -25,16 +25,16 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; -import java.net.Socket; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver; import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; @@ -45,10 +45,11 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; -import org.apache.hadoop.net.SocketInputWrapper; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; +import com.google.common.annotations.VisibleForTesting; + /** * This is a wrapper around connection to datanode * and understands checksum, offset etc. @@ -80,9 +81,8 @@ public class RemoteBlockReader2 implements BlockReader { static final Log LOG = LogFactory.getLog(RemoteBlockReader2.class); - Socket dnSock; - // for now just sending the status code (e.g. checksumOk) after the read. 
- private IOStreamPair ioStreams; + final private Peer peer; + final private DatanodeID datanodeID; private final ReadableByteChannel in; private DataChecksum checksum; @@ -115,6 +115,11 @@ public class RemoteBlockReader2 implements BlockReader { /** Amount of unread data in the current received packet */ int dataLeft = 0; + @VisibleForTesting + public Peer getPeer() { + return peer; + } + @Override public synchronized int read(byte[] buf, int off, int len) throws IOException { @@ -247,13 +252,13 @@ public class RemoteBlockReader2 implements BlockReader { } protected RemoteBlockReader2(String file, String bpid, long blockId, - ReadableByteChannel in, DataChecksum checksum, boolean verifyChecksum, - long startOffset, long firstChunkOffset, long bytesToRead, Socket dnSock, - IOStreamPair ioStreams) { + DataChecksum checksum, boolean verifyChecksum, + long startOffset, long firstChunkOffset, long bytesToRead, Peer peer, + DatanodeID datanodeID) { // Path is used only for printing block and file information in debug - this.dnSock = dnSock; - this.ioStreams = ioStreams; - this.in = in; + this.peer = peer; + this.datanodeID = datanodeID; + this.in = peer.getInputStreamChannel(); this.checksum = checksum; this.verifyChecksum = verifyChecksum; this.startOffset = Math.max( startOffset, 0 ); @@ -270,39 +275,19 @@ public class RemoteBlockReader2 implements BlockReader { @Override - public synchronized void close() throws IOException { + public synchronized void close(PeerCache peerCache) throws IOException { packetReceiver.close(); - startOffset = -1; checksum = null; - if (dnSock != null) { - dnSock.close(); + if (peerCache != null && sentStatusCode) { + peerCache.put(datanodeID, peer); + } else { + peer.close(); } // in will be closed when its Socket is closed. } - /** - * Take the socket used to talk to the DN. - */ - @Override - public Socket takeSocket() { - assert hasSentStatusCode() : - "BlockReader shouldn't give back sockets mid-read"; - Socket res = dnSock; - dnSock = null; - return res; - } - - /** - * Whether the BlockReader has reached the end of its input stream - * and successfully sent a status code back to the datanode. - */ - @Override - public boolean hasSentStatusCode() { - return sentStatusCode; - } - /** * When the reader reaches end of the read, it sends a status response * (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN @@ -310,14 +295,14 @@ public class RemoteBlockReader2 implements BlockReader { * data correctness. */ void sendReadResult(Status statusCode) { - assert !sentStatusCode : "already sent status code to " + dnSock; + assert !sentStatusCode : "already sent status code to " + peer; try { - writeReadResult(ioStreams.out, statusCode); + writeReadResult(peer.getOutputStream(), statusCode); sentStatusCode = true; } catch (IOException e) { // It's ok not to be able to send this. But something is probably wrong. 
LOG.info("Could not send read status (" + statusCode + ") to datanode " + - dnSock.getInetAddress() + ": " + e.getMessage()); + peer.getRemoteAddressString() + ": " + e.getMessage()); } } @@ -368,41 +353,33 @@ public class RemoteBlockReader2 implements BlockReader { * @param blockToken The block token for security * @param startOffset The read offset, relative to block head * @param len The number of bytes to read - * @param bufferSize The IO buffer size (not the client buffer size) * @param verifyChecksum Whether to verify checksum * @param clientName Client name + * @param peer The Peer to use + * @param datanodeID The DatanodeID this peer is connected to * @return New BlockReader instance, or null on error. */ - public static BlockReader newBlockReader(Socket sock, String file, + public static BlockReader newBlockReader(String file, ExtendedBlock block, Token blockToken, long startOffset, long len, - int bufferSize, boolean verifyChecksum, + boolean verifyChecksum, String clientName, - DataEncryptionKey encryptionKey, - IOStreamPair ioStreams) + Peer peer, DatanodeID datanodeID) throws IOException { - - ReadableByteChannel ch; - if (ioStreams.in instanceof SocketInputWrapper) { - ch = ((SocketInputWrapper)ioStreams.in).getReadableByteChannel(); - } else { - ch = (ReadableByteChannel) ioStreams.in; - } - // in and out will be closed when sock is closed (by the caller) final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( - ioStreams.out)); + peer.getOutputStream())); new Sender(out).readBlock(block, blockToken, clientName, startOffset, len); // // Get bytes in block // - DataInputStream in = new DataInputStream(ioStreams.in); + DataInputStream in = new DataInputStream(peer.getInputStream()); BlockOpResponseProto status = BlockOpResponseProto.parseFrom( vintPrefixed(in)); - checkSuccess(status, sock, block, file); + checkSuccess(status, peer, block, file); ReadOpChecksumInfoProto checksumInfo = status.getReadOpChecksumInfo(); DataChecksum checksum = DataTransferProtoUtil.fromProto( @@ -420,34 +397,29 @@ public class RemoteBlockReader2 implements BlockReader { } return new RemoteBlockReader2(file, block.getBlockPoolId(), block.getBlockId(), - ch, checksum, verifyChecksum, startOffset, firstChunkOffset, len, sock, - ioStreams); + checksum, verifyChecksum, startOffset, firstChunkOffset, len, peer, + datanodeID); } static void checkSuccess( - BlockOpResponseProto status, Socket sock, + BlockOpResponseProto status, Peer peer, ExtendedBlock block, String file) throws IOException { if (status.getStatus() != Status.SUCCESS) { if (status.getStatus() == Status.ERROR_ACCESS_TOKEN) { throw new InvalidBlockTokenException( "Got access token error for OP_READ_BLOCK, self=" - + sock.getLocalSocketAddress() + ", remote=" - + sock.getRemoteSocketAddress() + ", for file " + file + + peer.getLocalAddressString() + ", remote=" + + peer.getRemoteAddressString() + ", for file " + file + ", for pool " + block.getBlockPoolId() + " block " + block.getBlockId() + "_" + block.getGenerationStamp()); } else { throw new IOException("Got error for OP_READ_BLOCK, self=" - + sock.getLocalSocketAddress() + ", remote=" - + sock.getRemoteSocketAddress() + ", for file " + file + + peer.getLocalAddressString() + ", remote=" + + peer.getRemoteAddressString() + ", for file " + file + ", for pool " + block.getBlockPoolId() + " block " + block.getBlockId() + "_" + block.getGenerationStamp()); } } } - - @Override - public IOStreamPair getStreams() { - return ioStreams; - } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java new file mode 100644 index 00000000000..eb2d0c92d9e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.Socket; +import java.nio.channels.ReadableByteChannel; + +/** + * Represents a peer that we communicate with by using a basic Socket + * that has no associated Channel. + * + */ +class BasicInetPeer implements Peer { + private final Socket socket; + private final OutputStream out; + private final InputStream in; + private final boolean isLocal; + + public BasicInetPeer(Socket socket) throws IOException { + this.socket = socket; + this.out = socket.getOutputStream(); + this.in = socket.getInputStream(); + this.isLocal = socket.getInetAddress().equals(socket.getLocalAddress()); + } + + @Override + public ReadableByteChannel getInputStreamChannel() { + /* + * This Socket has no channel, so there's nothing to return here. + */ + return null; + } + + @Override + public void setReadTimeout(int timeoutMs) throws IOException { + socket.setSoTimeout(timeoutMs); + } + + @Override + public int getReceiveBufferSize() throws IOException { + return socket.getReceiveBufferSize(); + } + + @Override + public boolean getTcpNoDelay() throws IOException { + return socket.getTcpNoDelay(); + } + + @Override + public void setWriteTimeout(int timeoutMs) { + /* + * We can't implement write timeouts. :( + * + * Java provides no facility to set a blocking write timeout on a Socket. + * You can simulate a blocking write with a timeout by using + * non-blocking I/O. However, we can't use nio here, because this Socket + * doesn't have an associated Channel. + * + * See http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4031100 for + * more details. 
+ */ + } + + @Override + public boolean isClosed() { + return socket.isClosed(); + } + + @Override + public void close() throws IOException { + socket.close(); + } + + @Override + public String getRemoteAddressString() { + return socket.getRemoteSocketAddress().toString(); + } + + @Override + public String getLocalAddressString() { + return socket.getLocalSocketAddress().toString(); + } + + @Override + public InputStream getInputStream() throws IOException { + return in; + } + + @Override + public OutputStream getOutputStream() throws IOException { + return out; + } + + @Override + public boolean isLocal() { + return isLocal; + } + + @Override + public String toString() { + return "BasicInetPeer(" + socket.toString() + ")"; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java new file mode 100644 index 00000000000..295632ca63d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor; +import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; +import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; + +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.ReadableByteChannel; + +/** + * Represents a peer that we communicate with by using an encrypted + * communications medium. + */ +@InterfaceAudience.Private +public class EncryptedPeer implements Peer { + private final Peer enclosedPeer; + + /** + * An encrypted InputStream. + */ + private final InputStream in; + + /** + * An encrypted OutputStream. + */ + private final OutputStream out; + + /** + * An encrypted ReadableByteChannel. + */ + private final ReadableByteChannel channel; + + public EncryptedPeer(Peer enclosedPeer, DataEncryptionKey key) + throws IOException { + this.enclosedPeer = enclosedPeer; + IOStreamPair ios = DataTransferEncryptor.getEncryptedStreams( + enclosedPeer.getOutputStream(), enclosedPeer.getInputStream(), key); + this.in = ios.in; + this.out = ios.out; + this.channel = ios.in instanceof ReadableByteChannel ? 
+ (ReadableByteChannel)ios.in : null; + } + + @Override + public ReadableByteChannel getInputStreamChannel() { + return channel; + } + + @Override + public void setReadTimeout(int timeoutMs) throws IOException { + enclosedPeer.setReadTimeout(timeoutMs); + } + + @Override + public int getReceiveBufferSize() throws IOException { + return enclosedPeer.getReceiveBufferSize(); + } + + @Override + public boolean getTcpNoDelay() throws IOException { + return enclosedPeer.getTcpNoDelay(); + } + + @Override + public void setWriteTimeout(int timeoutMs) throws IOException { + enclosedPeer.setWriteTimeout(timeoutMs); + } + + @Override + public boolean isClosed() { + return enclosedPeer.isClosed(); + } + + @Override + public void close() throws IOException { + try { + in.close(); + } finally { + try { + out.close(); + } finally { + enclosedPeer.close(); + } + } + } + + @Override + public String getRemoteAddressString() { + return enclosedPeer.getRemoteAddressString(); + } + + @Override + public String getLocalAddressString() { + return enclosedPeer.getLocalAddressString(); + } + + @Override + public InputStream getInputStream() throws IOException { + return in; + } + + @Override + public OutputStream getOutputStream() throws IOException { + return out; + } + + @Override + public boolean isLocal() { + return enclosedPeer.isLocal(); + } + + @Override + public String toString() { + return "EncryptedPeer(" + enclosedPeer + ")"; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java new file mode 100644 index 00000000000..1186490512b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.Socket; +import java.nio.channels.ReadableByteChannel; + +import org.apache.hadoop.net.SocketInputStream; +import org.apache.hadoop.net.SocketOutputStream; + +/** + * Represents a peer that we communicate with by using non-blocking I/O + * on a Socket. + */ +class NioInetPeer implements Peer { + private final Socket socket; + + /** + * An InputStream which simulates blocking I/O with timeouts using NIO. + */ + private final SocketInputStream in; + + /** + * An OutputStream which simulates blocking I/O with timeouts using NIO. 
+ */ + private final SocketOutputStream out; + + private final boolean isLocal; + + NioInetPeer(Socket socket) throws IOException { + this.socket = socket; + this.in = new SocketInputStream(socket.getChannel(), 0); + this.out = new SocketOutputStream(socket.getChannel(), 0); + this.isLocal = socket.getInetAddress().equals(socket.getLocalAddress()); + } + + @Override + public ReadableByteChannel getInputStreamChannel() { + return in; + } + + @Override + public void setReadTimeout(int timeoutMs) throws IOException { + in.setTimeout(timeoutMs); + } + + @Override + public int getReceiveBufferSize() throws IOException { + return socket.getReceiveBufferSize(); + } + + @Override + public boolean getTcpNoDelay() throws IOException { + return socket.getTcpNoDelay(); + } + + @Override + public void setWriteTimeout(int timeoutMs) throws IOException { + out.setTimeout(timeoutMs); + } + + @Override + public boolean isClosed() { + return socket.isClosed(); + } + + @Override + public void close() throws IOException { + // We always close the outermost streams-- in this case, 'in' and 'out' + // Closing either one of these will also close the Socket. + try { + in.close(); + } finally { + out.close(); + } + } + + @Override + public String getRemoteAddressString() { + return socket.getRemoteSocketAddress().toString(); + } + + @Override + public String getLocalAddressString() { + return socket.getLocalSocketAddress().toString(); + } + + @Override + public InputStream getInputStream() throws IOException { + return in; + } + + @Override + public OutputStream getOutputStream() throws IOException { + return out; + } + + @Override + public boolean isLocal() { + return isLocal; + } + + @Override + public String toString() { + return "NioInetPeer(" + socket.toString() + ")"; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java new file mode 100644 index 00000000000..129ada76116 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.ReadableByteChannel; +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Represents a connection to a peer. + */ +@InterfaceAudience.Private +public interface Peer extends Closeable { + /** + * @return The input stream channel associated with this + * peer, or null if it has none. + */ + public ReadableByteChannel getInputStreamChannel(); + + /** + * Set the read timeout on this peer. 
+ * + * @param timeoutMs The timeout in milliseconds. + */ + public void setReadTimeout(int timeoutMs) throws IOException; + + /** + * @return The receive buffer size. + */ + public int getReceiveBufferSize() throws IOException; + + /** + * @return True if TCP_NODELAY is turned on. + */ + public boolean getTcpNoDelay() throws IOException; + + /** + * Set the write timeout on this peer. + * + * Note: this is not honored for BasicInetPeer. + * See {@link BasicInetPeer#setWriteTimeout} for details. + * + * @param timeoutMs The timeout in milliseconds. + */ + public void setWriteTimeout(int timeoutMs) throws IOException; + + /** + * @return true only if the peer is closed. + */ + public boolean isClosed(); + + /** + * Close the peer. + * + * It's safe to re-close a Peer that is already closed. + */ + public void close() throws IOException; + + /** + * @return A string representing the remote end of our + * connection to the peer. + */ + public String getRemoteAddressString(); + + /** + * @return A string representing the local end of our + * connection to the peer. + */ + public String getLocalAddressString(); + + /** + * @return An InputStream associated with the Peer. + * This InputStream will be valid until you close + * this peer with Peer#close. + */ + public InputStream getInputStream() throws IOException; + + /** + * @return An OutputStream associated with the Peer. + * This OutputStream will be valid until you close + * this peer with Peer#close. + */ + public OutputStream getOutputStream() throws IOException; + + /** + * @return True if the peer resides on the same + * computer as we do. + */ + public boolean isLocal(); +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java new file mode 100644 index 00000000000..c7b6b14df49 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/PeerServer.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.Closeable; +import org.apache.hadoop.classification.InterfaceAudience; +import java.io.IOException; +import java.net.SocketTimeoutException; + +@InterfaceAudience.Private +public interface PeerServer extends Closeable { + /** + * Set the receive buffer size of the PeerServer. + * + * @param size The receive buffer size. + */ + public void setReceiveBufferSize(int size) throws IOException; + + /** + * Listens for a connection to be made to this server and accepts + * it. The method blocks until a connection is made. + * + * @exception IOException if an I/O error occurs when waiting for a + * connection.
+ * @exception SecurityException if a security manager exists and its + * checkAccept method doesn't allow the operation. + * @exception SocketTimeoutException if a timeout was previously set and + * the timeout has been reached. + */ + public Peer accept() throws IOException, SocketTimeoutException; + + /** + * @return A string representation of the address we're + * listening on. + */ + public String getListeningString(); + + /** + * Free the resources associated with this peer server. + * This normally includes sockets, etc. + * + * @throws IOException If there is an error closing the PeerServer + */ + public void close() throws IOException; +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java new file mode 100644 index 00000000000..29d86634f29 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/TcpPeerServer.java @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketTimeoutException; +import java.nio.channels.ServerSocketChannel; +import java.nio.channels.SocketChannel; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; +import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.ipc.Server; + +@InterfaceAudience.Private +public class TcpPeerServer implements PeerServer { + static Log LOG = LogFactory.getLog(TcpPeerServer.class); + + private final ServerSocket serverSocket; + + public static Peer peerFromSocket(Socket socket) + throws IOException { + Peer peer = null; + boolean success = false; + try { + // TCP_NODELAY is crucial here because of bad interactions between + // Nagle's Algorithm and Delayed ACKs. With connection keepalive + // between the client and DN, the conversation looks like: + // 1. Client -> DN: Read block X + // 2. DN -> Client: data for block X + // 3. Client -> DN: Status OK (successful read) + // 4. Client -> DN: Read block Y + // The fact that step #3 and #4 are both in the client->DN direction + // triggers Nagling. If the DN is using delayed ACKs, this results + // in a delay of 40ms or more. + // + // TCP_NODELAY disables nagling and thus avoids this performance + // disaster. 
+ socket.setTcpNoDelay(true); + SocketChannel channel = socket.getChannel(); + if (channel == null) { + peer = new BasicInetPeer(socket); + } else { + peer = new NioInetPeer(socket); + } + success = true; + return peer; + } finally { + if (!success) { + if (peer != null) peer.close(); + socket.close(); + } + } + } + + public static Peer peerFromSocketAndKey(Socket s, + DataEncryptionKey key) throws IOException { + Peer peer = null; + boolean success = false; + try { + peer = peerFromSocket(s); + if (key != null) { + peer = new EncryptedPeer(peer, key); + } + success = true; + return peer; + } finally { + if (!success) { + IOUtils.cleanup(null, peer); + } + } + } + + /** + * Create a non-secure TcpPeerServer. + * + * @param socketWriteTimeout The Socket write timeout in ms. + * @param bindAddr The address to bind to. + * @throws IOException + */ + public TcpPeerServer(int socketWriteTimeout, + InetSocketAddress bindAddr) throws IOException { + this.serverSocket = (socketWriteTimeout > 0) ? + ServerSocketChannel.open().socket() : new ServerSocket(); + Server.bind(serverSocket, bindAddr, 0); + } + + /** + * Create a secure TcpPeerServer. + * + * @param secureResources Security resources. + */ + public TcpPeerServer(SecureResources secureResources) { + this.serverSocket = secureResources.getStreamingSocket(); + } + + /** + * @return the IP address which this TcpPeerServer is listening on. + */ + public InetSocketAddress getStreamingAddr() { + return new InetSocketAddress( + serverSocket.getInetAddress().getHostAddress(), + serverSocket.getLocalPort()); + } + + @Override + public void setReceiveBufferSize(int size) throws IOException { + this.serverSocket.setReceiveBufferSize(size); + } + + @Override + public Peer accept() throws IOException, SocketTimeoutException { + Peer peer = peerFromSocket(serverSocket.accept()); + return peer; + } + + @Override + public String getListeningString() { + return serverSocket.getLocalSocketAddress().toString(); + } + + @Override + public void close() throws IOException { + try { + serverSocket.close(); + } catch(IOException e) { + LOG.error("error closing TcpPeerServer: ", e); + } + } + + @Override + public String toString() { + return "TcpPeerServer(" + getListeningString() + ")"; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index 1d9a8f0a1c1..fe4eac1a6e5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -45,6 +45,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.BlockReader; import org.apache.hadoop.hdfs.BlockReaderFactory; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.net.TcpPeerServer; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -206,9 +208,12 @@ public class JspHelper { // Use the block name for file name. 
String file = BlockReaderFactory.getFileName(addr, poolId, blockId); BlockReader blockReader = BlockReaderFactory.newBlockReader( - conf, s, file, + conf, file, new ExtendedBlock(poolId, blockId, 0, genStamp), blockToken, - offsetIntoBlock, amtToRead, encryptionKey); + offsetIntoBlock, amtToRead, true, + "JspHelper", TcpPeerServer.peerFromSocketAndKey(s, encryptionKey), + new DatanodeID(addr.getAddress().toString(), + addr.getHostName(), poolId, addr.getPort(), 0, 0)); byte[] buf = new byte[(int)amtToRead]; int readOffset = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 467d44e80ef..8045481538e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -90,6 +90,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; @@ -522,24 +523,19 @@ public class DataNode extends Configured private void initDataXceiver(Configuration conf) throws IOException { // find free port or use privileged port provided - ServerSocket ss; - if (secureResources == null) { - InetSocketAddress addr = DataNode.getStreamingAddr(conf); - ss = (dnConf.socketWriteTimeout > 0) ? - ServerSocketChannel.open().socket() : new ServerSocket(); - Server.bind(ss, addr, 0); + TcpPeerServer tcpPeerServer; + if (secureResources != null) { + tcpPeerServer = new TcpPeerServer(secureResources); } else { - ss = secureResources.getStreamingSocket(); + tcpPeerServer = new TcpPeerServer(dnConf.socketWriteTimeout, + DataNode.getStreamingAddr(conf)); } - ss.setReceiveBufferSize(HdfsConstants.DEFAULT_DATA_SOCKET_SIZE); - - streamingAddr = new InetSocketAddress(ss.getInetAddress().getHostAddress(), - ss.getLocalPort()); - + tcpPeerServer.setReceiveBufferSize(HdfsConstants.DEFAULT_DATA_SOCKET_SIZE); + streamingAddr = tcpPeerServer.getStreamingAddr(); LOG.info("Opened streaming server at " + streamingAddr); this.threadGroup = new ThreadGroup("dataXceiverServer"); this.dataXceiverServer = new Daemon(threadGroup, - new DataXceiverServer(ss, conf, this)); + new DataXceiverServer(tcpPeerServer, conf, this)); this.threadGroup.setDaemon(true); // auto destroy when empty } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 31b896caf93..d618c787d13 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -39,6 +39,7 @@ import java.nio.channels.ClosedChannelException; import java.util.Arrays; import org.apache.commons.logging.Log; +import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -64,7 +65,6 
@@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.net.SocketInputWrapper; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; @@ -79,8 +79,7 @@ class DataXceiver extends Receiver implements Runnable { public static final Log LOG = DataNode.LOG; static final Log ClientTraceLog = DataNode.ClientTraceLog; - private final Socket s; - private final boolean isLocal; //is a local connection? + private final Peer peer; private final String remoteAddress; // address of remote side private final String localAddress; // local address of this daemon private final DataNode datanode; @@ -88,7 +87,7 @@ class DataXceiver extends Receiver implements Runnable { private final DataXceiverServer dataXceiverServer; private final boolean connectToDnViaHostname; private long opStartTime; //the start time of receiving an Op - private final SocketInputWrapper socketIn; + private final InputStream socketIn; private OutputStream socketOut; /** @@ -97,25 +96,23 @@ class DataXceiver extends Receiver implements Runnable { */ private String previousOpClientName; - public static DataXceiver create(Socket s, DataNode dn, + public static DataXceiver create(Peer peer, DataNode dn, DataXceiverServer dataXceiverServer) throws IOException { - return new DataXceiver(s, dn, dataXceiverServer); + return new DataXceiver(peer, dn, dataXceiverServer); } - private DataXceiver(Socket s, - DataNode datanode, + private DataXceiver(Peer peer, DataNode datanode, DataXceiverServer dataXceiverServer) throws IOException { - this.s = s; + this.peer = peer; this.dnConf = datanode.getDnConf(); - this.socketIn = NetUtils.getInputStream(s); - this.socketOut = NetUtils.getOutputStream(s, dnConf.socketWriteTimeout); - this.isLocal = s.getInetAddress().equals(s.getLocalAddress()); + this.socketIn = peer.getInputStream(); + this.socketOut = peer.getOutputStream(); this.datanode = datanode; this.dataXceiverServer = dataXceiverServer; this.connectToDnViaHostname = datanode.getDnConf().connectToDnViaHostname; - remoteAddress = s.getRemoteSocketAddress().toString(); - localAddress = s.getLocalSocketAddress().toString(); + remoteAddress = peer.getRemoteAddressString(); + localAddress = peer.getLocalAddressString(); if (LOG.isDebugEnabled()) { LOG.debug("Number of active connections is: " @@ -155,11 +152,10 @@ class DataXceiver extends Receiver implements Runnable { public void run() { int opsProcessed = 0; Op op = null; - - dataXceiverServer.childSockets.add(s); - + + dataXceiverServer.addPeer(peer); try { - + peer.setWriteTimeout(datanode.getDnConf().socketWriteTimeout); InputStream input = socketIn; if (dnConf.encryptDataTransfer) { IOStreamPair encryptedStreams = null; @@ -169,8 +165,9 @@ class DataXceiver extends Receiver implements Runnable { dnConf.encryptionAlgorithm); } catch (InvalidMagicNumberException imne) { LOG.info("Failed to read expected encryption handshake from client " + - "at " + s.getInetAddress() + ". Perhaps the client is running an " + - "older version of Hadoop which does not support encryption"); + "at " + peer.getRemoteAddressString() + ". 
Perhaps the client " + + "is running an older version of Hadoop which does not support " + + "encryption"); return; } input = encryptedStreams.in; @@ -189,9 +186,9 @@ class DataXceiver extends Receiver implements Runnable { try { if (opsProcessed != 0) { assert dnConf.socketKeepaliveTimeout > 0; - socketIn.setTimeout(dnConf.socketKeepaliveTimeout); + peer.setReadTimeout(dnConf.socketKeepaliveTimeout); } else { - socketIn.setTimeout(dnConf.socketTimeout); + peer.setReadTimeout(dnConf.socketTimeout); } op = readOp(); } catch (InterruptedIOException ignored) { @@ -202,7 +199,7 @@ class DataXceiver extends Receiver implements Runnable { if (opsProcessed > 0 && (err instanceof EOFException || err instanceof ClosedChannelException)) { if (LOG.isDebugEnabled()) { - LOG.debug("Cached " + s.toString() + " closing after " + opsProcessed + " ops"); + LOG.debug("Cached " + peer + " closing after " + opsProcessed + " ops"); } } else { throw err; @@ -212,13 +209,13 @@ class DataXceiver extends Receiver implements Runnable { // restore normal timeout if (opsProcessed != 0) { - s.setSoTimeout(dnConf.socketTimeout); + peer.setReadTimeout(dnConf.socketTimeout); } opStartTime = now(); processOp(op); ++opsProcessed; - } while (!s.isClosed() && dnConf.socketKeepaliveTimeout > 0); + } while (!peer.isClosed() && dnConf.socketKeepaliveTimeout > 0); } catch (Throwable t) { LOG.error(datanode.getDisplayName() + ":DataXceiver error processing " + ((op == null) ? "unknown" : op.name()) + " operation " + @@ -230,9 +227,8 @@ class DataXceiver extends Receiver implements Runnable { + datanode.getXceiverCount()); } updateCurrentThreadName("Cleaning up"); + dataXceiverServer.closePeer(peer); IOUtils.closeStream(in); - IOUtils.closeSocket(s); - dataXceiverServer.childSockets.remove(s); } } @@ -286,8 +282,9 @@ class DataXceiver extends Receiver implements Runnable { ClientReadStatusProto stat = ClientReadStatusProto.parseFrom( HdfsProtoUtil.vintPrefixed(in)); if (!stat.hasStatus()) { - LOG.warn("Client " + s.getInetAddress() + " did not send a valid status " + - "code after reading. Will close connection."); + LOG.warn("Client " + peer.getRemoteAddressString() + + " did not send a valid status code after reading. 
" + + "Will close connection."); IOUtils.closeStream(out); } } catch (IOException ioe) { @@ -320,7 +317,7 @@ class DataXceiver extends Receiver implements Runnable { //update metrics datanode.metrics.addReadBlockOp(elapsed()); - datanode.metrics.incrReadsFromClient(isLocal); + datanode.metrics.incrReadsFromClient(peer.isLocal()); } @Override @@ -358,8 +355,8 @@ class DataXceiver extends Receiver implements Runnable { LOG.debug("isDatanode=" + isDatanode + ", isClient=" + isClient + ", isTransfer=" + isTransfer); - LOG.debug("writeBlock receive buf size " + s.getReceiveBufferSize() + - " tcp no delay " + s.getTcpNoDelay()); + LOG.debug("writeBlock receive buf size " + peer.getReceiveBufferSize() + + " tcp no delay " + peer.getTcpNoDelay()); } // We later mutate block's generation stamp and length, but we need to @@ -390,8 +387,8 @@ class DataXceiver extends Receiver implements Runnable { stage != BlockConstructionStage.PIPELINE_CLOSE_RECOVERY) { // open a block receiver blockReceiver = new BlockReceiver(block, in, - s.getRemoteSocketAddress().toString(), - s.getLocalSocketAddress().toString(), + peer.getRemoteAddressString(), + peer.getLocalAddressString(), stage, latestGenerationStamp, minBytesRcvd, maxBytesRcvd, clientname, srcDataNode, datanode, requestedChecksum); } else { @@ -546,7 +543,7 @@ class DataXceiver extends Receiver implements Runnable { //update metrics datanode.metrics.addWriteBlockOp(elapsed()); - datanode.metrics.incrWritesFromClient(isLocal); + datanode.metrics.incrWritesFromClient(peer.isLocal()); } @Override @@ -554,7 +551,7 @@ class DataXceiver extends Receiver implements Runnable { final Token blockToken, final String clientName, final DatanodeInfo[] targets) throws IOException { - checkAccess(null, true, blk, blockToken, + checkAccess(socketOut, true, blk, blockToken, Op.TRANSFER_BLOCK, BlockTokenSecretManager.AccessMode.COPY); previousOpClientName = clientName; updateCurrentThreadName(Op.TRANSFER_BLOCK + " " + blk); @@ -641,8 +638,9 @@ class DataXceiver extends Receiver implements Runnable { } if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start - String msg = "Not able to copy block " + block.getBlockId() + " to " - + s.getRemoteSocketAddress() + " because threads quota is exceeded."; + String msg = "Not able to copy block " + block.getBlockId() + " " + + "to " + peer.getRemoteAddressString() + " because threads " + + "quota is exceeded."; LOG.info(msg); sendResponse(ERROR, msg); return; @@ -671,7 +669,7 @@ class DataXceiver extends Receiver implements Runnable { datanode.metrics.incrBytesRead((int) read); datanode.metrics.incrBlocksRead(); - LOG.info("Copied " + block + " to " + s.getRemoteSocketAddress()); + LOG.info("Copied " + block + " to " + peer.getRemoteAddressString()); } catch (IOException ioe) { isOpSuccess = false; LOG.info("opCopyBlock " + block + " received exception " + ioe); @@ -716,8 +714,9 @@ class DataXceiver extends Receiver implements Runnable { } if (!dataXceiverServer.balanceThrottler.acquire()) { // not able to start - String msg = "Not able to receive block " + block.getBlockId() + " from " - + s.getRemoteSocketAddress() + " because threads quota is exceeded."; + String msg = "Not able to receive block " + block.getBlockId() + + " from " + peer.getRemoteAddressString() + " because threads " + + "quota is exceeded."; LOG.warn(msg); sendResponse(ERROR, msg); return; @@ -794,7 +793,7 @@ class DataXceiver extends Receiver implements Runnable { // notify name node datanode.notifyNamenodeReceivedBlock(block, delHint); - 
LOG.info("Moved " + block + " from " + s.getRemoteSocketAddress()); + LOG.info("Moved " + block + " from " + peer.getRemoteAddressString()); } catch (IOException ioe) { opStatus = ERROR; @@ -817,7 +816,7 @@ class DataXceiver extends Receiver implements Runnable { try { sendResponse(opStatus, errMsg); } catch (IOException ioe) { - LOG.warn("Error writing reply back to " + s.getRemoteSocketAddress()); + LOG.warn("Error writing reply back to " + peer.getRemoteAddressString()); } IOUtils.closeStream(proxyOut); IOUtils.closeStream(blockReceiver); @@ -871,7 +870,7 @@ class DataXceiver extends Receiver implements Runnable { } - private void checkAccess(DataOutputStream out, final boolean reply, + private void checkAccess(OutputStream out, final boolean reply, final ExtendedBlock blk, final Token t, final Op op, @@ -886,11 +885,6 @@ class DataXceiver extends Receiver implements Runnable { } catch(InvalidToken e) { try { if (reply) { - if (out == null) { - out = new DataOutputStream( - NetUtils.getOutputStream(s, dnConf.socketWriteTimeout)); - } - BlockOpResponseProto.Builder resp = BlockOpResponseProto.newBuilder() .setStatus(ERROR_ACCESS_TOKEN); if (mode == BlockTokenSecretManager.AccessMode.WRITE) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java index bb0f7fd81b4..2755eb415f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java @@ -18,18 +18,16 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.IOException; -import java.net.ServerSocket; -import java.net.Socket; import java.net.SocketTimeoutException; import java.nio.channels.AsynchronousCloseException; -import java.util.Collections; import java.util.HashSet; -import java.util.Iterator; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.hdfs.net.PeerServer; import org.apache.hadoop.hdfs.server.balancer.Balancer; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.io.IOUtils; @@ -45,11 +43,9 @@ import org.apache.hadoop.util.Daemon; class DataXceiverServer implements Runnable { public static final Log LOG = DataNode.LOG; - ServerSocket ss; - DataNode datanode; - // Record all sockets opened for data transfer - Set childSockets = Collections.synchronizedSet( - new HashSet()); + private final PeerServer peerServer; + private final DataNode datanode; + private final Set peers = new HashSet(); /** * Maximal number of concurrent xceivers per node. 
@@ -109,10 +105,10 @@ class DataXceiverServer implements Runnable { long estimateBlockSize; - DataXceiverServer(ServerSocket ss, Configuration conf, + DataXceiverServer(PeerServer peerServer, Configuration conf, DataNode datanode) { - this.ss = ss; + this.peerServer = peerServer; this.datanode = datanode; this.maxXceiverCount = @@ -130,12 +126,10 @@ class DataXceiverServer implements Runnable { @Override public void run() { + Peer peer = null; while (datanode.shouldRun) { - Socket s = null; try { - s = ss.accept(); - s.setTcpNoDelay(true); - // Timeouts are set within DataXceiver.run() + peer = peerServer.accept(); // Make sure the xceiver count is not exceeded int curXceiverCount = datanode.getXceiverCount(); @@ -146,7 +140,7 @@ class DataXceiverServer implements Runnable { } new Daemon(datanode.threadGroup, - DataXceiver.create(s, datanode, this)) + DataXceiver.create(peer, datanode, this)) .start(); } catch (SocketTimeoutException ignored) { // wake up to see if should continue to run @@ -157,10 +151,10 @@ class DataXceiverServer implements Runnable { LOG.warn(datanode.getDisplayName() + ":DataXceiverServer: ", ace); } } catch (IOException ie) { - IOUtils.closeSocket(s); + IOUtils.cleanup(null, peer); LOG.warn(datanode.getDisplayName() + ":DataXceiverServer: ", ie); } catch (OutOfMemoryError ie) { - IOUtils.closeSocket(s); + IOUtils.cleanup(null, peer); // DataNode can run out of memory if there is too many transfers. // Log the event, Sleep for 30 seconds, other transfers may complete by // then. @@ -176,33 +170,35 @@ class DataXceiverServer implements Runnable { datanode.shouldRun = false; } } + synchronized (this) { + for (Peer p : peers) { + IOUtils.cleanup(LOG, p); + } + } try { - ss.close(); + peerServer.close(); } catch (IOException ie) { LOG.warn(datanode.getDisplayName() + " :DataXceiverServer: close exception", ie); } } - + void kill() { assert datanode.shouldRun == false : "shoudRun should be set to false before killing"; try { - this.ss.close(); + this.peerServer.close(); } catch (IOException ie) { LOG.warn(datanode.getDisplayName() + ":DataXceiverServer.kill(): ", ie); } + } + + synchronized void addPeer(Peer peer) { + peers.add(peer); + } - // close all the sockets that were accepted earlier - synchronized (childSockets) { - for (Iterator it = childSockets.iterator(); - it.hasNext();) { - Socket thissock = it.next(); - try { - thissock.close(); - } catch (IOException e) { - } - } - } + synchronized void closePeer(Peer peer) { + peers.remove(peer); + IOUtils.cleanup(null, peer); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 90542124076..4dd1e808c54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.BlockReaderFactory; import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -559,9 +560,10 @@ public class NamenodeFsck { String file = BlockReaderFactory.getFileName(targetAddr, 
block.getBlockPoolId(), block.getBlockId()); blockReader = BlockReaderFactory.newBlockReader( - conf, s, file, block, lblock - .getBlockToken(), 0, -1, - namenode.getRpcServer().getDataEncryptionKey()); + conf, file, block, lblock.getBlockToken(), 0, -1, true, "fsck", + TcpPeerServer.peerFromSocketAndKey(s, namenode.getRpcServer(). + getDataEncryptionKey()), + chosenNode); } catch (IOException ex) { // Put chosen node into dead list, continue diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java index 29d8063426e..1731b2b5f5d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java @@ -31,6 +31,7 @@ import java.util.Random; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -150,12 +151,12 @@ public class BlockReaderTestUtil { sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT); return BlockReaderFactory.newBlockReader( - new DFSClient.Conf(conf), - sock, targetAddr.toString()+ ":" + block.getBlockId(), block, + conf, + targetAddr.toString()+ ":" + block.getBlockId(), block, testBlock.getBlockToken(), offset, lenToRead, - conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), - true, "", null, null); + true, "BlockReaderTestUtil", TcpPeerServer.peerFromSocket(sock), + nodes[0]); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java index 8dd3d6fd38a..2a0e0a85565 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java @@ -61,7 +61,7 @@ public class TestClientBlockVerification { util.getBlockReader(testBlock, 0, FILE_SIZE_K * 1024)); util.readAndCheckEOS(reader, FILE_SIZE_K * 1024, true); verify(reader).sendReadResult(Status.CHECKSUM_OK); - reader.close(); + reader.close(null); } /** @@ -76,7 +76,7 @@ public class TestClientBlockVerification { // We asked the blockreader for the whole file, and only read // half of it, so no CHECKSUM_OK verify(reader, never()).sendReadResult(Status.CHECKSUM_OK); - reader.close(); + reader.close(null); } /** @@ -92,7 +92,7 @@ public class TestClientBlockVerification { // And read half the file util.readAndCheckEOS(reader, FILE_SIZE_K * 1024 / 2, true); verify(reader).sendReadResult(Status.CHECKSUM_OK); - reader.close(); + reader.close(null); } /** @@ -111,7 +111,7 @@ public class TestClientBlockVerification { util.getBlockReader(testBlock, startOffset, length)); util.readAndCheckEOS(reader, length, true); verify(reader).sendReadResult(Status.CHECKSUM_OK); - reader.close(); + reader.close(null); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java index d9020e0bd0a..b140bae1109 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java @@ -18,28 +18,20 @@ package org.apache.hadoop.hdfs; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.spy; import java.io.IOException; import java.net.InetSocketAddress; -import java.net.Socket; -import java.security.PrivilegedExceptionAction; + +import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; -import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.security.token.Token; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Matchers; import org.mockito.Mockito; @@ -55,59 +47,31 @@ public class TestConnCache { static final int BLOCK_SIZE = 4096; static final int FILE_SIZE = 3 * BLOCK_SIZE; - final static int CACHE_SIZE = 4; - final static long CACHE_EXPIRY_MS = 200; - static Configuration conf = null; - static MiniDFSCluster cluster = null; - static FileSystem fs = null; - static SocketCache cache; - - static final Path testFile = new Path("/testConnCache.dat"); - static byte authenticData[] = null; - - static BlockReaderTestUtil util = null; - /** * A mock Answer to remember the BlockReader used. * * It verifies that all invocation to DFSInputStream.getBlockReader() - * use the same socket. + * use the same peer. */ private class MockGetBlockReader implements Answer { public RemoteBlockReader2 reader = null; - private Socket sock = null; + private Peer peer = null; @Override public RemoteBlockReader2 answer(InvocationOnMock invocation) throws Throwable { RemoteBlockReader2 prevReader = reader; reader = (RemoteBlockReader2) invocation.callRealMethod(); - if (sock == null) { - sock = reader.dnSock; + if (peer == null) { + peer = reader.getPeer(); } else if (prevReader != null) { - assertSame("DFSInputStream should use the same socket", - sock, reader.dnSock); + Assert.assertSame("DFSInputStream should use the same peer", + peer, reader.getPeer()); } return reader; } } - @BeforeClass - public static void setupCluster() throws Exception { - final int REPLICATION_FACTOR = 1; - - /* create a socket cache. There is only one socket cache per jvm */ - cache = SocketCache.getInstance(CACHE_SIZE, CACHE_EXPIRY_MS); - - util = new BlockReaderTestUtil(REPLICATION_FACTOR); - cluster = util.getCluster(); - conf = util.getConf(); - fs = cluster.getFileSystem(); - - authenticData = util.writeFile(testFile, FILE_SIZE / 1024); - } - - /** * (Optionally) seek to position, read and verify data. 
* @@ -117,9 +81,10 @@ public class TestConnCache { long pos, byte[] buffer, int offset, - int length) + int length, + byte[] authenticData) throws IOException { - assertTrue("Test buffer too small", buffer.length >= offset + length); + Assert.assertTrue("Test buffer too small", buffer.length >= offset + length); if (pos >= 0) in.seek(pos); @@ -129,7 +94,7 @@ public class TestConnCache { while (length > 0) { int cnt = in.read(buffer, offset, length); - assertTrue("Error in read", cnt > 0); + Assert.assertTrue("Error in read", cnt > 0); offset += cnt; length -= cnt; } @@ -144,116 +109,23 @@ public class TestConnCache { } } - /** - * Test the SocketCache itself. - */ - @Test - public void testSocketCache() throws Exception { - // Make a client - InetSocketAddress nnAddr = - new InetSocketAddress("localhost", cluster.getNameNodePort()); - DFSClient client = new DFSClient(nnAddr, conf); - - // Find out the DN addr - LocatedBlock block = - client.getNamenode().getBlockLocations( - testFile.toString(), 0, FILE_SIZE) - .getLocatedBlocks().get(0); - DataNode dn = util.getDataNode(block); - InetSocketAddress dnAddr = dn.getXferAddress(); - - - // Make some sockets to the DN - Socket[] dnSockets = new Socket[CACHE_SIZE]; - for (int i = 0; i < dnSockets.length; ++i) { - dnSockets[i] = client.socketFactory.createSocket( - dnAddr.getAddress(), dnAddr.getPort()); - } - - - // Insert a socket to the NN - Socket nnSock = new Socket(nnAddr.getAddress(), nnAddr.getPort()); - cache.put(nnSock, null); - assertSame("Read the write", nnSock, cache.get(nnAddr).sock); - cache.put(nnSock, null); - - // Insert DN socks - for (Socket dnSock : dnSockets) { - cache.put(dnSock, null); - } - - assertEquals("NN socket evicted", null, cache.get(nnAddr)); - assertTrue("Evicted socket closed", nnSock.isClosed()); - - // Lookup the DN socks - for (Socket dnSock : dnSockets) { - assertEquals("Retrieve cached sockets", dnSock, cache.get(dnAddr).sock); - dnSock.close(); - } - - assertEquals("Cache is empty", 0, cache.size()); - } - - - /** - * Test the SocketCache expiry. - * Verify that socket cache entries expire after the set - * expiry time. - */ - @Test - public void testSocketCacheExpiry() throws Exception { - // Make a client - InetSocketAddress nnAddr = - new InetSocketAddress("localhost", cluster.getNameNodePort()); - DFSClient client = new DFSClient(nnAddr, conf); - - // Find out the DN addr - LocatedBlock block = - client.getNamenode().getBlockLocations( - testFile.toString(), 0, FILE_SIZE) - .getLocatedBlocks().get(0); - DataNode dn = util.getDataNode(block); - InetSocketAddress dnAddr = dn.getXferAddress(); - - - // Make some sockets to the DN and put in cache - Socket[] dnSockets = new Socket[CACHE_SIZE]; - for (int i = 0; i < dnSockets.length; ++i) { - dnSockets[i] = client.socketFactory.createSocket( - dnAddr.getAddress(), dnAddr.getPort()); - cache.put(dnSockets[i], null); - } - - // Client side still has the sockets cached - assertEquals(CACHE_SIZE, client.socketCache.size()); - - //sleep for a second and see if it expired - Thread.sleep(CACHE_EXPIRY_MS + 1000); - - // Client side has no sockets cached - assertEquals(0, client.socketCache.size()); - - //sleep for another second and see if - //the daemon thread runs fine on empty cache - Thread.sleep(CACHE_EXPIRY_MS + 1000); - } - - /** * Read a file served entirely from one DN. Seek around and read from * different offsets. And verify that they all use the same socket. 
- * - * @throws java.io.IOException + * @throws Exception */ @Test @SuppressWarnings("unchecked") - public void testReadFromOneDN() throws IOException { - LOG.info("Starting testReadFromOneDN()"); + public void testReadFromOneDN() throws Exception { + BlockReaderTestUtil util = new BlockReaderTestUtil(1, + new HdfsConfiguration()); + final Path testFile = new Path("/testConnCache.dat"); + byte authenticData[] = util.writeFile(testFile, FILE_SIZE / 1024); DFSClient client = new DFSClient( - new InetSocketAddress("localhost", cluster.getNameNodePort()), conf); - DFSInputStream in = spy(client.open(testFile.toString())); + new InetSocketAddress("localhost", + util.getCluster().getNameNodePort()), util.getConf()); + DFSInputStream in = Mockito.spy(client.open(testFile.toString())); LOG.info("opened " + testFile.toString()); - byte[] dataBuf = new byte[BLOCK_SIZE]; MockGetBlockReader answer = new MockGetBlockReader(); @@ -270,18 +142,15 @@ public class TestConnCache { Matchers.anyString()); // Initial read - pread(in, 0, dataBuf, 0, dataBuf.length); + pread(in, 0, dataBuf, 0, dataBuf.length, authenticData); // Read again and verify that the socket is the same - pread(in, FILE_SIZE - dataBuf.length, dataBuf, 0, dataBuf.length); - pread(in, 1024, dataBuf, 0, dataBuf.length); - pread(in, -1, dataBuf, 0, dataBuf.length); // No seek; just read - pread(in, 64, dataBuf, 0, dataBuf.length / 2); + pread(in, FILE_SIZE - dataBuf.length, dataBuf, 0, dataBuf.length, + authenticData); + pread(in, 1024, dataBuf, 0, dataBuf.length, authenticData); + // No seek; just read + pread(in, -1, dataBuf, 0, dataBuf.length, authenticData); + pread(in, 64, dataBuf, 0, dataBuf.length / 2, authenticData); in.close(); } - - @AfterClass - public static void teardownCluster() throws Exception { - util.shutdown(); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java index c8ab6e002fa..9ef0f093ffa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; +import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; @@ -92,13 +93,13 @@ public class TestDataTransferKeepalive { DFSTestUtil.createFile(fs, TEST_FILE, 1L, (short)1, 0L); // Clients that write aren't currently re-used. - assertEquals(0, dfsClient.socketCache.size()); + assertEquals(0, dfsClient.peerCache.size()); assertXceiverCount(0); // Reads the file, so we should get a // cached socket, and should have an xceiver on the other side. DFSTestUtil.readFile(fs, TEST_FILE); - assertEquals(1, dfsClient.socketCache.size()); + assertEquals(1, dfsClient.peerCache.size()); assertXceiverCount(1); // Sleep for a bit longer than the keepalive timeout @@ -109,13 +110,13 @@ public class TestDataTransferKeepalive { // The socket is still in the cache, because we don't // notice that it's closed until we try to read // from it again. 
- assertEquals(1, dfsClient.socketCache.size()); + assertEquals(1, dfsClient.peerCache.size()); // Take it out of the cache - reading should // give an EOF. - Socket s = dfsClient.socketCache.get(dnAddr).sock; - assertNotNull(s); - assertEquals(-1, NetUtils.getInputStream(s).read()); + Peer peer = dfsClient.peerCache.get(dn.getDatanodeId()); + assertNotNull(peer); + assertEquals(-1, peer.getInputStream().read()); } /** @@ -174,14 +175,14 @@ public class TestDataTransferKeepalive { } DFSClient client = ((DistributedFileSystem)fs).dfs; - assertEquals(5, client.socketCache.size()); + assertEquals(5, client.peerCache.size()); // Let all the xceivers timeout Thread.sleep(1500); assertXceiverCount(0); // Client side still has the sockets cached - assertEquals(5, client.socketCache.size()); + assertEquals(5, client.peerCache.size()); // Reading should not throw an exception. DFSTestUtil.readFile(fs, TEST_FILE); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java new file mode 100644 index 00000000000..f7fb128bb1b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDisableConnCache.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import static org.junit.Assert.assertEquals; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.Test; + +/** + * This class tests disabling client connection caching in a single node + * mini-cluster. + */ +public class TestDisableConnCache { + static final Log LOG = LogFactory.getLog(TestDisableConnCache.class); + + static final int BLOCK_SIZE = 4096; + static final int FILE_SIZE = 3 * BLOCK_SIZE; + + /** + * Test that the socket cache can be disabled by setting the capacity to + * 0. Regression test for HDFS-3365. 
+ * @throws Exception + */ + @Test + public void testDisableCache() throws Exception { + HdfsConfiguration confWithoutCache = new HdfsConfiguration(); + // Configure a new instance with no peer caching, ensure that it doesn't + // cache anything + confWithoutCache.setInt( + DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY, 0); + BlockReaderTestUtil util = new BlockReaderTestUtil(1, confWithoutCache); + final Path testFile = new Path("/testConnCache.dat"); + util.writeFile(testFile, FILE_SIZE / 1024); + FileSystem fsWithoutCache = FileSystem.newInstance(util.getConf()); + try { + DFSTestUtil.readFile(fsWithoutCache, testFile); + assertEquals(0, ((DistributedFileSystem)fsWithoutCache).dfs.peerCache.size()); + } finally { + fsWithoutCache.close(); + util.shutdown(); + } + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java new file mode 100644 index 00000000000..0953c410b03 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java @@ -0,0 +1,218 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertSame; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.ReadableByteChannel; +import java.util.HashSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.net.Peer; +import org.junit.Test; + +public class TestPeerCache { + static final Log LOG = LogFactory.getLog(TestPeerCache.class); + + private static final int CAPACITY = 3; + private static final int EXPIRY_PERIOD = 20; + private static PeerCache cache = + PeerCache.getInstance(CAPACITY, EXPIRY_PERIOD); + + private static class FakePeer implements Peer { + private boolean closed = false; + + private DatanodeID dnId; + + public FakePeer(DatanodeID dnId) { + this.dnId = dnId; + } + + @Override + public ReadableByteChannel getInputStreamChannel() { + throw new UnsupportedOperationException(); + } + + @Override + public void setReadTimeout(int timeoutMs) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int getReceiveBufferSize() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean getTcpNoDelay() throws IOException { + return false; + } + + @Override + public void setWriteTimeout(int timeoutMs) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isClosed() { + return closed; + } + + @Override + public void close() throws IOException { + closed = true; + } + + @Override + public String getRemoteAddressString() { + return dnId.getInfoAddr(); + } + + @Override + public String getLocalAddressString() { + return "127.0.0.1:123"; + } + + @Override + public InputStream getInputStream() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public OutputStream getOutputStream() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public boolean isLocal() { + return true; + } + + @Override + public String toString() { + return "FakePeer(dnId=" + dnId + ")"; + } + } + + @Test + public void testAddAndRetrieve() throws Exception { + DatanodeID dnId = new DatanodeID("192.168.0.1", + "fakehostname", "fake_storage_id", + 100, 101, 102); + FakePeer peer = new FakePeer(dnId); + cache.put(dnId, peer); + assertTrue(!peer.isClosed()); + assertEquals(1, cache.size()); + assertEquals(peer, cache.get(dnId)); + assertEquals(0, cache.size()); + cache.clear(); + } + + @Test + public void testExpiry() throws Exception { + DatanodeID dnIds[] = new DatanodeID[CAPACITY]; + FakePeer peers[] = new FakePeer[CAPACITY]; + for (int i = 0; i < CAPACITY; ++i) { + dnIds[i] = new DatanodeID("192.168.0.1", + "fakehostname_" + i, "fake_storage_id", + 100, 101, 102); + peers[i] = new FakePeer(dnIds[i]); + } + for (int i = 0; i < CAPACITY; ++i) { + cache.put(dnIds[i], peers[i]); + } + // Check that the peers are cached + assertEquals(CAPACITY, cache.size()); + + // Wait for the peers to expire + Thread.sleep(EXPIRY_PERIOD * 50); + assertEquals(0, cache.size()); + + // make sure that the peers were closed when they were expired + for (int i = 0; i < CAPACITY; ++i) { + assertTrue(peers[i].isClosed()); + } + + // sleep for another second and see if + // the daemon thread runs fine on empty cache + Thread.sleep(EXPIRY_PERIOD * 50); + 
cache.clear(); + } + + @Test + public void testEviction() throws Exception { + DatanodeID dnIds[] = new DatanodeID[CAPACITY + 1]; + FakePeer peers[] = new FakePeer[CAPACITY + 1]; + for (int i = 0; i < dnIds.length; ++i) { + dnIds[i] = new DatanodeID("192.168.0.1", + "fakehostname_" + i, "fake_storage_id_" + i, + 100, 101, 102); + peers[i] = new FakePeer(dnIds[i]); + } + for (int i = 0; i < CAPACITY; ++i) { + cache.put(dnIds[i], peers[i]); + } + // Check that the peers are cached + assertEquals(CAPACITY, cache.size()); + + // Add another entry and check that the first entry was evicted + cache.put(dnIds[CAPACITY], peers[CAPACITY]); + assertEquals(CAPACITY, cache.size()); + assertSame(null, cache.get(dnIds[0])); + + // Make sure that the other entries are still there + for (int i = 1; i < CAPACITY; ++i) { + Peer peer = cache.get(dnIds[i]); + assertSame(peers[i], peer); + assertTrue(!peer.isClosed()); + peer.close(); + } + assertEquals(1, cache.size()); + cache.clear(); + } + + @Test + public void testMultiplePeersWithSameDnId() throws Exception { + DatanodeID dnId = new DatanodeID("192.168.0.1", + "fakehostname", "fake_storage_id", + 100, 101, 102); + HashSet peers = new HashSet(CAPACITY); + for (int i = 0; i < CAPACITY; ++i) { + FakePeer peer = new FakePeer(dnId); + peers.add(peer); + cache.put(dnId, peer); + } + // Check that all of the peers ended up in the cache + assertEquals(CAPACITY, cache.size()); + while (!peers.isEmpty()) { + Peer peer = cache.get(dnId); + assertTrue(peer != null); + assertTrue(!peer.isClosed()); + peers.remove(peer); + } + assertEquals(0, cache.size()); + cache.clear(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSocketCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSocketCache.java deleted file mode 100644 index 255d408f824..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSocketCache.java +++ /dev/null @@ -1,171 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hdfs; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.spy; - -import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.Socket; -import java.security.PrivilegedExceptionAction; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; -import org.apache.hadoop.hdfs.server.datanode.DataNode; -import org.apache.hadoop.security.token.Token; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.mockito.Matchers; -import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -/** - * This class tests the client connection caching in a single node - * mini-cluster. - */ -public class TestSocketCache { - static final Log LOG = LogFactory.getLog(TestSocketCache.class); - - static final int BLOCK_SIZE = 4096; - static final int FILE_SIZE = 3 * BLOCK_SIZE; - final static int CACHE_SIZE = 4; - final static long CACHE_EXPIRY_MS = 200; - static Configuration conf = null; - static MiniDFSCluster cluster = null; - static FileSystem fs = null; - static SocketCache cache; - - static final Path testFile = new Path("/testConnCache.dat"); - static byte authenticData[] = null; - - static BlockReaderTestUtil util = null; - - - /** - * A mock Answer to remember the BlockReader used. - * - * It verifies that all invocation to DFSInputStream.getBlockReader() - * use the same socket. - */ - private class MockGetBlockReader implements Answer { - public RemoteBlockReader2 reader = null; - private Socket sock = null; - - @Override - public RemoteBlockReader2 answer(InvocationOnMock invocation) throws Throwable { - RemoteBlockReader2 prevReader = reader; - reader = (RemoteBlockReader2) invocation.callRealMethod(); - if (sock == null) { - sock = reader.dnSock; - } else if (prevReader != null) { - assertSame("DFSInputStream should use the same socket", - sock, reader.dnSock); - } - return reader; - } - } - - @BeforeClass - public static void setupCluster() throws Exception { - final int REPLICATION_FACTOR = 1; - - HdfsConfiguration confWithoutCache = new HdfsConfiguration(); - confWithoutCache.setInt( - DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY, 0); - util = new BlockReaderTestUtil(REPLICATION_FACTOR, confWithoutCache); - cluster = util.getCluster(); - conf = util.getConf(); - - authenticData = util.writeFile(testFile, FILE_SIZE / 1024); - } - - - /** - * (Optionally) seek to position, read and verify data. - * - * Seek to specified position if pos is non-negative. 
- */ - private void pread(DFSInputStream in, - long pos, - byte[] buffer, - int offset, - int length) - throws IOException { - assertTrue("Test buffer too small", buffer.length >= offset + length); - - if (pos >= 0) - in.seek(pos); - - LOG.info("Reading from file of size " + in.getFileLength() + - " at offset " + in.getPos()); - - while (length > 0) { - int cnt = in.read(buffer, offset, length); - assertTrue("Error in read", cnt > 0); - offset += cnt; - length -= cnt; - } - - // Verify - for (int i = 0; i < length; ++i) { - byte actual = buffer[i]; - byte expect = authenticData[(int)pos + i]; - assertEquals("Read data mismatch at file offset " + (pos + i) + - ". Expects " + expect + "; got " + actual, - actual, expect); - } - } - - - /** - * Test that the socket cache can be disabled by setting the capacity to - * 0. Regression test for HDFS-3365. - */ - @Test - public void testDisableCache() throws IOException { - LOG.info("Starting testDisableCache()"); - - // Configure a new instance with no caching, ensure that it doesn't - // cache anything - - FileSystem fsWithoutCache = FileSystem.newInstance(conf); - try { - DFSTestUtil.readFile(fsWithoutCache, testFile); - assertEquals(0, ((DistributedFileSystem)fsWithoutCache).dfs.socketCache.size()); - } finally { - fsWithoutCache.close(); - } - } - - @AfterClass - public static void teardownCluster() throws Exception { - util.shutdown(); - } -} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java index c7dbf200b13..fc2c6c67912 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -145,8 +146,9 @@ public class TestBlockTokenWithDFS { String file = BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId()); blockReader = BlockReaderFactory.newBlockReader( - conf, s, file, block, - lblock.getBlockToken(), 0, -1, null); + conf, file, block, lblock.getBlockToken(), 0, -1, + true, "TestBlockTokenWithDFS", TcpPeerServer.peerFromSocket(s), + nodes[0]); } catch (IOException ex) { if (ex instanceof InvalidBlockTokenException) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java index 5a80098329b..efe0a4cc962 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import 
org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -280,8 +281,9 @@ public class TestDataNodeVolumeFailure { String file = BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId()); - BlockReaderFactory.newBlockReader(conf, s, file, block, lblock - .getBlockToken(), 0, -1, null); + BlockReaderFactory.newBlockReader(conf, file, block, + lblock.getBlockToken(), 0, -1, true, "TestDataNodeVolumeFailure", + TcpPeerServer.peerFromSocket(s), datanode); // nothing - if it fails - it will throw and exception } From d94621a0cd33c01e1c2765d49d20b4438022e20c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 9 Jan 2013 21:37:34 +0000 Subject: [PATCH 03/52] HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1431102 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/pom.xml | 1 + .../hadoop-common/src/CMakeLists.txt | 3 +- .../apache/hadoop/net/unix/DomainSocket.java | 619 ++++++++++++ .../src/main/native/src/exception.c | 109 +++ .../src/main/native/src/exception.h | 82 ++ .../org/apache/hadoop/net/unix/DomainSocket.c | 924 ++++++++++++++++++ .../src/main/native/src/org_apache_hadoop.h | 4 + .../net/unix/TemporarySocketDirectory.java | 58 ++ .../hadoop/net/unix/TestDomainSocket.java | 576 +++++++++++ .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 + .../apache/hadoop/hdfs/net/BasicInetPeer.java | 7 + .../apache/hadoop/hdfs/net/DomainPeer.java | 117 +++ .../hadoop/hdfs/net/DomainPeerServer.java | 88 ++ .../apache/hadoop/hdfs/net/EncryptedPeer.java | 6 + .../apache/hadoop/hdfs/net/NioInetPeer.java | 6 + .../java/org/apache/hadoop/hdfs/net/Peer.java | 7 + .../org/apache/hadoop/hdfs/TestPeerCache.java | 6 + 17 files changed, 2615 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java create mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/exception.c create mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/exception.h create mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index a4f7ceabc00..3414d6d2d31 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -491,6 +491,7 @@ org.apache.hadoop.io.compress.lz4.Lz4Compressor org.apache.hadoop.io.compress.lz4.Lz4Decompressor org.apache.hadoop.util.NativeCrc32 + org.apache.hadoop.net.unix.DomainSocket ${project.build.directory}/native/javah diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index c7f05e5c3bc..7c47ca46a9a 
100644 --- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -144,10 +144,10 @@ add_executable(test_bulk_crc32 ${D}/util/bulk_crc32.c ${T}/util/test_bulk_crc32.c ) -set_property(SOURCE main.cpp PROPERTY INCLUDE_DIRECTORIES "\"-Werror\" \"-Wall\"") SET(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) add_dual_library(hadoop + main/native/src/exception.c ${D}/io/compress/lz4/Lz4Compressor.c ${D}/io/compress/lz4/Lz4Decompressor.c ${D}/io/compress/lz4/lz4.c @@ -157,6 +157,7 @@ add_dual_library(hadoop ${D}/io/nativeio/NativeIO.c ${D}/io/nativeio/errno_enum.c ${D}/io/nativeio/file_descriptor.c + ${D}/net/unix/DomainSocket.c ${D}/security/JniBasedUnixGroupsMapping.c ${D}/security/JniBasedUnixGroupsNetgroupMapping.c ${D}/security/getGroup.c diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java new file mode 100644 index 00000000000..9f93924b5be --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -0,0 +1,619 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net.unix; + +import java.io.Closeable; +import org.apache.hadoop.classification.InterfaceAudience; +import java.io.FileDescriptor; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.SocketException; +import java.nio.channels.ReadableByteChannel; +import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.commons.lang.SystemUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.util.NativeCodeLoader; + +import com.google.common.annotations.VisibleForTesting; + +/** + * The implementation of UNIX domain sockets in Java. + * + * See {@link DomainSocket} for more information about UNIX domain sockets. 
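For orientation, a minimal usage sketch of the DomainSocket API introduced in this file (bindAndListen, connect, accept, and the associated streams). The class name and socket path below are hypothetical and not part of the patch; the sketch assumes the libhadoop native code is loadable, since bindAndListen and connect otherwise throw UnsupportedOperationException.

    import java.io.InputStream;
    import java.io.OutputStream;
    import org.apache.hadoop.net.unix.DomainSocket;

    public class DomainSocketSketch {
      public static void main(String[] args) throws Exception {
        // Server side: bind a listening socket and echo one byte per connection.
        final DomainSocket server = DomainSocket.bindAndListen("/tmp/example.sock");
        Thread serverThread = new Thread(new Runnable() {
          @Override
          public void run() {
            try {
              DomainSocket conn = server.accept();      // blocks until a client connects
              InputStream in = conn.getInputStream();
              OutputStream out = conn.getOutputStream();
              out.write(in.read());                     // echo a single byte back
              conn.close();
            } catch (Exception e) {
              e.printStackTrace();
            }
          }
        });
        serverThread.start();

        // Client side: connect to the same path, send a byte, read the echo.
        DomainSocket client = DomainSocket.connect("/tmp/example.sock");
        client.getOutputStream().write(42);
        System.out.println("echoed: " + client.getInputStream().read());
        client.close();
        serverThread.join();
        server.close();
      }
    }

Closing the server socket from another thread also unblocks a pending accept(), which the TestDomainSocket cases added later in this patch exercise directly.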
+ */ +@InterfaceAudience.LimitedPrivate("HDFS") +public class DomainSocket implements Closeable { + static { + if (SystemUtils.IS_OS_WINDOWS) { + loadingFailureReason = "UNIX Domain sockets are not available on Windows."; + } else if (!NativeCodeLoader.isNativeCodeLoaded()) { + loadingFailureReason = "libhadoop cannot be loaded."; + } else { + String problem = "DomainSocket#anchorNative got error: unknown"; + try { + anchorNative(); + problem = null; + } catch (Throwable t) { + problem = "DomainSocket#anchorNative got error: " + t.getMessage(); + } + loadingFailureReason = problem; + } + } + + static Log LOG = LogFactory.getLog(DomainSocket.class); + + /** + * True only if we should validate the paths used in {@link DomainSocket#bind()} + */ + private static boolean validateBindPaths = true; + + /** + * The reason why DomainSocket is not available, or null if it is available. + */ + private final static String loadingFailureReason; + + /** + * Initialize the native library code. + */ + private static native void anchorNative(); + + /** + * This function is designed to validate that the path chosen for a UNIX + * domain socket is secure. A socket path is secure if it doesn't allow + * unprivileged users to perform a man-in-the-middle attack against it. + * For example, one way to perform a man-in-the-middle attack would be for + * a malicious user to move the server socket out of the way and create his + * own socket in the same place. Not good. + * + * Note that we only check the path once. It's possible that the + * permissions on the path could change, perhaps to something more relaxed, + * immediately after the path passes our validation test-- hence creating a + * security hole. However, the purpose of this check is to spot common + * misconfigurations. System administrators do not commonly change + * permissions on these paths while the server is running. + * + * @param path the path to validate + * @param skipComponents the number of starting path components to skip + * validation for (used only for testing) + */ + @VisibleForTesting + native static void validateSocketPathSecurity0(String path, + int skipComponents) throws IOException; + + /** + * Return true only if UNIX domain sockets are available. + */ + public static String getLoadingFailureReason() { + return loadingFailureReason; + } + + /** + * Disable validation of the server bind paths. + */ + @VisibleForTesting + static void disableBindPathValidation() { + validateBindPaths = false; + } + + /** + * Given a path and a port, compute the effective path by replacing + * occurrences of __PORT__ with the port. This is mainly to make it + * possible to run multiple DataNodes locally for testing purposes. + * + * @param path The source path + * @param port Port number to use + * + * @return The effective path + */ + public static String getEffectivePath(String path, int port) { + return path.replace("__PORT__", String.valueOf(port)); + } + + /** + * Status bits + * + * Bit 30: 0 = DomainSocket open, 1 = DomainSocket closed + * Bits 29 to 0: the reference count. + */ + private final AtomicInteger status; + + /** + * Bit mask representing a closed domain socket. + */ + private static final int STATUS_CLOSED_MASK = 1 << 30; + + /** + * The file descriptor associated with this UNIX domain socket. + */ + private final int fd; + + /** + * The path associated with this UNIX domain socket. + */ + private final String path; + + /** + * The InputStream associated with this socket. 
+ */ + private final DomainInputStream inputStream = new DomainInputStream(); + + /** + * The OutputStream associated with this socket. + */ + private final DomainOutputStream outputStream = new DomainOutputStream(); + + /** + * The Channel associated with this socket. + */ + private final DomainChannel channel = new DomainChannel(); + + private DomainSocket(String path, int fd) { + this.status = new AtomicInteger(0); + this.fd = fd; + this.path = path; + } + + private static native int bind0(String path) throws IOException; + + /** + * Create a new DomainSocket listening on the given path. + * + * @param path The path to bind and listen on. + * @return The new DomainSocket. + */ + public static DomainSocket bindAndListen(String path) throws IOException { + if (loadingFailureReason != null) { + throw new UnsupportedOperationException(loadingFailureReason); + } + if (validateBindPaths) { + validateSocketPathSecurity0(path, 0); + } + int fd = bind0(path); + return new DomainSocket(path, fd); + } + + private static native int accept0(int fd) throws IOException; + + /** + * Accept a new UNIX domain connection. + * + * This method can only be used on sockets that were bound with bind(). + * + * @return The new connection. + * @throws IOException If there was an I/O error + * performing the accept-- such as the + * socket being closed from under us. + * @throws SocketTimeoutException If the accept timed out. + */ + public DomainSocket accept() throws IOException { + fdRef(); + try { + return new DomainSocket(path, accept0(fd)); + } finally { + fdUnref(); + } + } + + private static native int connect0(String path); + + /** + * Create a new DomainSocket connected to the given path. + * + * @param path The path to connect to. + * @return The new DomainSocket. + */ + public static DomainSocket connect(String path) throws IOException { + if (loadingFailureReason != null) { + throw new UnsupportedOperationException(loadingFailureReason); + } + int fd = connect0(path); + return new DomainSocket(path, fd); + } + + /** + * Increment the reference count of the underlying file descriptor. + * + * @throws SocketException If the file descriptor is closed. + */ + private void fdRef() throws SocketException { + int bits = status.incrementAndGet(); + if ((bits & STATUS_CLOSED_MASK) != 0) { + status.decrementAndGet(); + throw new SocketException("Socket is closed."); + } + } + + /** + * Decrement the reference count of the underlying file descriptor. + */ + private void fdUnref() { + int newCount = status.decrementAndGet(); + assert newCount >= 0; + } + + /** + * Return true if the file descriptor is currently open. + * + * @return True if the file descriptor is currently open. + */ + public boolean isOpen() { + return ((status.get() & STATUS_CLOSED_MASK) == 0); + } + + /** + * @return The socket path. 
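To make the status-word scheme described above concrete (one high bit marks the socket closed, the low bits count in-flight operations), here is a small standalone toy that mirrors the fdRef/fdUnref pattern used by every operation in this class and the drain-then-close logic of the close() method that follows. It is an illustration only, not Hadoop code.

    import java.net.SocketException;
    import java.util.concurrent.atomic.AtomicInteger;

    class RefCountedHandle {
      private static final int STATUS_CLOSED_MASK = 1 << 30;
      private final AtomicInteger status = new AtomicInteger(0);

      // Called on entry to every I/O operation.
      void ref() throws SocketException {
        if ((status.incrementAndGet() & STATUS_CLOSED_MASK) != 0) {
          status.decrementAndGet();               // undo the increment and fail
          throw new SocketException("Socket is closed.");
        }
      }

      // Called in a finally block when the operation completes.
      void unref() {
        status.decrementAndGet();
      }

      void close() throws InterruptedException {
        int bits;
        do {                                      // set the closed bit exactly once
          bits = status.get();
          if ((bits & STATUS_CLOSED_MASK) != 0) {
            return;                               // someone else already closed it
          }
        } while (!status.compareAndSet(bits, bits | STATUS_CLOSED_MASK));
        while ((status.get() & ~STATUS_CLOSED_MASK) > 0) {
          Thread.sleep(10);                       // wait for in-flight operations to drain
        }
        // The real DomainSocket.close() additionally calls shutdown0() while waiting,
        // to unblock threads stuck in accept()/read(), and finally close0() to
        // release the file descriptor.
      }
    }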
+ */ + public String getPath() { + return path; + } + + /** + * @return The socket InputStream + */ + public DomainInputStream getInputStream() { + return inputStream; + } + + /** + * @return The socket OutputStream + */ + public DomainOutputStream getOutputStream() { + return outputStream; + } + + /** + * @return The socket Channel + */ + public DomainChannel getChannel() { + return channel; + } + + public static final int SND_BUF_SIZE = 1; + public static final int RCV_BUF_SIZE = 2; + public static final int SND_TIMEO = 3; + public static final int RCV_TIMEO = 4; + + private static native void setAttribute0(int fd, int type, int val) + throws IOException; + + public void setAttribute(int type, int size) throws IOException { + fdRef(); + try { + setAttribute0(fd, type, size); + } finally { + fdUnref(); + } + } + + private native int getAttribute0(int fd, int type) throws IOException; + + public int getAttribute(int type) throws IOException { + fdRef(); + try { + return getAttribute0(fd, type); + } finally { + fdUnref(); + } + } + + private static native void close0(int fd) throws IOException; + + private static native void closeFileDescriptor0(FileDescriptor fd) + throws IOException; + + private static native void shutdown0(int fd) throws IOException; + + /** + * Close the Socket. + */ + @Override + public void close() throws IOException { + // Set the closed bit on this DomainSocket + int bits; + while (true) { + bits = status.get(); + if ((bits & STATUS_CLOSED_MASK) != 0) { + return; // already closed + } + if (status.compareAndSet(bits, bits | STATUS_CLOSED_MASK)) { + break; + } + } + // Wait for all references to go away + boolean didShutdown = false; + boolean interrupted = false; + while ((bits & (~STATUS_CLOSED_MASK)) > 0) { + if (!didShutdown) { + try { + // Calling shutdown on the socket will interrupt blocking system + // calls like accept, write, and read that are going on in a + // different thread. + shutdown0(fd); + } catch (IOException e) { + LOG.error("shutdown error: ", e); + } + didShutdown = true; + } + try { + Thread.sleep(10); + } catch (InterruptedException e) { + interrupted = true; + } + bits = status.get(); + } + + // Close the file descriptor. After this point, the file descriptor + // number will be reused by something else. Although this DomainSocket + // object continues to hold the old file descriptor number (it's a final + // field), we never use it again because we look at the closed bit and + // realize that this DomainSocket is not usable. + close0(fd); + if (interrupted) { + Thread.currentThread().interrupt(); + } + } + + /* + * Clean up if the user forgets to close the socket. + */ + protected void finalize() throws IOException { + close(); + } + + private native static void sendFileDescriptors0(int fd, FileDescriptor jfds[], + byte jbuf[], int offset, int length) throws IOException; + + /** + * Send some FileDescriptor objects to the process on the other side of this + * socket. + * + * @param jfds The file descriptors to send. + * @param jbuf Some bytes to send. You must send at least + * one byte. + * @param offset The offset in the jbuf array to start at. + * @param length Length of the jbuf array to use. 
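A hedged sketch of how the descriptor-passing methods declared here might be used once two DomainSockets are connected. The helper class, the sendSide/receiveSide methods, and the file path are hypothetical; recvFileInputStreams is the convenience wrapper defined a little further down in this file, and, as the javadoc above notes, at least one ordinary byte must travel alongside the descriptors.

    import java.io.FileDescriptor;
    import java.io.FileInputStream;
    import org.apache.hadoop.net.unix.DomainSocket;

    class FdPassingSketch {
      // Sender: pass the descriptor of an open file, plus one ordinary byte.
      static void sendSide(DomainSocket sender) throws Exception {
        FileInputStream file = new FileInputStream("/tmp/example.dat");
        try {
          sender.sendFileDescriptors(
              new FileDescriptor[] { file.getFD() }, new byte[] { 1 }, 0, 1);
        } finally {
          file.close();
        }
      }

      // Receiver: get back a FileInputStream wrapping the passed descriptor.
      static void receiveSide(DomainSocket receiver) throws Exception {
        FileInputStream[] streams = new FileInputStream[1];
        byte[] buf = new byte[1];
        int n = receiver.recvFileInputStreams(streams, buf, 0, 1);
        if (n > 0 && streams[0] != null) {
          System.out.println("first byte of passed file: " + streams[0].read());
          streams[0].close();
        }
      }
    }

Handing already-open file descriptors across the socket in this way is the building block the rest of the HDFS-347 branch uses for local (short-circuit) block reads.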
+ */ + public void sendFileDescriptors(FileDescriptor jfds[], + byte jbuf[], int offset, int length) throws IOException { + fdRef(); + try { + sendFileDescriptors0(fd, jfds, jbuf, offset, length); + } finally { + fdUnref(); + } + } + + private static native int receiveFileDescriptors0(int fd, FileDescriptor[] jfds, + byte jbuf[], int offset, int length) throws IOException; + + /** + * Receive some FileDescriptor objects from the process on the other side of + * this socket. + * + * @param jfds (output parameter) Array of FileDescriptors. + * We will fill as many slots as possible with file + * descriptors passed from the remote process. The + * other slots will contain NULL. + * @param jbuf (output parameter) Buffer to read into. + * The UNIX domain sockets API requires you to read + * at least one byte from the remote process, even + * if all you care about is the file descriptors + * you will receive. + * @param offset Offset into the byte buffer to load data + * @param length Length of the byte buffer to use for data + * + * @return The number of bytes read. This will be -1 if we + * reached EOF (similar to SocketInputStream); + * otherwise, it will be positive. + * @throws IOException if there was an I/O error. + */ + public int receiveFileDescriptors(FileDescriptor[] jfds, + byte jbuf[], int offset, int length) throws IOException { + fdRef(); + try { + return receiveFileDescriptors0(fd, jfds, jbuf, offset, length); + } finally { + fdUnref(); + } + } + + /** + * Receive some FileDescriptor objects from the process on the other side of + * this socket, and wrap them in FileInputStream objects. + * + * See {@link DomainSocket#recvFileInputStreams(ByteBuffer)} + */ + public int recvFileInputStreams(FileInputStream[] fis, byte buf[], + int offset, int length) throws IOException { + FileDescriptor fds[] = new FileDescriptor[fis.length]; + boolean success = false; + for (int i = 0; i < fis.length; i++) { + fis[i] = null; + } + fdRef(); + try { + int ret = receiveFileDescriptors0(fd, fds, buf, offset, length); + for (int i = 0, j = 0; i < fds.length; i++) { + if (fds[i] != null) { + fis[j++] = new FileInputStream(fds[i]); + fds[i] = null; + } + } + success = true; + return ret; + } finally { + fdUnref(); + if (!success) { + for (int i = 0; i < fds.length; i++) { + if (fds[i] != null) { + try { + closeFileDescriptor0(fds[i]); + } catch (Throwable t) { + LOG.warn(t); + } + } else if (fis[i] != null) { + try { + fis[i].close(); + } catch (Throwable t) { + LOG.warn(t); + } finally { + fis[i] = null; } + } + } + } + } + } + + private native static int readArray0(int fd, byte b[], int off, int len) + throws IOException; + + private native static int available0(int fd) throws IOException; + + private static native void write0(int fd, int b) throws IOException; + + private static native void writeArray0(int fd, byte b[], int offset, int length) + throws IOException; + + private native static int readByteBufferDirect0(int fd, ByteBuffer dst, + int position, int remaining) throws IOException; + + /** + * Input stream for UNIX domain sockets. + */ + @InterfaceAudience.LimitedPrivate("HDFS") + public class DomainInputStream extends InputStream { + @Override + public int read() throws IOException { + fdRef(); + try { + byte b[] = new byte[1]; + int ret = DomainSocket.readArray0(DomainSocket.this.fd, b, 0, 1); + return (ret >= 0) ? 
b[0] : -1; + } finally { + fdUnref(); + } + } + + @Override + public int read(byte b[], int off, int len) throws IOException { + fdRef(); + try { + return DomainSocket.readArray0(DomainSocket.this.fd, b, off, len); + } finally { + fdUnref(); + } + } + + @Override + public int available() throws IOException { + fdRef(); + try { + return DomainSocket.available0(DomainSocket.this.fd); + } finally { + fdUnref(); + } + } + + @Override + public void close() throws IOException { + DomainSocket.this.close(); + } + } + + /** + * Output stream for UNIX domain sockets. + */ + @InterfaceAudience.LimitedPrivate("HDFS") + public class DomainOutputStream extends OutputStream { + @Override + public void close() throws IOException { + DomainSocket.this.close(); + } + + @Override + public void write(int val) throws IOException { + fdRef(); + try { + byte b[] = new byte[1]; + b[0] = (byte)val; + DomainSocket.writeArray0(DomainSocket.this.fd, b, 0, 1); + } finally { + fdUnref(); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + fdRef(); + try { + DomainSocket.writeArray0(DomainSocket.this.fd, b, off, len); + } finally { + fdUnref(); + } + } + } + + @InterfaceAudience.LimitedPrivate("HDFS") + public class DomainChannel implements ReadableByteChannel { + @Override + public boolean isOpen() { + return DomainSocket.this.isOpen(); + } + + @Override + public void close() throws IOException { + DomainSocket.this.close(); + } + + @Override + public int read(ByteBuffer dst) throws IOException { + fdRef(); + try { + int nread = 0; + if (dst.isDirect()) { + nread = DomainSocket.readByteBufferDirect0(DomainSocket.this.fd, + dst, dst.position(), dst.remaining()); + } else if (dst.hasArray()) { + nread = DomainSocket.readArray0(DomainSocket.this.fd, + dst.array(), dst.position() + dst.arrayOffset(), + dst.remaining()); + } else { + throw new AssertionError("we don't support " + + "using ByteBuffers that aren't either direct or backed by " + + "arrays"); + } + if (nread > 0) { + dst.position(dst.position() + nread); + } + return nread; + } finally { + fdUnref(); + } + } + } + + @Override + public String toString() { + return String.format("DomainSocket(fd=%d,path=%s)", fd, path); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/exception.c b/hadoop-common-project/hadoop-common/src/main/native/src/exception.c new file mode 100644 index 00000000000..39a03f9cde0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/exception.c @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "exception.h" + +#include +#include +#include +#include + +jthrowable newExceptionV(JNIEnv* env, const char *name, + const char *fmt, va_list ap) +{ + int need; + char buf[1], *msg = NULL; + va_list ap2; + jstring jstr = NULL; + jthrowable jthr; + jclass clazz; + jmethodID excCtor; + + va_copy(ap2, ap); + clazz = (*env)->FindClass(env, name); + if (!clazz) { + jthr = (*env)->ExceptionOccurred(env); + (*env)->ExceptionClear(env); + goto done; + } + excCtor = (*env)->GetMethodID(env, + clazz, "", "(Ljava/lang/String;)V"); + if (!excCtor) { + jthr = (*env)->ExceptionOccurred(env); + (*env)->ExceptionClear(env); + goto done; + } + need = vsnprintf(buf, sizeof(buf), fmt, ap); + if (need < 0) { + fmt = "vsnprintf error"; + need = strlen(fmt); + } + msg = malloc(need + 1); + vsnprintf(msg, need + 1, fmt, ap2); + jstr = (*env)->NewStringUTF(env, msg); + if (!jstr) { + jthr = (*env)->ExceptionOccurred(env); + (*env)->ExceptionClear(env); + goto done; + } + jthr = (*env)->NewObject(env, clazz, excCtor, jstr); + if (!jthr) { + jthr = (*env)->ExceptionOccurred(env); + (*env)->ExceptionClear(env); + goto done; + } + +done: + free(msg); + va_end(ap2); + (*env)->DeleteLocalRef(env, jstr); + return jthr; +} + +jthrowable newException(JNIEnv* env, const char *name, const char *fmt, ...) +{ + va_list ap; + jthrowable jthr; + + va_start(ap, fmt); + jthr = newExceptionV(env, name, fmt, ap); + va_end(ap); + return jthr; +} + +jthrowable newRuntimeException(JNIEnv* env, const char *fmt, ...) +{ + va_list ap; + jthrowable jthr; + + va_start(ap, fmt); + jthr = newExceptionV(env, "java/lang/RuntimeException", fmt, ap); + va_end(ap); + return jthr; +} + +jthrowable newIOException(JNIEnv* env, const char *fmt, ...) +{ + va_list ap; + jthrowable jthr; + + va_start(ap, fmt); + jthr = newExceptionV(env, "java/io/IOException", fmt, ap); + va_end(ap); + return jthr; +} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/exception.h b/hadoop-common-project/hadoop-common/src/main/native/src/exception.h new file mode 100644 index 00000000000..d7af3772c85 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/exception.h @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef HADOOP_MAIN_NATIVE_SRC_EXCEPTION_H +#define HADOOP_MAIN_NATIVE_SRC_EXCEPTION_H + +#include /* for jthrowable */ +#include /* for va_list */ + +/** + * Create a new Exception. + * + * No exceptions will be pending on return. 
+ * + * @param env The JNI environment + * @param name full name of the Java exception class + * @param fmt printf-style format string + * @param ap printf-style arguments + * + * @return The RuntimeException + */ +jthrowable newExceptionV(JNIEnv* env, const char *name, + const char *fmt, va_list ap); + +/** + * Create a new Exception. + * + * No exceptions will be pending on return. + * + * @param env The JNI environment + * @param name full name of the Java exception class + * @param fmt printf-style format string + * @param ... printf-style arguments + * + * @return The RuntimeException + */ +jthrowable newException(JNIEnv* env, const char *name, const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +/** + * Create a new RuntimeException. + * + * No exceptions will be pending on return. + * + * @param env The JNI environment + * @param fmt printf-style format string + * @param ... printf-style arguments + * + * @return The RuntimeException + */ +jthrowable newRuntimeException(JNIEnv* env, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +/** + * Create a new IOException. + * + * No exceptions will be pending on return. + * + * @param env The JNI environment + * @param fmt printf-style format string + * @param ... printf-style arguments + * + * @return The IOException, or another exception if we failed + * to create the NativeIOException. + */ +jthrowable newIOException(JNIEnv* env, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +#endif diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c new file mode 100644 index 00000000000..2daa4866c92 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c @@ -0,0 +1,924 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define _GNU_SOURCE + +#include "exception.h" +#include "org/apache/hadoop/io/nativeio/file_descriptor.h" +#include "org_apache_hadoop.h" +#include "org_apache_hadoop_net_unix_DomainSocket.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for FIONREAD */ +#include +#include +#include +#include +#include + +#define SND_BUF_SIZE org_apache_hadoop_net_unix_DomainSocket_SND_BUF_SIZE +#define RCV_BUF_SIZE org_apache_hadoop_net_unix_DomainSocket_RCV_BUF_SIZE +#define SND_TIMEO org_apache_hadoop_net_unix_DomainSocket_SND_TIMEO +#define RCV_TIMEO org_apache_hadoop_net_unix_DomainSocket_RCV_TIMEO + +#define DEFAULT_RCV_TIMEO 120000 +#define DEFAULT_SND_TIMEO 120000 +#define LISTEN_BACKLOG 128 + +/** + * Can't pass more than this number of file descriptors in a single message. 
+ */ +#define MAX_PASSED_FDS 16 + +static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val); + +/** + * Convert an errno to a socket exception name. + * + * Note: we assume that all of these exceptions have a one-argument constructor + * that takes a string. + * + * @return The exception class name + */ +static const char *errnoToSocketExceptionName(int errnum) +{ + switch (errnum) { + case EAGAIN: + /* accept(2) returns EAGAIN when a socket timeout has been set, and that + * timeout elapses without an incoming connection. This error code is also + * used in non-blocking I/O, but we don't support that. */ + case ETIMEDOUT: + return "java/net/SocketTimeoutException"; + case EHOSTDOWN: + case EHOSTUNREACH: + case ECONNREFUSED: + return "java/net/NoRouteToHostException"; + case ENOTSUP: + return "java/lang/UnsupportedOperationException"; + default: + return "java/net/SocketException"; + } +} + +static jthrowable newSocketException(JNIEnv *env, int errnum, + const char *fmt, ...) + __attribute__((format(printf, 3, 4))); + +static jthrowable newSocketException(JNIEnv *env, int errnum, + const char *fmt, ...) +{ + va_list ap; + jthrowable jthr; + + va_start(ap, fmt); + jthr = newExceptionV(env, errnoToSocketExceptionName(errnum), fmt, ap); + va_end(ap); + return jthr; +} + +static const char* terror(int errnum) +{ + if ((errnum < 0) || (errnum >= sys_nerr)) { + return "unknown error."; + } + return sys_errlist[errnum]; +} + +/** + * Flexible buffer that will try to fit data on the stack, and fall back + * to the heap if necessary. + */ +struct flexibleBuffer { + int8_t *curBuf; + int8_t *allocBuf; + int8_t stackBuf[8196]; +}; + +static jthrowable flexBufInit(JNIEnv *env, struct flexibleBuffer *flexBuf, jint length) +{ + flexBuf->curBuf = flexBuf->allocBuf = NULL; + if (length < sizeof(flexBuf->stackBuf)) { + flexBuf->curBuf = flexBuf->stackBuf; + return NULL; + } + flexBuf->allocBuf = malloc(length); + if (!flexBuf->allocBuf) { + return newException(env, "java/lang/OutOfMemoryError", + "OOM allocating space for %d bytes of data.", length); + } + flexBuf->curBuf = flexBuf->allocBuf; + return NULL; +} + +static void flexBufFree(struct flexibleBuffer *flexBuf) +{ + free(flexBuf->allocBuf); +} + +static jthrowable setup(JNIEnv *env, int *ofd, jobject jpath, int doConnect) +{ + const char *cpath = NULL; + struct sockaddr_un addr; + jthrowable jthr = NULL; + int fd = -1, ret; + + fd = socket(PF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + ret = errno; + jthr = newSocketException(env, ret, + "error creating UNIX domain socket with SOCK_STREAM: %s", + terror(ret)); + goto done; + } + memset(&addr, 0, sizeof(&addr)); + addr.sun_family = AF_UNIX; + cpath = (*env)->GetStringUTFChars(env, jpath, NULL); + if (!cpath) { + jthr = (*env)->ExceptionOccurred(env); + (*env)->ExceptionClear(env); + goto done; + } + ret = snprintf(addr.sun_path, sizeof(addr.sun_path), + "%s", cpath); + if (ret < 0) { + ret = errno; + jthr = newSocketException(env, EIO, + "error computing UNIX domain socket path: error %d (%s)", + ret, terror(ret)); + goto done; + } + if (ret >= sizeof(addr.sun_path)) { + jthr = newSocketException(env, ENAMETOOLONG, + "error computing UNIX domain socket path: path too long. 
" + "The longest UNIX domain socket path possible on this host " + "is %zd bytes.", sizeof(addr.sun_path) - 1); + goto done; + } + if (doConnect) { + RETRY_ON_EINTR(ret, connect(fd, + (struct sockaddr*)&addr, sizeof(addr))); + if (ret < 0) { + ret = errno; + jthr = newException(env, "java/net/ConnectException", + "connect(2) error: %s when trying to connect to '%s'", + terror(ret), addr.sun_path); + goto done; + } + } else { + RETRY_ON_EINTR(ret, unlink(addr.sun_path)); + RETRY_ON_EINTR(ret, bind(fd, (struct sockaddr*)&addr, sizeof(addr))); + if (ret < 0) { + ret = errno; + jthr = newException(env, "java/net/BindException", + "bind(2) error: %s when trying to bind to '%s'", + terror(ret), addr.sun_path); + goto done; + } + if (listen(fd, LISTEN_BACKLOG) < 0) { + ret = errno; + jthr = newException(env, "java/net/BindException", + "listen(2) error: %s when trying to listen to '%s'", + terror(ret), addr.sun_path); + goto done; + } + } + +done: + if (cpath) { + (*env)->ReleaseStringUTFChars(env, jpath, cpath); + } + if (jthr) { + if (fd > 0) { + RETRY_ON_EINTR(ret, close(fd)); + fd = -1; + } + } else { + *ofd = fd; + } + return jthr; +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_anchorNative( +JNIEnv *env, jclass clazz) +{ + fd_init(env); // for fd_get, fd_create, etc. +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_validateSocketPathSecurity0( +JNIEnv *env, jclass clazz, jobject jstr, jint skipComponents) +{ + jint utfLength; + char path[PATH_MAX], check[PATH_MAX] = { 0 }, *token, *rest; + struct stat st; + int ret, mode, strlenPath; + uid_t uid; + jthrowable jthr = NULL; + + utfLength = (*env)->GetStringUTFLength(env, jstr); + if (utfLength > sizeof(path)) { + jthr = newIOException(env, "path is too long! We expected a path " + "no longer than %zd UTF-8 bytes.", sizeof(path)); + goto done; + } + (*env)->GetStringUTFRegion(env, jstr, 0, utfLength, path); + jthr = (*env)->ExceptionOccurred(env); + if (jthr) { + (*env)->ExceptionClear(env); + goto done; + } + uid = geteuid(); + strlenPath = strlen(path); + if (strlenPath == 0) { + jthr = newIOException(env, "socket path is empty."); + goto done; + } + if (path[strlenPath - 1] == '/') { + /* It makes no sense to have a socket path that ends in a slash, since + * sockets are not directories. */ + jthr = newIOException(env, "bad socket path '%s'. The socket path " + "must not end in a slash.", path); + goto done; + } + rest = path; + while ((token = strtok_r(rest, "/", &rest))) { + strcat(check, "/"); + strcat(check, token); + if (skipComponents > 0) { + skipComponents--; + continue; + } + if (!index(rest, '/')) { + /* Don't validate the last component, since it's not supposed to be a + * directory. (If it is a directory, we will fail to create the socket + * later with EISDIR or similar.) + */ + break; + } + if (stat(check, &st) < 0) { + ret = errno; + jthr = newIOException(env, "failed to stat a path component: '%s'. " + "error code %d (%s)", check, ret, terror(ret)); + goto done; + } + mode = st.st_mode & 0777; + if (mode & 0002) { + jthr = newIOException(env, "the path component: '%s' is " + "world-writable. Its permissions are 0%03o. Please fix " + "this or select a different socket path.", check, mode); + goto done; + } + if ((mode & 0020) && (st.st_gid != 0)) { + jthr = newIOException(env, "the path component: '%s' is " + "group-writable, and the group is not root. Its permissions are " + "0%03o, and it is owned by gid %d. 
Please fix this or " + "select a different socket path.", check, mode, st.st_gid); + goto done; + } + if ((mode & 0200) && (st.st_uid != 0) && + (st.st_uid != uid)) { + jthr = newIOException(env, "the path component: '%s' is " + "owned by a user who is not root and not you. Your effective user " + "id is %d; the path is owned by user id %d, and its permissions are " + "0%03o. Please fix this or select a different socket path.", + check, uid, st.st_uid, mode); + goto done; + goto done; + } + } +done: + if (jthr) { + (*env)->Throw(env, jthr); + } +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_bind0( +JNIEnv *env, jclass clazz, jstring path) +{ + int fd; + jthrowable jthr = NULL; + + jthr = setup(env, &fd, path, 0); + if (jthr) { + (*env)->Throw(env, jthr); + } + return fd; +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_accept0( +JNIEnv *env, jclass clazz, jint fd) +{ + int ret, newFd = -1; + socklen_t slen; + struct sockaddr_un remote; + jthrowable jthr = NULL; + + slen = sizeof(remote); + do { + newFd = accept(fd, (struct sockaddr*)&remote, &slen); + } while ((newFd < 0) && (errno == EINTR)); + if (newFd < 0) { + ret = errno; + jthr = newSocketException(env, ret, "accept(2) error: %s", terror(ret)); + goto done; + } + +done: + if (jthr) { + if (newFd > 0) { + RETRY_ON_EINTR(ret, close(newFd)); + newFd = -1; + } + (*env)->Throw(env, jthr); + } + return newFd; +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_connect0( +JNIEnv *env, jclass clazz, jstring path) +{ + int ret, fd; + jthrowable jthr = NULL; + + jthr = setup(env, &fd, path, 1); + if (jthr) { + (*env)->Throw(env, jthr); + return -1; + } + if (((jthr = setAttribute0(env, fd, SND_TIMEO, DEFAULT_SND_TIMEO))) || + ((jthr = setAttribute0(env, fd, RCV_TIMEO, DEFAULT_RCV_TIMEO)))) { + RETRY_ON_EINTR(ret, close(fd)); + (*env)->Throw(env, jthr); + return -1; + } + return fd; +} + +static void javaMillisToTimeVal(int javaMillis, struct timeval *tv) +{ + tv->tv_sec = javaMillis / 1000; + tv->tv_usec = (javaMillis - (tv->tv_sec * 1000)) * 1000; +} + +static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val) +{ + struct timeval tv; + int ret, buf; + + switch (type) { + case SND_BUF_SIZE: + buf = val; + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, sizeof(buf))) { + ret = errno; + return newSocketException(env, ret, + "setsockopt(SO_SNDBUF) error: %s", terror(ret)); + } + return NULL; + case RCV_BUF_SIZE: + buf = val; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf, sizeof(buf))) { + ret = errno; + return newSocketException(env, ret, + "setsockopt(SO_RCVBUF) error: %s", terror(ret)); + } + return NULL; + case SND_TIMEO: + javaMillisToTimeVal(val, &tv); + if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (struct timeval *)&tv, + sizeof(tv))) { + ret = errno; + return newSocketException(env, ret, + "setsockopt(SO_SNDTIMEO) error: %s", terror(ret)); + } + return NULL; + case RCV_TIMEO: + javaMillisToTimeVal(val, &tv); + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (struct timeval *)&tv, + sizeof(tv))) { + ret = errno; + return newSocketException(env, ret, + "setsockopt(SO_RCVTIMEO) error: %s", terror(ret)); + } + return NULL; + default: + break; + } + return newRuntimeException(env, "Invalid attribute type %d.", type); +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_setAttribute0( +JNIEnv *env, jclass clazz, jint fd, jint type, jint val) +{ + jthrowable jthr = setAttribute0(env, fd, type, val); + if (jthr) { + (*env)->Throw(env, 
jthr); + } +} + +static jint getSockOptBufSizeToJavaBufSize(int size) +{ +#ifdef __linux__ + // Linux always doubles the value that you set with setsockopt. + // We cut it in half here so that programs can at least read back the same + // value they set. + size /= 2; +#endif + return size; +} + +static int timeValToJavaMillis(const struct timeval *tv) +{ + return (tv->tv_sec * 1000) + (tv->tv_usec / 1000); +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_getAttribute0( +JNIEnv *env, jclass clazz, jint fd, jint type) +{ + struct timeval tv; + socklen_t len; + int ret, rval = 0; + + switch (type) { + case SND_BUF_SIZE: + len = sizeof(rval); + if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &rval, &len)) { + ret = errno; + (*env)->Throw(env, newSocketException(env, ret, + "getsockopt(SO_SNDBUF) error: %s", terror(ret))); + return -1; + } + return getSockOptBufSizeToJavaBufSize(rval); + case RCV_BUF_SIZE: + len = sizeof(rval); + if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rval, &len)) { + ret = errno; + (*env)->Throw(env, newSocketException(env, ret, + "getsockopt(SO_RCVBUF) error: %s", terror(ret))); + return -1; + } + return getSockOptBufSizeToJavaBufSize(rval); + case SND_TIMEO: + memset(&tv, 0, sizeof(tv)); + len = sizeof(struct timeval); + if (getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &len)) { + ret = errno; + (*env)->Throw(env, newSocketException(env, ret, + "getsockopt(SO_SNDTIMEO) error: %s", terror(ret))); + return -1; + } + return timeValToJavaMillis(&tv); + case RCV_TIMEO: + memset(&tv, 0, sizeof(tv)); + len = sizeof(struct timeval); + if (getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, &len)) { + ret = errno; + (*env)->Throw(env, newSocketException(env, ret, + "getsockopt(SO_RCVTIMEO) error: %s", terror(ret))); + return -1; + } + return timeValToJavaMillis(&tv); + default: + (*env)->Throw(env, newRuntimeException(env, + "Invalid attribute type %d.", type)); + return -1; + } +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_close0( +JNIEnv *env, jclass clazz, jint fd) +{ + int ret; + + RETRY_ON_EINTR(ret, close(fd)); + if (ret) { + ret = errno; + (*env)->Throw(env, newSocketException(env, ret, + "close(2) error: %s", terror(ret))); + } +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_closeFileDescriptor0( +JNIEnv *env, jclass clazz, jobject jfd) +{ + Java_org_apache_hadoop_net_unix_DomainSocket_close0( + env, clazz, fd_get(env, jfd)); +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_shutdown0( +JNIEnv *env, jclass clazz, jint fd) +{ + int ret; + + RETRY_ON_EINTR(ret, shutdown(fd, SHUT_RDWR)); + if (ret) { + ret = errno; + (*env)->Throw(env, newSocketException(env, ret, + "shutdown(2) error: %s", terror(ret))); + } +} + +/** + * Write an entire buffer to a file descriptor. + * + * @param env The JNI environment. + * @param fd The fd to write to. + * @param buf The buffer to write + * @param amt The length of the buffer to write. + * @return NULL on success; or the unraised exception representing + * the problem. + */ +static jthrowable write_fully(JNIEnv *env, int fd, int8_t *buf, int amt) +{ + int err, res; + + while (amt > 0) { + res = write(fd, buf, amt); + if (res < 0) { + err = errno; + if (err == EINTR) { + continue; + } + return newSocketException(env, err, "write(2) error: %s", terror(err)); + } + amt -= res; + buf += res; + } + return NULL; +} + +/** + * Our auxillary data setup. + * + * See man 3 cmsg for more information about auxillary socket data on UNIX. 
+ * + * We use __attribute__((packed)) to ensure that the compiler doesn't insert any + * padding between 'hdr' and 'fds'. + * We use __attribute__((aligned(8)) to ensure that the compiler puts the start + * of the structure at an address which is a multiple of 8. If we did not do + * this, the attribute((packed)) would cause the compiler to generate a lot of + * slow code for accessing unaligned memory. + */ +struct cmsghdr_with_fds { + struct cmsghdr hdr; + int fds[MAX_PASSED_FDS]; +} __attribute__((packed,aligned(8))); + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_sendFileDescriptors0( +JNIEnv *env, jclass clazz, jint fd, jobject jfds, jobject jbuf, +jint offset, jint length) +{ + struct iovec vec[1]; + struct flexibleBuffer flexBuf; + struct cmsghdr_with_fds aux; + jint jfdsLen; + int i, ret = -1, auxLen; + struct msghdr socketMsg; + jthrowable jthr = NULL; + + jthr = flexBufInit(env, &flexBuf, length); + if (jthr) { + goto done; + } + if (length <= 0) { + jthr = newException(env, "java/lang/IllegalArgumentException", + "You must write at least one byte."); + goto done; + } + jfdsLen = (*env)->GetArrayLength(env, jfds); + if (jfdsLen <= 0) { + jthr = newException(env, "java/lang/IllegalArgumentException", + "Called sendFileDescriptors with no file descriptors."); + goto done; + } else if (jfdsLen > MAX_PASSED_FDS) { + jfdsLen = 0; + jthr = newException(env, "java/lang/IllegalArgumentException", + "Called sendFileDescriptors with an array of %d length. " + "The maximum is %d.", jfdsLen, MAX_PASSED_FDS); + goto done; + } + (*env)->GetByteArrayRegion(env, jbuf, offset, length, flexBuf.curBuf); + jthr = (*env)->ExceptionOccurred(env); + if (jthr) { + (*env)->ExceptionClear(env); + goto done; + } + memset(&vec, 0, sizeof(vec)); + vec[0].iov_base = flexBuf.curBuf; + vec[0].iov_len = length; + auxLen = CMSG_LEN(jfdsLen * sizeof(int)); + memset(&aux, 0, auxLen); + memset(&socketMsg, 0, sizeof(socketMsg)); + socketMsg.msg_iov = vec; + socketMsg.msg_iovlen = 1; + socketMsg.msg_control = &aux; + socketMsg.msg_controllen = auxLen; + aux.hdr.cmsg_len = auxLen; + aux.hdr.cmsg_level = SOL_SOCKET; + aux.hdr.cmsg_type = SCM_RIGHTS; + for (i = 0; i < jfdsLen; i++) { + jobject jfd = (*env)->GetObjectArrayElement(env, jfds, i); + if (!jfd) { + jthr = (*env)->ExceptionOccurred(env); + if (jthr) { + (*env)->ExceptionClear(env); + goto done; + } + jthr = newException(env, "java/lang/NullPointerException", + "element %d of jfds was NULL.", i); + goto done; + } + aux.fds[i] = fd_get(env, jfd); + (*env)->DeleteLocalRef(env, jfd); + if (jthr) { + goto done; + } + } + RETRY_ON_EINTR(ret, sendmsg(fd, &socketMsg, 0)); + if (ret < 0) { + ret = errno; + jthr = newSocketException(env, ret, "sendmsg(2) error: %s", terror(ret)); + goto done; + } + length -= ret; + if (length > 0) { + // Write the rest of the bytes we were asked to send. + // This time, no fds will be attached. 
+ jthr = write_fully(env, fd, flexBuf.curBuf + ret, length); + if (jthr) { + goto done; + } + } + +done: + flexBufFree(&flexBuf); + if (jthr) { + (*env)->Throw(env, jthr); + } +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_receiveFileDescriptors0( +JNIEnv *env, jclass clazz, jint fd, jarray jfds, jarray jbuf, +jint offset, jint length) +{ + struct iovec vec[1]; + struct flexibleBuffer flexBuf; + struct cmsghdr_with_fds aux; + int i, jRecvFdsLen = 0, auxLen; + jint jfdsLen = 0; + struct msghdr socketMsg; + ssize_t bytesRead = -1; + jobject fdObj; + jthrowable jthr = NULL; + + jthr = flexBufInit(env, &flexBuf, length); + if (jthr) { + goto done; + } + if (length <= 0) { + jthr = newRuntimeException(env, "You must read at least one byte."); + goto done; + } + jfdsLen = (*env)->GetArrayLength(env, jfds); + if (jfdsLen <= 0) { + jthr = newException(env, "java/lang/IllegalArgumentException", + "Called receiveFileDescriptors with an array of %d length. " + "You must pass at least one fd.", jfdsLen); + goto done; + } else if (jfdsLen > MAX_PASSED_FDS) { + jfdsLen = 0; + jthr = newException(env, "java/lang/IllegalArgumentException", + "Called receiveFileDescriptors with an array of %d length. " + "The maximum is %d.", jfdsLen, MAX_PASSED_FDS); + goto done; + } + for (i = 0; i < jfdsLen; i++) { + (*env)->SetObjectArrayElement(env, jfds, i, NULL); + } + vec[0].iov_base = flexBuf.curBuf; + vec[0].iov_len = length; + auxLen = CMSG_LEN(jfdsLen * sizeof(int)); + memset(&aux, 0, auxLen); + memset(&socketMsg, 0, auxLen); + socketMsg.msg_iov = vec; + socketMsg.msg_iovlen = 1; + socketMsg.msg_control = &aux; + socketMsg.msg_controllen = auxLen; + aux.hdr.cmsg_len = auxLen; + aux.hdr.cmsg_level = SOL_SOCKET; + aux.hdr.cmsg_type = SCM_RIGHTS; + RETRY_ON_EINTR(bytesRead, recvmsg(fd, &socketMsg, 0)); + if (bytesRead < 0) { + int ret = errno; + if (ret == ECONNABORTED) { + // The remote peer disconnected on us. Treat this as an EOF. + bytesRead = -1; + goto done; + } + jthr = newSocketException(env, ret, "recvmsg(2) failed: %s", + terror(ret)); + goto done; + } else if (bytesRead == 0) { + bytesRead = -1; + goto done; + } + jRecvFdsLen = (aux.hdr.cmsg_len - sizeof(struct cmsghdr)) / sizeof(int); + for (i = 0; i < jRecvFdsLen; i++) { + fdObj = fd_create(env, aux.fds[i]); + if (!fdObj) { + jthr = (*env)->ExceptionOccurred(env); + (*env)->ExceptionClear(env); + goto done; + } + // Make this -1 so we don't attempt to close it twice in an error path. + aux.fds[i] = -1; + (*env)->SetObjectArrayElement(env, jfds, i, fdObj); + // There is no point keeping around a local reference to the fdObj. + // The array continues to reference it. + (*env)->DeleteLocalRef(env, fdObj); + } + (*env)->SetByteArrayRegion(env, jbuf, offset, length, flexBuf.curBuf); + jthr = (*env)->ExceptionOccurred(env); + if (jthr) { + (*env)->ExceptionClear(env); + goto done; + } +done: + flexBufFree(&flexBuf); + if (jthr) { + // Free any FileDescriptor references we may have created, + // or file descriptors we may have been passed. 
+ for (i = 0; i < jRecvFdsLen; i++) { + if (aux.fds[i] >= 0) { + RETRY_ON_EINTR(i, close(aux.fds[i])); + aux.fds[i] = -1; + } + fdObj = (*env)->GetObjectArrayElement(env, jfds, i); + if (fdObj) { + int ret, afd = fd_get(env, fdObj); + if (afd >= 0) { + RETRY_ON_EINTR(ret, close(afd)); + } + (*env)->SetObjectArrayElement(env, jfds, i, NULL); + (*env)->DeleteLocalRef(env, fdObj); + } + } + (*env)->Throw(env, jthr); + } + return bytesRead; +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_readArray0( +JNIEnv *env, jclass clazz, jint fd, jarray b, jint offset, jint length) +{ + int ret = -1; + struct flexibleBuffer flexBuf; + jthrowable jthr; + + jthr = flexBufInit(env, &flexBuf, length); + if (jthr) { + goto done; + } + RETRY_ON_EINTR(ret, read(fd, flexBuf.curBuf, length)); + if (ret < 0) { + ret = errno; + if (ret == ECONNABORTED) { + // The remote peer disconnected on us. Treat this as an EOF. + ret = -1; + goto done; + } + jthr = newSocketException(env, ret, "read(2) error: %s", + terror(ret)); + goto done; + } + if (ret == 0) { + goto done; + } + (*env)->SetByteArrayRegion(env, b, offset, ret, flexBuf.curBuf); + jthr = (*env)->ExceptionOccurred(env); + if (jthr) { + (*env)->ExceptionClear(env); + goto done; + } +done: + flexBufFree(&flexBuf); + if (jthr) { + (*env)->Throw(env, jthr); + } + return ret == 0 ? -1 : ret; // Java wants -1 on EOF +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_available0( +JNIEnv *env, jclass clazz, jint fd) +{ + int ret, avail = 0; + jthrowable jthr = NULL; + + RETRY_ON_EINTR(ret, ioctl(fd, FIONREAD, &avail)); + if (ret < 0) { + ret = errno; + jthr = newSocketException(env, ret, + "ioctl(%d, FIONREAD) error: %s", fd, terror(ret)); + goto done; + } +done: + if (jthr) { + (*env)->Throw(env, jthr); + } + return avail; +} + +JNIEXPORT void JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_writeArray0( +JNIEnv *env, jclass clazz, jint fd, jarray b, jint offset, jint length) +{ + struct flexibleBuffer flexBuf; + jthrowable jthr; + + jthr = flexBufInit(env, &flexBuf, length); + if (jthr) { + goto done; + } + (*env)->GetByteArrayRegion(env, b, offset, length, flexBuf.curBuf); + jthr = (*env)->ExceptionOccurred(env); + if (jthr) { + (*env)->ExceptionClear(env); + goto done; + } + jthr = write_fully(env, fd, flexBuf.curBuf, length); + if (jthr) { + goto done; + } + +done: + flexBufFree(&flexBuf); + if (jthr) { + (*env)->Throw(env, jthr); + } +} + +JNIEXPORT jint JNICALL +Java_org_apache_hadoop_net_unix_DomainSocket_readByteBufferDirect0( +JNIEnv *env, jclass clazz, jint fd, jobject dst, jint position, jint remaining) +{ + uint8_t *buf; + jthrowable jthr = NULL; + int res = -1; + + buf = (*env)->GetDirectBufferAddress(env, dst); + if (!buf) { + jthr = newRuntimeException(env, "GetDirectBufferAddress failed."); + goto done; + } + RETRY_ON_EINTR(res, read(fd, buf + position, remaining)); + if (res < 0) { + res = errno; + if (res != ECONNABORTED) { + jthr = newSocketException(env, res, "read(2) error: %s", + terror(res)); + goto done; + } else { + // The remote peer disconnected on us. Treat this as an EOF. 
+ res = -1; + } + } +done: + if (jthr) { + (*env)->Throw(env, jthr); + } + return res; +} diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h b/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h index a50c41dbbb4..21f21e1b0c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org_apache_hadoop.h @@ -99,6 +99,10 @@ void *do_dlsym(JNIEnv *env, void *handle, const char *symbol) { THROW(env, "java/lang/InternalError", exception_msg); \ } +#define RETRY_ON_EINTR(ret, expr) do { \ + ret = expr; \ +} while ((ret == -1) && (errno == EINTR)); + #endif //vim: sw=2: ts=2: et diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java new file mode 100644 index 00000000000..1df78d96b7d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TemporarySocketDirectory.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net.unix; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.Random; + +import org.apache.commons.io.FileUtils; + +/** + * Create a temporary directory in which sockets can be created. + * When creating a UNIX domain socket, the name + * must be fairly short (around 110 bytes on most platforms). + */ +public class TemporarySocketDirectory implements Closeable { + private File dir; + + public TemporarySocketDirectory() { + String tmp = System.getProperty("java.io.tmpdir", "/tmp"); + dir = new File(tmp, "socks." + (System.currentTimeMillis() + + "." + (new Random().nextInt()))); + dir.mkdirs(); + dir.setWritable(true, true); + } + + public File getDir() { + return dir; + } + + @Override + public void close() throws IOException { + if (dir != null) { + FileUtils.deleteDirectory(dir); + dir = null; + } + } + + protected void finalize() throws IOException { + close(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java new file mode 100644 index 00000000000..ab293ff5138 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -0,0 +1,576 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.net.unix; + +import java.io.File; +import java.io.FileDescriptor; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.SocketTimeoutException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.net.unix.DomainSocket.DomainChannel; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.util.Shell.ExitCodeException; + +import com.google.common.io.Files; + +public class TestDomainSocket { + private static TemporarySocketDirectory sockDir; + + @BeforeClass + public static void init() { + sockDir = new TemporarySocketDirectory(); + DomainSocket.disableBindPathValidation(); + } + + @AfterClass + public static void shutdown() throws IOException { + sockDir.close(); + } + + @Before + public void checkPrecondition() { + Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null); + } + + /** + * Test that we can create a socket and close it, even if it hasn't been + * opened. + * + * @throws IOException + */ + @Test(timeout=180000) + public void testSocketCreateAndClose() throws IOException { + DomainSocket serv = DomainSocket.bindAndListen( + new File(sockDir.getDir(), "test_sock_create_and_close"). + getAbsolutePath()); + serv.close(); + } + + /** + * Test DomainSocket path setting and getting. + * + * @throws IOException + */ + @Test(timeout=180000) + public void testSocketPathSetGet() throws IOException { + Assert.assertEquals("/var/run/hdfs/sock.100", + DomainSocket.getEffectivePath("/var/run/hdfs/sock.__PORT__", 100)); + } + + /** + * Test that if one thread is blocking in accept(), another thread + * can close the socket and stop the accept. 
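+   * The test blocks in accept() on a worker thread, closes the socket from
+   * the test thread, and expects the pending accept() to fail with an
+   * IOException instead of hanging.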
+ * + * @throws IOException + */ + @Test(timeout=180000) + public void testSocketAcceptAndClose() throws Exception { + final String TEST_PATH = + new File(sockDir.getDir(), "test_sock_accept_and_close").getAbsolutePath(); + final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); + ExecutorService exeServ = Executors.newSingleThreadExecutor(); + Callable callable = new Callable() { + public Void call(){ + try { + serv.accept(); + throw new RuntimeException("expected the accept() to be " + + "interrupted and fail"); + } catch (IOException e) { + return null; + } + } + }; + Future future = exeServ.submit(callable); + Thread.sleep(500); + serv.close(); + future.get(2, TimeUnit.MINUTES); + } + + /** + * Test that attempting to connect to an invalid path doesn't work. + * + * @throws IOException + */ + @Test(timeout=180000) + public void testInvalidOperations() throws IOException { + try { + DomainSocket.connect( + new File(sockDir.getDir(), "test_sock_invalid_operation"). + getAbsolutePath()); + } catch (IOException e) { + GenericTestUtils.assertExceptionContains("connect(2) error: ", e); + } + } + + /** + * Test setting some server options. + * + * @throws IOException + */ + @Test(timeout=180000) + public void testServerOptions() throws Exception { + final String TEST_PATH = new File(sockDir.getDir(), + "test_sock_server_options").getAbsolutePath(); + DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); + try { + // Let's set a new receive buffer size + int bufSize = serv.getAttribute(DomainSocket.RCV_BUF_SIZE); + int newBufSize = bufSize / 2; + serv.setAttribute(DomainSocket.RCV_BUF_SIZE, newBufSize); + int nextBufSize = serv.getAttribute(DomainSocket.RCV_BUF_SIZE); + Assert.assertEquals(newBufSize, nextBufSize); + // Let's set a server timeout + int newTimeout = 1000; + serv.setAttribute(DomainSocket.RCV_TIMEO, newTimeout); + int nextTimeout = serv.getAttribute(DomainSocket.RCV_TIMEO); + Assert.assertEquals(newTimeout, nextTimeout); + try { + serv.accept(); + Assert.fail("expected the accept() to time out and fail"); + } catch (SocketTimeoutException e) { + GenericTestUtils.assertExceptionContains("accept(2) error: ", e); + } + } finally { + serv.close(); + Assert.assertFalse(serv.isOpen()); + } + } + + /** + * A Throwable representing success. + * + * We can't use null to represent this, because you cannot insert null into + * ArrayBlockingQueue. + */ + static class Success extends Throwable { + private static final long serialVersionUID = 1L; + } + + static interface WriteStrategy { + /** + * Initialize a WriteStrategy object from a Socket. + */ + public void init(DomainSocket s) throws IOException; + + /** + * Write some bytes. + */ + public void write(byte b[]) throws IOException; + } + + static class OutputStreamWriteStrategy implements WriteStrategy { + private OutputStream outs = null; + + public void init(DomainSocket s) throws IOException { + outs = s.getOutputStream(); + } + + public void write(byte b[]) throws IOException { + outs.write(b); + } + } + + abstract static class ReadStrategy { + /** + * Initialize a ReadStrategy object from a DomainSocket. + */ + public abstract void init(DomainSocket s) throws IOException; + + /** + * Read some bytes. 
+ */ + public abstract int read(byte b[], int off, int length) throws IOException; + + public void readFully(byte buf[], int off, int len) throws IOException { + int toRead = len; + while (toRead > 0) { + int ret = read(buf, off, toRead); + if (ret < 0) { + throw new IOException( "Premature EOF from inputStream"); + } + toRead -= ret; + off += ret; + } + } + } + + static class InputStreamReadStrategy extends ReadStrategy { + private InputStream ins = null; + + @Override + public void init(DomainSocket s) throws IOException { + ins = s.getInputStream(); + } + + @Override + public int read(byte b[], int off, int length) throws IOException { + return ins.read(b, off, length); + } + } + + static class DirectByteBufferReadStrategy extends ReadStrategy { + private DomainChannel ch = null; + + @Override + public void init(DomainSocket s) throws IOException { + ch = s.getChannel(); + } + + @Override + public int read(byte b[], int off, int length) throws IOException { + ByteBuffer buf = ByteBuffer.allocateDirect(b.length); + int nread = ch.read(buf); + if (nread < 0) return nread; + buf.flip(); + buf.get(b, off, nread); + return nread; + } + } + + static class ArrayBackedByteBufferReadStrategy extends ReadStrategy { + private DomainChannel ch = null; + + @Override + public void init(DomainSocket s) throws IOException { + ch = s.getChannel(); + } + + @Override + public int read(byte b[], int off, int length) throws IOException { + ByteBuffer buf = ByteBuffer.wrap(b); + int nread = ch.read(buf); + if (nread < 0) return nread; + buf.flip(); + buf.get(b, off, nread); + return nread; + } + } + + /** + * Test a simple client/server interaction. + * + * @throws IOException + */ + void testClientServer1(final Class writeStrategyClass, + final Class readStrategyClass) throws Exception { + final String TEST_PATH = new File(sockDir.getDir(), + "test_sock_client_server1").getAbsolutePath(); + final byte clientMsg1[] = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 }; + final byte serverMsg1[] = new byte[] { 0x9, 0x8, 0x7, 0x6, 0x5 }; + final byte clientMsg2 = 0x45; + final ArrayBlockingQueue threadResults = + new ArrayBlockingQueue(2); + final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); + Thread serverThread = new Thread() { + public void run(){ + // Run server + DomainSocket conn = null; + try { + conn = serv.accept(); + byte in1[] = new byte[clientMsg1.length]; + ReadStrategy reader = readStrategyClass.newInstance(); + reader.init(conn); + reader.readFully(in1, 0, in1.length); + Assert.assertTrue(Arrays.equals(clientMsg1, in1)); + WriteStrategy writer = writeStrategyClass.newInstance(); + writer.init(conn); + writer.write(serverMsg1); + InputStream connInputStream = conn.getInputStream(); + int in2 = connInputStream.read(); + Assert.assertEquals((int)clientMsg2, in2); + conn.close(); + } catch (Throwable e) { + threadResults.add(e); + Assert.fail(e.getMessage()); + } + threadResults.add(new Success()); + } + }; + serverThread.start(); + + Thread clientThread = new Thread() { + public void run(){ + try { + DomainSocket client = DomainSocket.connect(TEST_PATH); + WriteStrategy writer = writeStrategyClass.newInstance(); + writer.init(client); + writer.write(clientMsg1); + ReadStrategy reader = readStrategyClass.newInstance(); + reader.init(client); + byte in1[] = new byte[serverMsg1.length]; + reader.readFully(in1, 0, in1.length); + Assert.assertTrue(Arrays.equals(serverMsg1, in1)); + OutputStream clientOutputStream = client.getOutputStream(); + clientOutputStream.write(clientMsg2); + client.close(); 
+ } catch (Throwable e) { + threadResults.add(e); + } + threadResults.add(new Success()); + } + }; + clientThread.start(); + + for (int i = 0; i < 2; i++) { + Throwable t = threadResults.take(); + if (!(t instanceof Success)) { + Assert.fail(t.getMessage() + ExceptionUtils.getStackTrace(t)); + } + } + serverThread.join(120000); + clientThread.join(120000); + serv.close(); + } + + @Test(timeout=180000) + public void testClientServerOutStreamInStream() throws Exception { + testClientServer1(OutputStreamWriteStrategy.class, + InputStreamReadStrategy.class); + } + + @Test(timeout=180000) + public void testClientServerOutStreamInDbb() throws Exception { + testClientServer1(OutputStreamWriteStrategy.class, + DirectByteBufferReadStrategy.class); + } + + @Test(timeout=180000) + public void testClientServerOutStreamInAbb() throws Exception { + testClientServer1(OutputStreamWriteStrategy.class, + ArrayBackedByteBufferReadStrategy.class); + } + + static private class PassedFile { + private final int idx; + private final byte[] contents; + private FileInputStream fis; + + public PassedFile(int idx) throws IOException { + this.idx = idx; + this.contents = new byte[] { (byte)(idx % 127) }; + Files.write(contents, new File(getPath())); + this.fis = new FileInputStream(getPath()); + } + + public String getPath() { + return new File(sockDir.getDir(), "passed_file" + idx).getAbsolutePath(); + } + + public FileInputStream getInputStream() throws IOException { + return fis; + } + + public void cleanup() throws IOException { + new File(getPath()).delete(); + fis.close(); + } + + public void checkInputStream(FileInputStream fis) throws IOException { + byte buf[] = new byte[contents.length]; + IOUtils.readFully(fis, buf, 0, buf.length); + Arrays.equals(contents, buf); + } + + protected void finalize() { + try { + cleanup(); + } catch(Throwable t) { + // ignore + } + } + } + + /** + * Test file descriptor passing. 
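+   * The server passes open descriptors with sendFileDescriptors(), the
+   * client receives them as FileInputStreams via recvFileInputStreams(),
+   * and the received streams are checked against the original file contents.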
+ * + * @throws IOException + */ + @Test(timeout=180000) + public void testFdPassing() throws Exception { + final String TEST_PATH = + new File(sockDir.getDir(), "test_sock").getAbsolutePath(); + final byte clientMsg1[] = new byte[] { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 }; + final byte serverMsg1[] = new byte[] { 0x31, 0x30, 0x32, 0x34, 0x31, 0x33, + 0x44, 0x1, 0x1, 0x1, 0x1, 0x1 }; + final ArrayBlockingQueue threadResults = + new ArrayBlockingQueue(2); + final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); + final PassedFile passedFiles[] = + new PassedFile[] { new PassedFile(1), new PassedFile(2) }; + final FileDescriptor passedFds[] = new FileDescriptor[passedFiles.length]; + for (int i = 0; i < passedFiles.length; i++) { + passedFds[i] = passedFiles[i].getInputStream().getFD(); + } + Thread serverThread = new Thread() { + public void run(){ + // Run server + DomainSocket conn = null; + try { + conn = serv.accept(); + byte in1[] = new byte[clientMsg1.length]; + InputStream connInputStream = conn.getInputStream(); + IOUtils.readFully(connInputStream, in1, 0, in1.length); + Assert.assertTrue(Arrays.equals(clientMsg1, in1)); + DomainSocket domainConn = (DomainSocket)conn; + domainConn.sendFileDescriptors(passedFds, serverMsg1, 0, + serverMsg1.length); + conn.close(); + } catch (Throwable e) { + threadResults.add(e); + Assert.fail(e.getMessage()); + } + threadResults.add(new Success()); + } + }; + serverThread.start(); + + Thread clientThread = new Thread() { + public void run(){ + try { + DomainSocket client = DomainSocket.connect(TEST_PATH); + OutputStream clientOutputStream = client.getOutputStream(); + InputStream clientInputStream = client.getInputStream(); + clientOutputStream.write(clientMsg1); + DomainSocket domainConn = (DomainSocket)client; + byte in1[] = new byte[serverMsg1.length]; + FileInputStream recvFis[] = new FileInputStream[passedFds.length]; + int r = domainConn. + recvFileInputStreams(recvFis, in1, 0, in1.length - 1); + Assert.assertTrue(r > 0); + IOUtils.readFully(clientInputStream, in1, r, in1.length - r); + Assert.assertTrue(Arrays.equals(serverMsg1, in1)); + for (int i = 0; i < passedFds.length; i++) { + Assert.assertNotNull(recvFis[i]); + passedFiles[i].checkInputStream(recvFis[i]); + } + for (FileInputStream fis : recvFis) { + fis.close(); + } + client.close(); + } catch (Throwable e) { + threadResults.add(e); + } + threadResults.add(new Success()); + } + }; + clientThread.start(); + + for (int i = 0; i < 2; i++) { + Throwable t = threadResults.take(); + if (!(t instanceof Success)) { + Assert.fail(t.getMessage() + ExceptionUtils.getStackTrace(t)); + } + } + serverThread.join(120000); + clientThread.join(120000); + serv.close(); + for (PassedFile pf : passedFiles) { + pf.cleanup(); + } + } + + /** + * Run validateSocketPathSecurity + * + * @param str The path to validate + * @param prefix A prefix to skip validation for + * @throws IOException + */ + private static void testValidateSocketPath(String str, String prefix) + throws IOException { + int skipComponents = 0; + File prefixFile = new File(prefix); + while (true) { + prefixFile = prefixFile.getParentFile(); + if (prefixFile == null) { + break; + } + skipComponents++; + } + DomainSocket.validateSocketPathSecurity0(str, + skipComponents); + } + + /** + * Test file descriptor path security. 
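+   * Directory trees with different permissions are created, and
+   * validateSocketPathSecurity0() is expected to accept locked-down paths
+   * while rejecting ones with world-writable or missing components.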
+ * + * @throws IOException + */ + @Test(timeout=180000) + public void testFdPassingPathSecurity() throws Exception { + TemporarySocketDirectory tmp = new TemporarySocketDirectory(); + try { + String prefix = tmp.getDir().getAbsolutePath(); + Shell.execCommand(new String [] { + "mkdir", "-p", prefix + "/foo/bar/baz" }); + Shell.execCommand(new String [] { + "chmod", "0700", prefix + "/foo/bar/baz" }); + Shell.execCommand(new String [] { + "chmod", "0700", prefix + "/foo/bar" }); + Shell.execCommand(new String [] { + "chmod", "0707", prefix + "/foo" }); + Shell.execCommand(new String [] { + "mkdir", "-p", prefix + "/q1/q2" }); + Shell.execCommand(new String [] { + "chmod", "0700", prefix + "/q1" }); + Shell.execCommand(new String [] { + "chmod", "0700", prefix + "/q1/q2" }); + testValidateSocketPath(prefix + "/q1/q2", prefix); + try { + testValidateSocketPath(prefix + "/foo/bar/baz", prefix); + } catch (IOException e) { + GenericTestUtils.assertExceptionContains("/foo' is world-writable. " + + "Its permissions are 0707. Please fix this or select a " + + "different socket path.", e); + } + try { + testValidateSocketPath(prefix + "/nope", prefix); + } catch (IOException e) { + GenericTestUtils.assertExceptionContains("failed to stat a path " + + "component: ", e); + } + // Root should be secure + DomainSocket.validateSocketPathSecurity0("/foo", 0); + } finally { + tmp.close(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 7c509ee7e80..337cbec292e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -4,3 +4,6 @@ These will be integrated to trunk CHANGES.txt after merge HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. (Colin Patrick McCabe via todd) + +HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests. +(Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java index eb2d0c92d9e..7c6c3f4b6ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/BasicInetPeer.java @@ -23,6 +23,8 @@ import java.io.OutputStream; import java.net.Socket; import java.nio.channels.ReadableByteChannel; +import org.apache.hadoop.net.unix.DomainSocket; + /** * Represents a peer that we communicate with by using a basic Socket * that has no associated Channel. @@ -118,4 +120,9 @@ class BasicInetPeer implements Peer { public String toString() { return "BasicInetPeer(" + socket.toString() + ")"; } + + @Override + public DomainSocket getDomainSocket() { + return null; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java new file mode 100644 index 00000000000..abe9b7d52df --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.net; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.channels.ReadableByteChannel; + +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Represents a peer that we communicate with by using blocking I/O + * on a UNIX domain socket. + */ +@InterfaceAudience.Private +public class DomainPeer implements Peer { + private final DomainSocket socket; + private final OutputStream out; + private final InputStream in; + private final ReadableByteChannel channel; + + public DomainPeer(DomainSocket socket) { + this.socket = socket; + this.out = socket.getOutputStream(); + this.in = socket.getInputStream(); + this.channel = socket.getChannel(); + } + + @Override + public ReadableByteChannel getInputStreamChannel() { + return channel; + } + + @Override + public void setReadTimeout(int timeoutMs) throws IOException { + socket.setAttribute(DomainSocket.RCV_TIMEO, timeoutMs); + } + + @Override + public int getReceiveBufferSize() throws IOException { + return socket.getAttribute(DomainSocket.RCV_BUF_SIZE); + } + + @Override + public boolean getTcpNoDelay() throws IOException { + /* No TCP, no TCP_NODELAY. */ + return false; + } + + @Override + public void setWriteTimeout(int timeoutMs) throws IOException { + socket.setAttribute(DomainSocket.SND_TIMEO, timeoutMs); + } + + @Override + public boolean isClosed() { + return !socket.isOpen(); + } + + @Override + public void close() throws IOException { + socket.close(); + } + + @Override + public String getRemoteAddressString() { + return "unix:" + socket.getPath(); + } + + @Override + public String getLocalAddressString() { + return ""; + } + + @Override + public InputStream getInputStream() throws IOException { + return in; + } + + @Override + public OutputStream getOutputStream() throws IOException { + return out; + } + + @Override + public boolean isLocal() { + /* UNIX domain sockets can only be used for local communication. */ + return true; + } + + @Override + public String toString() { + return "DomainPeer(" + getRemoteAddressString() + ")"; + } + + @Override + public DomainSocket getDomainSocket() { + return socket; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java new file mode 100644 index 00000000000..d22584fc787 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.net; + +import java.io.Closeable; +import java.io.IOException; +import java.net.SocketTimeoutException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.hdfs.net.PeerServer; +import org.apache.hadoop.net.unix.DomainSocket; + +class DomainPeerServer implements PeerServer { + static Log LOG = LogFactory.getLog(DomainPeerServer.class); + private final DomainSocket sock; + + DomainPeerServer(DomainSocket sock) { + this.sock = sock; + } + + public DomainPeerServer(String path, int port) + throws IOException { + this(DomainSocket.bindAndListen(DomainSocket.getEffectivePath(path, port))); + } + + public String getBindPath() { + return sock.getPath(); + } + + @Override + public void setReceiveBufferSize(int size) throws IOException { + sock.setAttribute(DomainSocket.RCV_BUF_SIZE, size); + } + + @Override + public Peer accept() throws IOException, SocketTimeoutException { + DomainSocket connSock = sock.accept(); + Peer peer = null; + boolean success = false; + try { + peer = new DomainPeer(connSock); + success = true; + return peer; + } finally { + if (!success) { + if (peer != null) peer.close(); + connSock.close(); + } + } + } + + @Override + public String getListeningString() { + return "unix:" + sock.getPath(); + } + + @Override + public void close() throws IOException { + try { + sock.close(); + } catch (IOException e) { + LOG.error("error closing DomainPeerServer: ", e); + } + } + + @Override + public String toString() { + return "DomainPeerServer(" + getListeningString() + ")"; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java index 295632ca63d..6b0e506d3cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/EncryptedPeer.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; +import org.apache.hadoop.net.unix.DomainSocket; import java.io.InputStream; import java.io.OutputStream; @@ -133,4 +134,9 @@ public class EncryptedPeer implements Peer { public String toString() { return "EncryptedPeer(" + enclosedPeer + ")"; } + + @Override + public DomainSocket getDomainSocket() { + return enclosedPeer.getDomainSocket(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java index 1186490512b..1dc9d1d1186 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/NioInetPeer.java @@ -25,6 +25,7 @@ import java.nio.channels.ReadableByteChannel; import org.apache.hadoop.net.SocketInputStream; import org.apache.hadoop.net.SocketOutputStream; +import org.apache.hadoop.net.unix.DomainSocket; /** * Represents a peer that we communicate with by using non-blocking I/O @@ -122,4 +123,9 @@ class NioInetPeer implements Peer { public String toString() { return "NioInetPeer(" + socket.toString() + ")"; } + + @Override + public DomainSocket getDomainSocket() { + return null; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java index 129ada76116..862720e921f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/Peer.java @@ -23,6 +23,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.channels.ReadableByteChannel; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.net.unix.DomainSocket; /** * Represents a connection to a peer. @@ -105,4 +106,10 @@ public interface Peer extends Closeable { * computer as we. */ public boolean isLocal(); + + /** + * @return The DomainSocket associated with the current + * peer, or null if there is none. + */ + public DomainSocket getDomainSocket(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java index 0953c410b03..bb580bc9534 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java @@ -31,6 +31,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.net.Peer; +import org.apache.hadoop.net.unix.DomainSocket; import org.junit.Test; public class TestPeerCache { @@ -114,6 +115,11 @@ public class TestPeerCache { public String toString() { return "FakePeer(dnId=" + dnId + ")"; } + + @Override + public DomainSocket getDomainSocket() { + return null; + } } @Test From 9a4030e0e84a688c12daa21fe9a165808c3eca70 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 11 Jan 2013 23:52:22 +0000 Subject: [PATCH 04/52] HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths. Contributed by Colin Patrick McCabe. 
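In rough outline, the client-side read path added below works like this
(condensed from the BlockReaderFactory changes in this patch; names are taken
from the diff, and error handling, the PeerCache interaction, and the
ERROR_UNSUPPORTED fallback are omitted, so treat it as a sketch rather than
the exact implementation):

    // Ask the DataNode for the block's file descriptors over a domain socket,
    // then build a BlockReaderLocal directly on the returned streams.
    DataOutputStream out = new DataOutputStream(
        new BufferedOutputStream(peer.getOutputStream()));
    new Sender(out).requestShortCircuitFds(block, blockToken, 1);
    BlockOpResponseProto resp = BlockOpResponseProto.parseFrom(
        HdfsProtoUtil.vintPrefixed(new DataInputStream(peer.getInputStream())));
    if (resp.getStatus() == Status.SUCCESS) {
      FileInputStream fis[] = new FileInputStream[2];
      peer.getDomainSocket().recvFileInputStreams(fis, new byte[1], 0, 1);
      BlockReader reader = new BlockReaderLocal(conf, file, block,
          startOffset, len, fis[0], fis[1], datanodeID, verifyChecksum);
    }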
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1432335 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/net/unix/DomainSocket.java | 2 +- .../org/apache/hadoop/util/DataChecksum.java | 2 +- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 + .../dev-support/findbugsExcludeFile.xml | 8 + .../org/apache/hadoop/hdfs/BlockReader.java | 17 +- .../hadoop/hdfs/BlockReaderFactory.java | 148 ++++++- .../apache/hadoop/hdfs/BlockReaderLocal.java | 336 +++++----------- .../org/apache/hadoop/hdfs/DFSClient.java | 56 ++- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 7 + .../apache/hadoop/hdfs/DFSInputStream.java | 148 ++++--- .../hadoop/hdfs/DomainSocketFactory.java | 137 +++++++ .../hadoop/hdfs/FileInputStreamCache.java | 265 +++++++++++++ .../apache/hadoop/hdfs/RemoteBlockReader.java | 10 +- .../hadoop/hdfs/RemoteBlockReader2.java | 10 +- .../hadoop/hdfs/net/DomainPeerServer.java | 5 +- .../datatransfer/DataTransferProtocol.java | 12 + .../hadoop/hdfs/protocol/datatransfer/Op.java | 3 +- .../hdfs/protocol/datatransfer/Receiver.java | 13 + .../hdfs/protocol/datatransfer/Sender.java | 12 + .../hadoop/hdfs/server/common/JspHelper.java | 5 +- .../hadoop/hdfs/server/datanode/DataNode.java | 152 ++++++-- .../hdfs/server/datanode/DataXceiver.java | 68 ++++ .../datanode/fsdataset/FsDatasetSpi.java | 3 + .../fsdataset/impl/FsDatasetImpl.java | 21 + .../hdfs/server/namenode/NamenodeFsck.java | 2 +- .../main/proto/ClientDatanodeProtocol.proto | 4 + .../src/main/proto/datatransfer.proto | 21 + .../hadoop/hdfs/BlockReaderTestUtil.java | 11 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 37 +- .../hadoop/hdfs/TestBlockReaderLocal.java | 365 +++++++++++++++--- .../hdfs/TestClientBlockVerification.java | 8 +- .../hadoop/hdfs/TestFileInputStreamCache.java | 127 ++++++ .../apache/hadoop/hdfs/TestParallelRead.java | 26 -- .../hadoop/hdfs/TestParallelReadUtil.java | 34 +- .../hdfs/TestParallelShortCircuitRead.java | 48 +++ ...stParallelShortCircuitReadNoChecksum.java} | 44 +-- .../hdfs/TestParallelUnixDomainRead.java | 46 +++ .../hdfs/TestShortCircuitLocalRead.java | 174 ++++++--- .../TestBlockTokenWithDFS.java | 2 +- .../server/datanode/SimulatedFSDataset.java | 7 + .../datanode/TestDataNodeVolumeFailure.java | 9 +- 41 files changed, 1840 insertions(+), 568 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java rename hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/{TestParallelLocalRead.java => TestParallelShortCircuitReadNoChecksum.java} (53%) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java index 9f93924b5be..b1f8749f581 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -111,7 +111,7 @@ public class DomainSocket 
implements Closeable { * Disable validation of the server bind paths. */ @VisibleForTesting - static void disableBindPathValidation() { + public static void disableBindPathValidation() { validateBindPaths = false; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java index 4a3424bad32..5eb51670829 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java @@ -104,7 +104,7 @@ public class DataChecksum implements Checksum { ( (bytes[offset+2] & 0xff) << 16 ) | ( (bytes[offset+3] & 0xff) << 8 ) | ( (bytes[offset+4] & 0xff) ); - return newDataChecksum( Type.valueOf(bytes[0]), bytesPerChecksum ); + return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum ); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 337cbec292e..af90e837028 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -7,3 +7,6 @@ HDFS-4353. Encapsulate connections to peers in Peer and PeerServer classes. HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests. (Colin Patrick McCabe via todd) + +HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths. +(Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index c019b10e423..4eaa65c8e4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -290,6 +290,14 @@ + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java index 2bbae525898..17ecb9e30d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java @@ -41,18 +41,29 @@ public interface BlockReader extends ByteBufferReadable { */ long skip(long n) throws IOException; + /** + * Returns an estimate of the number of bytes that can be read + * (or skipped over) from this input stream without performing + * network I/O. + */ + int available() throws IOException; + /** * Close the block reader. * * @param peerCache The PeerCache to put the Peer we're using back * into, or null if we should simply close the Peer * we're using (along with its Socket). - * Some block readers, like BlockReaderLocal, may - * not make use of this parameter. + * Ignored by Readers that don't maintain Peers. + * @param fisCache The FileInputStreamCache to put our FileInputStreams + * back into, or null if we should simply close them. + * Ignored by Readers that don't maintain + * FileInputStreams. 
* * @throws IOException */ - void close(PeerCache peerCache) throws IOException; + void close(PeerCache peerCache, FileInputStreamCache fisCache) + throws IOException; /** * Read exactly the given amount of data, throwing an exception diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index d6d39304966..95f7b94e0e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -17,22 +17,26 @@ */ package org.apache.hadoop.hdfs; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileInputStream; import java.io.IOException; import java.net.InetSocketAddress; -import java.net.Socket; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSClient.Conf; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferEncryptor; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; -import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; +import org.apache.hadoop.hdfs.protocol.HdfsProtoUtil; +import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; -import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.token.Token; @@ -58,6 +62,12 @@ public class BlockReaderFactory { * @param clientName Client name. Used for log messages. * @param peer The peer * @param datanodeID The datanode that the Peer is connected to + * @param domainSocketFactory The DomainSocketFactory to notify if the Peer + * is a DomainPeer which turns out to be faulty. + * If null, no factory will be notified in this + * case. + * @param allowShortCircuitLocalReads True if short-circuit local reads + * should be allowed. * @return New BlockReader instance, or null on error. */ @SuppressWarnings("deprecation") @@ -70,11 +80,44 @@ public class BlockReaderFactory { boolean verifyChecksum, String clientName, Peer peer, - DatanodeID datanodeID) - throws IOException { + DatanodeID datanodeID, + DomainSocketFactory domSockFactory, + boolean allowShortCircuitLocalReads) + throws IOException { peer.setReadTimeout(conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, HdfsServerConstants.READ_TIMEOUT)); peer.setWriteTimeout(HdfsServerConstants.WRITE_TIMEOUT); + + if (peer.getDomainSocket() != null) { + if (allowShortCircuitLocalReads) { + // If this is a domain socket, and short-circuit local reads are + // enabled, try to set up a BlockReaderLocal. + BlockReader reader = newShortCircuitBlockReader(conf, file, + block, blockToken, startOffset, len, peer, datanodeID, + domSockFactory, verifyChecksum); + if (reader != null) { + // One we've constructed the short-circuit block reader, we don't + // need the socket any more. 
So let's return it to the cache. + PeerCache peerCache = PeerCache.getInstance( + conf.getInt(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY, + DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT), + conf.getLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, + DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT)); + peerCache.put(datanodeID, peer); + return reader; + } + } + // If this is a domain socket and we couldn't (or didn't want to) set + // up a BlockReaderLocal, check that we are allowed to pass data traffic + // over the socket before proceeding. + if (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, + DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT)) { + throw new IOException("Because we can't do short-circuit access, " + + "and data traffic over domain sockets is disabled, " + + "we cannot use this socket to talk to " + datanodeID); + } + } + if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER, DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT)) { return RemoteBlockReader.newBlockReader(file, @@ -88,7 +131,94 @@ public class BlockReaderFactory { verifyChecksum, clientName, peer, datanodeID); } } - + + /** + * Create a new short-circuit BlockReader. + * + * Here, we ask the DataNode to pass us file descriptors over our + * DomainSocket. If the DataNode declines to do so, we'll return null here; + * otherwise, we'll return the BlockReaderLocal. If the DataNode declines, + * this function will inform the DomainSocketFactory that short-circuit local + * reads are disabled for this DataNode, so that we don't ask again. + * + * @param conf the configuration. + * @param file the file name. Used in log messages. + * @param block The block object. + * @param blockToken The block token for security. + * @param startOffset The read offset, relative to block head. + * @param len The number of bytes to read, or -1 to read + * as many as possible. + * @param peer The peer to use. + * @param datanodeID The datanode that the Peer is connected to. + * @param domSockFactory The DomainSocketFactory to notify if the Peer + * is a DomainPeer which turns out to be faulty. + * If null, no factory will be notified in this + * case. + * @param verifyChecksum True if we should verify the checksums. + * Note: even if this is true, when + * DFS_CLIENT_READ_CHECKSUM_SKIP_CHECKSUM_KEY is + * set, we will skip checksums. + * + * @return The BlockReaderLocal, or null if the + * DataNode declined to provide short-circuit + * access. + * @throws IOException If there was a communication error. 
+ */ + private static BlockReaderLocal newShortCircuitBlockReader( + Configuration conf, String file, ExtendedBlock block, + Token blockToken, long startOffset, + long len, Peer peer, DatanodeID datanodeID, + DomainSocketFactory domSockFactory, boolean verifyChecksum) + throws IOException { + final DataOutputStream out = + new DataOutputStream(new BufferedOutputStream( + peer.getOutputStream())); + new Sender(out).requestShortCircuitFds(block, blockToken, 1); + DataInputStream in = + new DataInputStream(peer.getInputStream()); + BlockOpResponseProto resp = BlockOpResponseProto.parseFrom( + HdfsProtoUtil.vintPrefixed(in)); + DomainSocket sock = peer.getDomainSocket(); + switch (resp.getStatus()) { + case SUCCESS: + BlockReaderLocal reader = null; + byte buf[] = new byte[1]; + FileInputStream fis[] = new FileInputStream[2]; + sock.recvFileInputStreams(fis, buf, 0, buf.length); + try { + reader = new BlockReaderLocal(conf, file, block, + startOffset, len, fis[0], fis[1], datanodeID, verifyChecksum); + } finally { + if (reader == null) { + IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]); + } + } + return reader; + case ERROR_UNSUPPORTED: + if (!resp.hasShortCircuitAccessVersion()) { + DFSClient.LOG.warn("short-circuit read access is disabled for " + + "DataNode " + datanodeID + ". reason: " + resp.getMessage()); + domSockFactory.disableShortCircuitForPath(sock.getPath()); + } else { + DFSClient.LOG.warn("short-circuit read access for the file " + + file + " is disabled for DataNode " + datanodeID + + ". reason: " + resp.getMessage()); + } + return null; + case ERROR_ACCESS_TOKEN: + String msg = "access control error while " + + "attempting to set up short-circuit access to " + + file + resp.getMessage(); + DFSClient.LOG.debug(msg); + throw new InvalidBlockTokenException(msg); + default: + DFSClient.LOG.warn("error while attempting to set up short-circuit " + + "access to " + file + ": " + resp.getMessage()); + domSockFactory.disableShortCircuitForPath(sock.getPath()); + return null; + } + } + /** * File name to print when accessing a block directly (from servlets) * @param s Address of the block location diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index eacd902aa2b..1c34a71c26d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -18,30 +18,18 @@ package org.apache.hadoop.hdfs; import java.io.DataInputStream; -import java.io.File; +import org.apache.hadoop.conf.Configuration; import java.io.FileInputStream; import java.io.IOException; -import java.net.Socket; import java.nio.ByteBuffer; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; -import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; -import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import 
org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; import org.apache.hadoop.hdfs.util.DirectBufferPool; -import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; /** @@ -53,74 +41,19 @@ import org.apache.hadoop.util.DataChecksum; *
 * <ul>
 * <li>The client performing short circuit reads must be configured at the
 * datanode.</li>
- * <li>The client gets the path to the file where block is stored using
- * {@link org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol#getBlockLocalPathInfo(ExtendedBlock, Token)}
- * RPC call</li>
- * <li>Client uses kerberos authentication to connect to the datanode over RPC,
- * if security is enabled.</li>
+ * <li>The client gets the file descriptors for the metadata file and the data
+ * file for the block using
+ * {@link org.apache.hadoop.hdfs.server.datanode.DataXceiver#requestShortCircuitFds}.
+ * </li>
+ * <li>The client reads the file descriptors.</li>
 * </ul>
*/ class BlockReaderLocal implements BlockReader { - private static final Log LOG = LogFactory.getLog(DFSClient.class); - - //Stores the cache and proxy for a local datanode. - private static class LocalDatanodeInfo { - private ClientDatanodeProtocol proxy = null; - private final Map cache; - - LocalDatanodeInfo() { - final int cacheSize = 10000; - final float hashTableLoadFactor = 0.75f; - int hashTableCapacity = (int) Math.ceil(cacheSize / hashTableLoadFactor) + 1; - cache = Collections - .synchronizedMap(new LinkedHashMap( - hashTableCapacity, hashTableLoadFactor, true) { - private static final long serialVersionUID = 1; - - @Override - protected boolean removeEldestEntry( - Map.Entry eldest) { - return size() > cacheSize; - } - }); - } - - private synchronized ClientDatanodeProtocol getDatanodeProxy( - DatanodeInfo node, Configuration conf, int socketTimeout, - boolean connectToDnViaHostname) throws IOException { - if (proxy == null) { - proxy = DFSUtil.createClientDatanodeProtocolProxy(node, conf, - socketTimeout, connectToDnViaHostname); - } - return proxy; - } - - private synchronized void resetDatanodeProxy() { - if (null != proxy) { - RPC.stopProxy(proxy); - proxy = null; - } - } - - private BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) { - return cache.get(b); - } - - private void setBlockLocalPathInfo(ExtendedBlock b, BlockLocalPathInfo info) { - cache.put(b, info); - } - - private void removeBlockLocalPathInfo(ExtendedBlock b) { - cache.remove(b); - } - } - - // Multiple datanodes could be running on the local machine. Store proxies in - // a map keyed by the ipc port of the datanode. - private static Map localDatanodeInfoMap = new HashMap(); + static final Log LOG = LogFactory.getLog(DFSClient.class); private final FileInputStream dataIn; // reader for the data file private final FileInputStream checksumIn; // reader for the checksum file + private final boolean verifyChecksum; /** * Offset from the most recent chunk boundary at which the next read should @@ -140,7 +73,6 @@ class BlockReaderLocal implements BlockReader { private ByteBuffer slowReadBuff = null; private ByteBuffer checksumBuff = null; private DataChecksum checksum; - private final boolean verifyChecksum; private static DirectBufferPool bufferPool = new DirectBufferPool(); @@ -150,186 +82,90 @@ class BlockReaderLocal implements BlockReader { /** offset in block where reader wants to actually read */ private long startOffset; private final String filename; + + private final DatanodeID datanodeID; + private final ExtendedBlock block; - /** - * The only way this object can be instantiated. - */ - static BlockReaderLocal newBlockReader(Configuration conf, String file, - ExtendedBlock blk, Token token, DatanodeInfo node, - int socketTimeout, long startOffset, long length, - boolean connectToDnViaHostname) throws IOException { + private static int getSlowReadBufferNumChunks(Configuration conf, + int bytesPerChecksum) { - LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node - .getIpcPort()); - // check the cache first - BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk); - if (pathinfo == null) { - pathinfo = getBlockPathInfo(blk, node, conf, socketTimeout, token, - connectToDnViaHostname); - } + int bufSize = + conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT); - // check to see if the file exists. 
It may so happen that the - // HDFS file has been deleted and this block-lookup is occurring - // on behalf of a new HDFS file. This time, the block file could - // be residing in a different portion of the fs.data.dir directory. - // In this case, we remove this entry from the cache. The next - // call to this method will re-populate the cache. - FileInputStream dataIn = null; - FileInputStream checksumIn = null; - BlockReaderLocal localBlockReader = null; - boolean skipChecksumCheck = skipChecksumCheck(conf); - try { - // get a local file system - File blkfile = new File(pathinfo.getBlockPath()); - dataIn = new FileInputStream(blkfile); - - if (LOG.isDebugEnabled()) { - LOG.debug("New BlockReaderLocal for file " + blkfile + " of size " - + blkfile.length() + " startOffset " + startOffset + " length " - + length + " short circuit checksum " + !skipChecksumCheck); - } - - if (!skipChecksumCheck) { - // get the metadata file - File metafile = new File(pathinfo.getMetaPath()); - checksumIn = new FileInputStream(metafile); - - // read and handle the common header here. For now just a version - BlockMetadataHeader header = BlockMetadataHeader - .readHeader(new DataInputStream(checksumIn)); - short version = header.getVersion(); - if (version != BlockMetadataHeader.VERSION) { - LOG.warn("Wrong version (" + version + ") for metadata file for " - + blk + " ignoring ..."); - } - DataChecksum checksum = header.getChecksum(); - long firstChunkOffset = startOffset - - (startOffset % checksum.getBytesPerChecksum()); - localBlockReader = new BlockReaderLocal(conf, file, blk, token, - startOffset, length, pathinfo, checksum, true, dataIn, - firstChunkOffset, checksumIn); - } else { - localBlockReader = new BlockReaderLocal(conf, file, blk, token, - startOffset, length, pathinfo, dataIn); - } - } catch (IOException e) { - // remove from cache - localDatanodeInfo.removeBlockLocalPathInfo(blk); - DFSClient.LOG.warn("BlockReaderLocal: Removing " + blk - + " from cache because local file " + pathinfo.getBlockPath() - + " could not be opened."); - throw e; - } finally { - if (localBlockReader == null) { - if (dataIn != null) { - dataIn.close(); - } - if (checksumIn != null) { - checksumIn.close(); - } - } - } - return localBlockReader; - } - - private static synchronized LocalDatanodeInfo getLocalDatanodeInfo(int port) { - LocalDatanodeInfo ldInfo = localDatanodeInfoMap.get(port); - if (ldInfo == null) { - ldInfo = new LocalDatanodeInfo(); - localDatanodeInfoMap.put(port, ldInfo); - } - return ldInfo; - } - - private static BlockLocalPathInfo getBlockPathInfo(ExtendedBlock blk, - DatanodeInfo node, Configuration conf, int timeout, - Token token, boolean connectToDnViaHostname) - throws IOException { - LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort()); - BlockLocalPathInfo pathinfo = null; - ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(node, - conf, timeout, connectToDnViaHostname); - try { - // make RPC to local datanode to find local pathnames of blocks - pathinfo = proxy.getBlockLocalPathInfo(blk, token); - if (pathinfo != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Cached location of block " + blk + " as " + pathinfo); - } - localDatanodeInfo.setBlockLocalPathInfo(blk, pathinfo); - } - } catch (IOException e) { - localDatanodeInfo.resetDatanodeProxy(); // Reset proxy on error - throw e; - } - return pathinfo; - } - - private static boolean skipChecksumCheck(Configuration conf) { - return conf.getBoolean( - 
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT); - } - - private static int getSlowReadBufferNumChunks(Configuration conf, int bytesPerChecksum) { - int bufferSizeBytes = conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY, - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT); - - if (bufferSizeBytes < bytesPerChecksum) { - throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" + bufferSizeBytes + ") " + - "is not large enough to hold a single chunk (" + bytesPerChecksum + "). Please configure " + + if (bufSize < bytesPerChecksum) { + throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" + + bufSize + ") is not large enough to hold a single chunk (" + + bytesPerChecksum + "). Please configure " + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately"); } // Round down to nearest chunk size - return bufferSizeBytes / bytesPerChecksum; + return bufSize / bytesPerChecksum; } - private BlockReaderLocal(Configuration conf, String hdfsfile, - ExtendedBlock block, Token token, long startOffset, - long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn) - throws IOException { - this(conf, hdfsfile, block, token, startOffset, length, pathinfo, - DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false, - dataIn, startOffset, null); - } - - private BlockReaderLocal(Configuration conf, String hdfsfile, - ExtendedBlock block, Token token, long startOffset, - long length, BlockLocalPathInfo pathinfo, DataChecksum checksum, - boolean verifyChecksum, FileInputStream dataIn, long firstChunkOffset, - FileInputStream checksumIn) throws IOException { - this.filename = hdfsfile; - this.checksum = checksum; - this.verifyChecksum = verifyChecksum; - this.startOffset = Math.max(startOffset, 0); - - bytesPerChecksum = this.checksum.getBytesPerChecksum(); - checksumSize = this.checksum.getChecksumSize(); - + public BlockReaderLocal(Configuration conf, String filename, + ExtendedBlock block, long startOffset, long length, + FileInputStream dataIn, FileInputStream checksumIn, + DatanodeID datanodeID, boolean verifyChecksum) throws IOException { this.dataIn = dataIn; this.checksumIn = checksumIn; - this.offsetFromChunkBoundary = (int) (startOffset-firstChunkOffset); + this.startOffset = Math.max(startOffset, 0); + this.filename = filename; + this.datanodeID = datanodeID; + this.block = block; - int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum); - slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead); - checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead); - // Initially the buffers have nothing to read. - slowReadBuff.flip(); - checksumBuff.flip(); + // read and handle the common header here. For now just a version + checksumIn.getChannel().position(0); + BlockMetadataHeader header = BlockMetadataHeader + .readHeader(new DataInputStream(checksumIn)); + short version = header.getVersion(); + if (version != BlockMetadataHeader.VERSION) { + throw new IOException("Wrong version (" + version + ") of the " + + "metadata file for " + filename + "."); + } + if (!verifyChecksum) { + this.verifyChecksum = false; + } else { + this.verifyChecksum = !conf.getBoolean(DFSConfigKeys. 
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT); + } + long firstChunkOffset; + if (this.verifyChecksum) { + this.checksum = header.getChecksum(); + this.bytesPerChecksum = this.checksum.getBytesPerChecksum(); + this.checksumSize = this.checksum.getChecksumSize(); + firstChunkOffset = startOffset + - (startOffset % checksum.getBytesPerChecksum()); + this.offsetFromChunkBoundary = (int) (startOffset - firstChunkOffset); + + int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum); + slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead); + checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead); + // Initially the buffers have nothing to read. + slowReadBuff.flip(); + checksumBuff.flip(); + long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize; + IOUtils.skipFully(checksumIn, checkSumOffset); + } else { + firstChunkOffset = startOffset; + this.checksum = null; + this.bytesPerChecksum = 0; + this.checksumSize = 0; + this.offsetFromChunkBoundary = 0; + } + boolean success = false; try { - // Skip both input streams to beginning of the chunk containing startOffset - IOUtils.skipFully(dataIn, firstChunkOffset); - if (checksumIn != null) { - long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize; - IOUtils.skipFully(checksumIn, checkSumOffset); - } + // Reposition both input streams to the beginning of the chunk + // containing startOffset + this.dataIn.getChannel().position(firstChunkOffset); success = true; } finally { if (!success) { - bufferPool.returnBuffer(slowReadBuff); - bufferPool.returnBuffer(checksumBuff); + if (slowReadBuff != null) bufferPool.returnBuffer(slowReadBuff); + if (checksumBuff != null) bufferPool.returnBuffer(checksumBuff); } } } @@ -649,9 +485,17 @@ class BlockReaderLocal implements BlockReader { } @Override - public synchronized void close(PeerCache peerCache) throws IOException { - dataIn.close(); - if (checksumIn != null) { + public synchronized void close(PeerCache peerCache, + FileInputStreamCache fisCache) throws IOException { + if (fisCache != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("putting FileInputStream for " + filename + + " back into FileInputStreamCache"); + } + fisCache.put(datanodeID, block, new FileInputStream[] {dataIn, checksumIn}); + } else { + LOG.debug("closing FileInputStream for " + filename); + dataIn.close(); checksumIn.close(); } if (slowReadBuff != null) { @@ -675,4 +519,10 @@ class BlockReaderLocal implements BlockReader { public void readFully(byte[] buf, int off, int len) throws IOException { BlockReaderUtil.readFully(this, buf, off, len); } + + @Override + public int available() throws IOException { + // We never do network I/O in BlockReaderLocal. 
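+    // Every byte is served from local files, so report that an unlimited
+    // amount can be read without network I/O.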
+ return Integer.MAX_VALUE; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 8230b992cff..73230671570 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -128,7 +128,6 @@ import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; -import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; @@ -227,6 +226,11 @@ public class DFSClient implements java.io.Closeable { final boolean getHdfsBlocksMetadataEnabled; final int getFileBlockStorageLocationsNumThreads; final int getFileBlockStorageLocationsTimeout; + final String domainSocketPath; + final boolean skipShortCircuitChecksums; + final int shortCircuitBufferSize; + final boolean shortCircuitLocalReads; + final boolean domainSocketDataTraffic; Conf(Configuration conf) { maxFailoverAttempts = conf.getInt( @@ -288,6 +292,19 @@ public class DFSClient implements java.io.Closeable { getFileBlockStorageLocationsTimeout = conf.getInt( DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT, DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT); + domainSocketPath = conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + skipShortCircuitChecksums = conf.getBoolean( + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT); + shortCircuitBufferSize = conf.getInt( + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT); + shortCircuitLocalReads = conf.getBoolean( + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT); + domainSocketDataTraffic = conf.getBoolean( + DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, + DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT); } private DataChecksum.Type getChecksumType(Configuration conf) { @@ -345,7 +362,7 @@ public class DFSClient implements java.io.Closeable { private final Map filesBeingWritten = new HashMap(); - private boolean shortCircuitLocalReads; + private final DomainSocketFactory domainSocketFactory; /** * Same as this(NameNode.getAddress(conf), conf); @@ -417,12 +434,8 @@ public class DFSClient implements java.io.Closeable { } // read directly from the block file if configured. 
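
[Note, not part of the patch] The Conf fields added above read five new client-side keys. As a point of reference, a minimal sketch of enabling the feature programmatically is shown below; the domain socket path is an example value only and must match whatever dfs.datanode.domain.socket.path the DataNode itself is configured with.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class ShortCircuitClientConf {
      // Build a Configuration that enables short-circuit local reads via
      // file descriptor passing over a UNIX domain socket.
      public static Configuration create() {
        Configuration conf = new Configuration();
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
        // Example path only; must match the DataNode-side setting.
        conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY,
            "/var/run/hadoop-hdfs/dn_socket");
        // Leave ordinary block data traffic over the domain socket off;
        // descriptor passing alone is enough for short-circuit reads.
        conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false);
        // Buffer used by BlockReaderLocal; must hold at least one checksum chunk.
        conf.setInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY,
            1024 * 1024);
        return conf;
      }
    }
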
- this.shortCircuitLocalReads = conf.getBoolean( - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT); - if (LOG.isDebugEnabled()) { - LOG.debug("Short circuit read is " + shortCircuitLocalReads); - } + this.domainSocketFactory = new DomainSocketFactory(dfsClientConf); + String localInterfaces[] = conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES); localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces); @@ -787,28 +800,11 @@ public class DFSClient implements java.io.Closeable { AccessControlException.class); } } - - /** - * Get {@link BlockReader} for short circuited local reads. - */ - static BlockReader getLocalBlockReader(Configuration conf, - String src, ExtendedBlock blk, Token accessToken, - DatanodeInfo chosenNode, int socketTimeout, long offsetIntoBlock, - boolean connectToDnViaHostname) throws InvalidToken, IOException { - try { - return BlockReaderLocal.newBlockReader(conf, src, blk, accessToken, - chosenNode, socketTimeout, offsetIntoBlock, blk.getNumBytes() - - offsetIntoBlock, connectToDnViaHostname); - } catch (RemoteException re) { - throw re.unwrapRemoteException(InvalidToken.class, - AccessControlException.class); - } - } private static Map localAddrMap = Collections .synchronizedMap(new HashMap()); - private static boolean isLocalAddress(InetSocketAddress targetAddr) { + static boolean isLocalAddress(InetSocketAddress targetAddr) { InetAddress addr = targetAddr.getAddress(); Boolean cached = localAddrMap.get(addr.getHostAddress()); if (cached != null) { @@ -2108,10 +2104,6 @@ public class DFSClient implements java.io.Closeable { super(in); } } - - boolean shouldTryShortCircuitRead(InetSocketAddress targetAddr) { - return shortCircuitLocalReads && isLocalAddress(targetAddr); - } void reportChecksumFailure(String file, ExtendedBlock blk, DatanodeInfo dn) { DatanodeInfo [] dnArr = { dn }; @@ -2135,7 +2127,7 @@ public class DFSClient implements java.io.Closeable { + ", ugi=" + ugi + "]"; } - void disableShortCircuit() { - shortCircuitLocalReads = false; + public DomainSocketFactory getDomainSocketFactory() { + return domainSocketFactory; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index c62e9f7fc26..d0b29b71aa8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -342,7 +342,13 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY = "dfs.client.read.shortcircuit.skip.checksum"; public static final boolean DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT = false; public static final String DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY = "dfs.client.read.shortcircuit.buffer.size"; + public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY = "dfs.client.read.shortcircuit.streams.cache.size"; + public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 10; + public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY = "dfs.client.read.shortcircuit.streams.cache.expiry.ms"; + public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 60000; public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024; + public 
static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic"; + public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false; // property for fsimage compression public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress"; @@ -393,6 +399,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY = "dfs.web.authentication.kerberos.keytab"; public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user"; + public static final String DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY = "dfs.datanode.domain.socket.path"; // HA related configuration public static final String DFS_HA_NAMENODES_KEY_PREFIX = "dfs.ha.namenodes"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index aec1726a3ab..a33f8f54e7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs; +import java.io.FileInputStream; import java.io.IOException; import java.net.InetSocketAddress; import java.net.Socket; @@ -38,7 +39,7 @@ import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.ByteBufferReadable; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.UnresolvedLinkException; -import org.apache.hadoop.hdfs.net.EncryptedPeer; +import org.apache.hadoop.hdfs.net.DomainPeer; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; @@ -46,17 +47,16 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; -import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; -import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.hdfs.FileInputStreamCache; /**************************************************************** * DFSInputStream provides bytes from a named file. It handles @@ -80,6 +80,8 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable private long pos = 0; private long blockEnd = -1; + private final FileInputStreamCache fileInputStreamCache; + /** * This variable tracks the number of failures since the start of the * most recent user-facing operation. 
That is to say, it should be reset @@ -115,6 +117,13 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable this.buffersize = buffersize; this.src = src; this.peerCache = dfsClient.peerCache; + this.fileInputStreamCache = new FileInputStreamCache( + dfsClient.conf.getInt(DFSConfigKeys. + DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT), + dfsClient.conf.getLong(DFSConfigKeys. + DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT)); prefetchSize = dfsClient.getConf().prefetchSize; timeWindow = dfsClient.getConf().timeWindow; nCachedConnRetry = dfsClient.getConf().nCachedConnRetry; @@ -247,7 +256,9 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable locatedBlocks.getFileLength() + lastBlockBeingWrittenLength; } - private synchronized boolean blockUnderConstruction() { + // Short circuit local reads are forbidden for files that are + // under construction. See HDFS-2757. + synchronized boolean shortCircuitForbidden() { return locatedBlocks.isUnderConstruction(); } @@ -428,7 +439,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable // Will be getting a new BlockReader. if (blockReader != null) { - blockReader.close(peerCache); + blockReader.close(peerCache, fileInputStreamCache); blockReader = null; } @@ -510,10 +521,11 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable dfsClient.checkOpen(); if (blockReader != null) { - blockReader.close(peerCache); + blockReader.close(peerCache, fileInputStreamCache); blockReader = null; } super.close(); + fileInputStreamCache.close(); closed = true; } @@ -809,10 +821,6 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable e.getPos() + " from " + chosenNode); // we want to remember what we have tried addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap); - } catch (AccessControlException ex) { - DFSClient.LOG.warn("Short circuit access failed ", ex); - dfsClient.disableShortCircuit(); - continue; } catch (IOException e) { if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) { DFSClient.LOG.info("Will fetch a new encryption key and retry, " @@ -837,7 +845,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable } } finally { if (reader != null) { - reader.close(peerCache); + reader.close(peerCache, fileInputStreamCache); } } // Put chosen node into dead list, continue @@ -849,19 +857,29 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable Peer peer = null; boolean success = false; Socket sock = null; + DomainSocket domSock = null; + try { - sock = dfsClient.socketFactory.createSocket(); - NetUtils.connect(sock, addr, - dfsClient.getRandomLocalInterfaceAddr(), - dfsClient.getConf().socketTimeout); - peer = TcpPeerServer.peerFromSocketAndKey(sock, - dfsClient.getDataEncryptionKey()); + domSock = dfsClient.getDomainSocketFactory().create(addr, this); + if (domSock != null) { + // Create a UNIX Domain peer. + peer = new DomainPeer(domSock); + } else { + // Create a conventional TCP-based Peer. 
+ sock = dfsClient.socketFactory.createSocket(); + NetUtils.connect(sock, addr, + dfsClient.getRandomLocalInterfaceAddr(), + dfsClient.getConf().socketTimeout); + peer = TcpPeerServer.peerFromSocketAndKey(sock, + dfsClient.getDataEncryptionKey()); + } success = true; return peer; } finally { if (!success) { IOUtils.closeQuietly(peer); IOUtils.closeQuietly(sock); + IOUtils.closeQuietly(domSock); } } } @@ -895,49 +913,77 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable String clientName) throws IOException { - // Can't local read a block under construction, see HDFS-2757 - if (dfsClient.shouldTryShortCircuitRead(dnAddr) && - !blockUnderConstruction()) { - return DFSClient.getLocalBlockReader(dfsClient.conf, src, block, - blockToken, chosenNode, dfsClient.hdfsTimeout, startOffset, - dfsClient.connectToDnViaHostname()); - } - IOException err = null; - boolean fromCache = true; - // Allow retry since there is no way of knowing whether the cached socket - // is good until we actually use it. - for (int retries = 0; retries <= nCachedConnRetry && fromCache; ++retries) { + // Firstly, we check to see if we have cached any file descriptors for + // local blocks. If so, we can just re-use those file descriptors. + FileInputStream fis[] = fileInputStreamCache.get(chosenNode, block); + if (fis != null) { + if (DFSClient.LOG.isDebugEnabled()) { + DFSClient.LOG.debug("got FileInputStreams for " + block + " from " + + "the FileInputStreamCache."); + } + return new BlockReaderLocal(dfsClient.conf, file, + block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum); + } + + // We retry several times here. + // On the first nCachedConnRetry times, we try to fetch a socket from + // the socketCache and use it. This may fail, since the old socket may + // have been closed by the peer. + // After that, we try to create a new socket using newPeer(). + // This may create either a TCP socket or a UNIX domain socket, depending + // on the configuration and whether the peer is remote. + // If we try to create a UNIX domain socket and fail, we will not try that + // again. Instead, we'll try to create a TCP socket. Only after we've + // failed to create a TCP-based BlockReader will we throw an IOException + // from this function. Throwing an IOException from here is basically + // equivalent to declaring the DataNode bad. + boolean triedNonDomainSocketReader = false; + for (int retries = 0; + retries < nCachedConnRetry && (!triedNonDomainSocketReader); + ++retries) { Peer peer = null; - // Don't use the cache on the last attempt - it's possible that there - // are arbitrarily many unusable sockets in the cache, but we don't - // want to fail the read. if (retries < nCachedConnRetry) { peer = peerCache.get(chosenNode); } if (peer == null) { peer = newPeer(dnAddr); - fromCache = false; + if (peer.getDomainSocket() == null) { + triedNonDomainSocketReader = true; + } } - + boolean success = false; try { - // The OP_READ_BLOCK request is sent as we make the BlockReader - BlockReader reader = - BlockReaderFactory.newBlockReader(dfsClient.conf, - file, block, - blockToken, - startOffset, len, - verifyChecksum, - clientName, - peer, - chosenNode); - return reader; - } catch (IOException ex) { - // Our socket is no good. - DFSClient.LOG.debug("Error making BlockReader. 
Closing stale " + peer, ex); - IOUtils.closeQuietly(peer); + boolean allowShortCircuitLocalReads = + (peer.getDomainSocket() != null) && + dfsClient.getConf().shortCircuitLocalReads && + (!shortCircuitForbidden()); + // Here we will try to send either an OP_READ_BLOCK request or an + // OP_REQUEST_SHORT_CIRCUIT_FDS, depending on what kind of block reader + // we're trying to create. + BlockReader blockReader = BlockReaderFactory.newBlockReader( + dfsClient.conf, file, block, blockToken, startOffset, + len, verifyChecksum, clientName, peer, chosenNode, + dfsClient.getDomainSocketFactory(), allowShortCircuitLocalReads); + success = true; + return blockReader; + } catch (IOException ex) { + // Our socket is no good. + DFSClient.LOG.debug("Error making BlockReader. " + + "Closing stale " + peer, ex); + if (peer.getDomainSocket() != null) { + // If the Peer that we got the error from was a DomainPeer, + // mark the socket path as bad, so that newDataSocket will not try + // to re-open this socket for a while. + dfsClient.getDomainSocketFactory(). + disableDomainSocketPath(peer.getDomainSocket().getPath()); + } err = ex; + } finally { + if (!success) { + IOUtils.closeQuietly(peer); + } } } @@ -1075,7 +1121,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable // the TCP buffer, then just eat up the intervening data. // int diff = (int)(targetPos - pos); - if (diff <= DFSClient.TCP_WINDOW_SIZE) { + if (diff <= blockReader.available()) { try { pos += blockReader.skip(diff); if (pos == targetPos) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java new file mode 100644 index 00000000000..50b60521070 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.DFSClient.Conf; + +import org.apache.hadoop.net.unix.DomainSocket; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + +class DomainSocketFactory { + public static final Log LOG = LogFactory.getLog(DomainSocketFactory.class); + private final Conf conf; + + enum PathStatus { + UNUSABLE, + SHORT_CIRCUIT_DISABLED, + } + + /** + * Information about domain socket paths. 
+ */ + Cache pathInfo = + CacheBuilder.newBuilder() + .expireAfterWrite(10, TimeUnit.MINUTES) + .build(); + + public DomainSocketFactory(Conf conf) { + this.conf = conf; + + String feature = null; + if (conf.shortCircuitLocalReads) { + feature = "The short-circuit local reads feature"; + } else if (conf.domainSocketDataTraffic) { + feature = "UNIX domain socket data traffic"; + } + if (feature != null) { + if (conf.domainSocketPath == null) { + LOG.warn(feature + " is disabled because you have not set " + + DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + } else if (DomainSocket.getLoadingFailureReason() != null) { + LOG.error(feature + " is disabled because " + + DomainSocket.getLoadingFailureReason()); + } else { + LOG.debug(feature + "is enabled."); + } + } + } + + /** + * Create a DomainSocket. + * + * @param addr The address of the DataNode + * @param stream The DFSInputStream the socket will be created for. + * + * @return null if the socket could not be created; the + * socket otherwise. If there was an error while + * creating the socket, we will add the socket path + * to our list of failed domain socket paths. + */ + DomainSocket create(InetSocketAddress addr, DFSInputStream stream) { + // If there is no domain socket path configured, we can't use domain + // sockets. + if (conf.domainSocketPath == null) return null; + // UNIX domain sockets can only be used to talk to local peers + if (!DFSClient.isLocalAddress(addr)) return null; + // If the DomainSocket code is not loaded, we can't create + // DomainSocket objects. + if (DomainSocket.getLoadingFailureReason() != null) return null; + String escapedPath = DomainSocket. + getEffectivePath(conf.domainSocketPath, addr.getPort()); + PathStatus info = pathInfo.getIfPresent(escapedPath); + if (info == PathStatus.UNUSABLE) { + // We tried to connect to this domain socket before, and it was totally + // unusable. + return null; + } + if ((!conf.domainSocketDataTraffic) && + ((info == PathStatus.SHORT_CIRCUIT_DISABLED) || + stream.shortCircuitForbidden())) { + // If we don't want to pass data over domain sockets, and we don't want + // to pass file descriptors over them either, we have no use for domain + // sockets. + return null; + } + boolean success = false; + DomainSocket sock = null; + try { + sock = DomainSocket.connect(escapedPath); + sock.setAttribute(DomainSocket.RCV_TIMEO, conf.socketTimeout); + success = true; + } catch (IOException e) { + LOG.error("error creating DomainSocket", e); + // fall through + } finally { + if (!success) { + if (sock != null) { + IOUtils.closeQuietly(sock); + } + pathInfo.put(escapedPath, PathStatus.UNUSABLE); + sock = null; + } + } + return sock; + } + + public void disableShortCircuitForPath(String path) { + pathInfo.put(path, PathStatus.SHORT_CIRCUIT_DISABLED); + } + + public void disableDomainSocketPath(String path) { + pathInfo.put(path, PathStatus.UNUSABLE); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java new file mode 100644 index 00000000000..d9045f0cf2c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java @@ -0,0 +1,265 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.FileInputStream; +import java.util.Iterator; +import java.util.List; +import java.util.Map.Entry; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.Time; + +import com.google.common.collect.LinkedListMultimap; +import com.google.common.util.concurrent.ThreadFactoryBuilder; + +/** + * FileInputStream cache is used to cache FileInputStream objects that we + * have received from the DataNode. + */ +class FileInputStreamCache { + private final static Log LOG = LogFactory.getLog(FileInputStreamCache.class); + + /** + * The executor service that runs the cacheCleaner. There is only one of + * these per VM. + */ + private final static ScheduledThreadPoolExecutor executor + = new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder(). + setDaemon(true).setNameFormat("FileInputStreamCache Cleaner"). + build()); + + /** + * The CacheCleaner for this FileInputStreamCache. We don't create this + * and schedule it until it becomes necessary. + */ + private CacheCleaner cacheCleaner; + + /** + * Maximum number of entries to allow in the cache. + */ + private final int maxCacheSize; + + /** + * The minimum time in milliseconds to preserve an element in the cache. + */ + private final long expiryTimeMs; + + /** + * True if the FileInputStreamCache is closed. + */ + private boolean closed = false; + + /** + * Cache entries. + */ + private final LinkedListMultimap map = LinkedListMultimap.create(); + + /** + * Expiry thread which makes sure that the file descriptors get closed + * after a while. + */ + class CacheCleaner implements Runnable { + @Override + public void run() { + synchronized(FileInputStreamCache.this) { + if (closed) return; + long curTime = Time.monotonicNow(); + for (Iterator> iter = map.entries().iterator(); + iter.hasNext(); + iter = map.entries().iterator()) { + Entry entry = iter.next(); + if (entry.getValue().getTime() + expiryTimeMs >= curTime) { + break; + } + entry.getValue().close(); + iter.remove(); + } + } + } + } + + /** + * The key identifying a FileInputStream array. 
+ */ + static class Key { + private final DatanodeID datanodeID; + private final ExtendedBlock block; + + public Key(DatanodeID datanodeID, ExtendedBlock block) { + this.datanodeID = datanodeID; + this.block = block; + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof FileInputStreamCache.Key)) { + return false; + } + FileInputStreamCache.Key otherKey = (FileInputStreamCache.Key)other; + return (block.equals(otherKey.block) & + (block.getGenerationStamp() == otherKey.block.getGenerationStamp()) & + datanodeID.equals(otherKey.datanodeID)); + } + + @Override + public int hashCode() { + return block.hashCode(); + } + } + + /** + * The value containing a FileInputStream array and the time it was added to + * the cache. + */ + static class Value { + private final FileInputStream fis[]; + private final long time; + + public Value (FileInputStream fis[]) { + this.fis = fis; + this.time = Time.monotonicNow(); + } + + public FileInputStream[] getFileInputStreams() { + return fis; + } + + public long getTime() { + return time; + } + + public void close() { + IOUtils.cleanup(LOG, fis); + } + } + + /** + * Create a new FileInputStream + * + * @param maxCacheSize The maximum number of elements to allow in + * the cache. + * @param expiryTimeMs The minimum time in milliseconds to preserve + * elements in the cache. + */ + public FileInputStreamCache(int maxCacheSize, long expiryTimeMs) { + this.maxCacheSize = maxCacheSize; + this.expiryTimeMs = expiryTimeMs; + } + + /** + * Put an array of FileInputStream objects into the cache. + * + * @param datanodeID The DatanodeID to store the streams under. + * @param block The Block to store the streams under. + * @param fis The streams. + */ + public void put(DatanodeID datanodeID, ExtendedBlock block, + FileInputStream fis[]) { + boolean inserted = false; + try { + synchronized(this) { + if (closed) return; + if (map.size() + 1 > maxCacheSize) { + Iterator> iter = map.entries().iterator(); + if (!iter.hasNext()) return; + Entry entry = iter.next(); + entry.getValue().close(); + iter.remove(); + } + if (cacheCleaner == null) { + cacheCleaner = new CacheCleaner(); + executor.scheduleAtFixedRate(cacheCleaner, expiryTimeMs, expiryTimeMs, + TimeUnit.MILLISECONDS); + } + map.put(new Key(datanodeID, block), new Value(fis)); + inserted = true; + } + } finally { + if (!inserted) { + IOUtils.cleanup(LOG, fis); + } + } + } + + /** + * Find and remove an array of FileInputStream objects from the cache. + * + * @param datanodeID The DatanodeID to search for. + * @param block The Block to search for. + * + * @return null if no streams can be found; the + * array otherwise. If this is non-null, the + * array will have been removed from the cache. + */ + public synchronized FileInputStream[] get(DatanodeID datanodeID, + ExtendedBlock block) { + Key key = new Key(datanodeID, block); + List ret = map.get(key); + if (ret.isEmpty()) return null; + Value val = ret.get(0); + map.remove(key, val); + return val.getFileInputStreams(); + } + + /** + * Close the cache and free all associated resources. 
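
[Note, not part of the patch] The put()/get() pair above is what lets a closing BlockReaderLocal hand its open descriptors back for reuse. A small usage sketch; the sizes are example values that would normally come from the new dfs.client.read.shortcircuit.streams.cache.* keys:

    package org.apache.hadoop.hdfs;   // FileInputStreamCache is package-private

    import java.io.FileInputStream;
    import org.apache.hadoop.hdfs.protocol.DatanodeID;
    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;

    class FileInputStreamCacheSketch {
      // Example sizes only (16 entries, 60 second expiry).
      private static final FileInputStreamCache CACHE =
          new FileInputStreamCache(16, 60 * 1000L);

      // As in BlockReaderLocal.close(): park the open data and metadata
      // streams instead of closing them.
      static void park(DatanodeID dn, ExtendedBlock blk,
          FileInputStream dataIn, FileInputStream checksumIn) {
        CACHE.put(dn, blk, new FileInputStream[] { dataIn, checksumIn });
      }

      // As in DFSInputStream.getBlockReader(): reuse cached descriptors for the
      // same (datanode, block) pair; null means they were evicted or expired,
      // so the client falls back to requesting descriptors again.
      static FileInputStream[] reclaim(DatanodeID dn, ExtendedBlock blk) {
        return CACHE.get(dn, blk);
      }
    }
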
+ */ + public synchronized void close() { + if (closed) return; + closed = true; + if (cacheCleaner != null) { + executor.remove(cacheCleaner); + } + for (Iterator> iter = map.entries().iterator(); + iter.hasNext(); + iter = map.entries().iterator()) { + Entry entry = iter.next(); + entry.getValue().close(); + iter.remove(); + } + } + + public synchronized String toString() { + StringBuilder bld = new StringBuilder(); + bld.append("FileInputStreamCache("); + String prefix = ""; + for (Entry entry : map.entries()) { + bld.append(prefix); + bld.append(entry.getKey()); + prefix = ", "; + } + bld.append(")"); + return bld.toString(); + } + + public long getExpiryTimeMs() { + return expiryTimeMs; + } + + public int getMaxCacheSize() { + return maxCacheSize; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java index 2a9523351c1..99f1db99057 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java @@ -413,7 +413,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { } @Override - public synchronized void close(PeerCache peerCache) throws IOException { + public synchronized void close(PeerCache peerCache, + FileInputStreamCache fisCache) throws IOException { startOffset = -1; checksum = null; if (peerCache != null & sentStatusCode) { @@ -470,4 +471,11 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { public int read(ByteBuffer buf) throws IOException { throw new UnsupportedOperationException("readDirect unsupported in RemoteBlockReader"); } + + @Override + public int available() throws IOException { + // An optimistic estimate of how much data is available + // to us without doing network I/O. + return DFSClient.TCP_WINDOW_SIZE; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java index 007249b7922..6961f382dfe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java @@ -275,7 +275,8 @@ public class RemoteBlockReader2 implements BlockReader { @Override - public synchronized void close(PeerCache peerCache) throws IOException { + public synchronized void close(PeerCache peerCache, + FileInputStreamCache fisCache) throws IOException { packetReceiver.close(); startOffset = -1; checksum = null; @@ -422,4 +423,11 @@ public class RemoteBlockReader2 implements BlockReader { } } } + + @Override + public int available() throws IOException { + // An optimistic estimate of how much data is available + // to us without doing network I/O. 
+ return DFSClient.TCP_WINDOW_SIZE; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java index d22584fc787..bded892e8ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.net; -import java.io.Closeable; import java.io.IOException; import java.net.SocketTimeoutException; @@ -27,8 +26,10 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.hdfs.net.PeerServer; import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.classification.InterfaceAudience; -class DomainPeerServer implements PeerServer { +@InterfaceAudience.Private +public class DomainPeerServer implements PeerServer { static Log LOG = LogFactory.getLog(DomainPeerServer.class); private final DomainSocket sock; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java index 98094472a73..b584f3b1989 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtocol.java @@ -104,6 +104,18 @@ public interface DataTransferProtocol { final String clientName, final DatanodeInfo[] targets) throws IOException; + /** + * Request short circuit access file descriptors from a DataNode. + * + * @param blk The block to get file descriptors for. + * @param blockToken Security token for accessing the block. + * @param maxVersion Maximum version of the block data the client + * can understand. + */ + public void requestShortCircuitFds(final ExtendedBlock blk, + final Token blockToken, + int maxVersion) throws IOException; + /** * Receive a block from a source datanode * and then notifies the namenode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java index 4b9b47fe3db..d64e83e0329 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Op.java @@ -34,7 +34,8 @@ public enum Op { REPLACE_BLOCK((byte)83), COPY_BLOCK((byte)84), BLOCK_CHECKSUM((byte)85), - TRANSFER_BLOCK((byte)86); + TRANSFER_BLOCK((byte)86), + REQUEST_SHORT_CIRCUIT_FDS((byte)87); /** The code for this operation. 
*/ public final byte code; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java index ff7a81babd7..260a0a6e212 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReplaceBlockProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpTransferBlockProto; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpWriteBlockProto; /** Receiver */ @@ -77,6 +78,9 @@ public abstract class Receiver implements DataTransferProtocol { case TRANSFER_BLOCK: opTransferBlock(in); break; + case REQUEST_SHORT_CIRCUIT_FDS: + opRequestShortCircuitFds(in); + break; default: throw new IOException("Unknown op " + op + " in data stream"); } @@ -117,6 +121,15 @@ public abstract class Receiver implements DataTransferProtocol { fromProtos(proto.getTargetsList())); } + /** Receive {@link Op#REQUEST_SHORT_CIRCUIT_FDS} */ + private void opRequestShortCircuitFds(DataInputStream in) throws IOException { + final OpRequestShortCircuitAccessProto proto = + OpRequestShortCircuitAccessProto.parseFrom(vintPrefixed(in)); + requestShortCircuitFds(fromProto(proto.getHeader().getBlock()), + fromProto(proto.getHeader().getToken()), + proto.getMaxVersion()); + } + /** Receive OP_REPLACE_BLOCK */ private void opReplaceBlock(DataInputStream in) throws IOException { OpReplaceBlockProto proto = OpReplaceBlockProto.parseFrom(vintPrefixed(in)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java index 03e13080612..f117cdf9745 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReplaceBlockProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpTransferBlockProto; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpRequestShortCircuitAccessProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpWriteBlockProto; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.security.token.Token; @@ -135,6 +136,17 @@ public class Sender implements DataTransferProtocol { send(out, Op.TRANSFER_BLOCK, proto); } + @Override + public void requestShortCircuitFds(final ExtendedBlock blk, + final Token blockToken, + int maxVersion) throws IOException { + OpRequestShortCircuitAccessProto proto = + OpRequestShortCircuitAccessProto.newBuilder() + 
.setHeader(DataTransferProtoUtil.buildBaseHeader( + blk, blockToken)).setMaxVersion(maxVersion).build(); + send(out, Op.REQUEST_SHORT_CIRCUIT_FDS, proto); + } + @Override public void replaceBlock(final ExtendedBlock blk, final Token blockToken, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index fe4eac1a6e5..4b273d3344e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -213,7 +213,7 @@ public class JspHelper { offsetIntoBlock, amtToRead, true, "JspHelper", TcpPeerServer.peerFromSocketAndKey(s, encryptionKey), new DatanodeID(addr.getAddress().toString(), - addr.getHostName(), poolId, addr.getPort(), 0, 0)); + addr.getHostName(), poolId, addr.getPort(), 0, 0), null, false); byte[] buf = new byte[(int)amtToRead]; int readOffset = 0; @@ -232,8 +232,7 @@ public class JspHelper { amtToRead -= numRead; readOffset += numRead; } - blockReader = null; - s.close(); + blockReader.close(null, null); out.print(HtmlQuoting.quoteHtmlChars(new String(buf))); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 8045481538e..4d1889eb624 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -53,16 +53,15 @@ import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; import java.net.InetSocketAddress; -import java.net.ServerSocket; import java.net.Socket; import java.net.URI; import java.net.UnknownHostException; -import java.nio.channels.ServerSocketChannel; import java.nio.channels.SocketChannel; import java.security.PrivilegedExceptionAction; import java.util.AbstractList; @@ -90,6 +89,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.net.DomainPeerServer; import org.apache.hadoop.hdfs.net.TcpPeerServer; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; @@ -149,11 +149,11 @@ import org.apache.hadoop.io.ReadaheadPool; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.net.DNS; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -233,6 +233,7 @@ public class DataNode extends Configured LogFactory.getLog(DataNode.class.getName() + ".clienttrace"); private static final String USAGE = "Usage: 
java DataNode [-rollback | -regular]"; + static final int CURRENT_BLOCK_FORMAT_VERSION = 1; /** * Use {@link NetUtils#createSocketAddr(String)} instead. @@ -250,6 +251,7 @@ public class DataNode extends Configured public final static String EMPTY_DEL_HINT = ""; AtomicInteger xmitsInProgress = new AtomicInteger(); Daemon dataXceiverServer = null; + Daemon localDataXceiverServer = null; ThreadGroup threadGroup = null; private DNConf dnConf; private volatile boolean heartbeatsDisabledForTests = false; @@ -261,6 +263,7 @@ public class DataNode extends Configured private String hostName; private DatanodeID id; + final private String fileDescriptorPassingDisabledReason; boolean isBlockTokenEnabled; BlockPoolTokenSecretManager blockPoolTokenSecretManager; private boolean hasAnyBlockPoolRegistered = false; @@ -309,6 +312,24 @@ public class DataNode extends Configured this.getHdfsBlockLocationsEnabled = conf.getBoolean( DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED, DFSConfigKeys.DFS_HDFS_BLOCKS_METADATA_ENABLED_DEFAULT); + + // Determine whether we should try to pass file descriptors to clients. + if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) { + String reason = DomainSocket.getLoadingFailureReason(); + if (reason != null) { + LOG.warn("File descriptor passing is disabled because " + reason); + this.fileDescriptorPassingDisabledReason = reason; + } else { + LOG.info("File descriptor passing is enabled."); + this.fileDescriptorPassingDisabledReason = null; + } + } else { + this.fileDescriptorPassingDisabledReason = + "File descriptor passing was not configured."; + LOG.debug(this.fileDescriptorPassingDisabledReason); + } + try { hostName = getHostName(conf); LOG.info("Configured hostname is " + hostName); @@ -537,6 +558,41 @@ public class DataNode extends Configured this.dataXceiverServer = new Daemon(threadGroup, new DataXceiverServer(tcpPeerServer, conf, this)); this.threadGroup.setDaemon(true); // auto destroy when empty + + DomainPeerServer domainPeerServer = + getDomainPeerServer(conf, streamingAddr.getPort()); + if (domainPeerServer != null) { + this.localDataXceiverServer = new Daemon(threadGroup, + new DataXceiverServer(domainPeerServer, conf, this)); + LOG.info("Listening on UNIX domain socket: " + + domainPeerServer.getBindPath()); + } + } + + static DomainPeerServer getDomainPeerServer(Configuration conf, + int port) throws IOException { + String domainSocketPath = + conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + if (domainSocketPath == null) { + if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) { + LOG.warn("Although short-circuit local reads are configured, " + + "they are disabled because you didn't configure " + + DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + } + return null; + } + if (DomainSocket.getLoadingFailureReason() != null) { + throw new RuntimeException("Although a UNIX domain socket " + + "path is configured as " + domainSocketPath + ", we cannot " + + "start a localDataXceiverServer because " + + DomainSocket.getLoadingFailureReason()); + } + DomainPeerServer domainPeerServer = + new DomainPeerServer(domainSocketPath, port); + domainPeerServer.setReceiveBufferSize( + HdfsConstants.DEFAULT_DATA_SOCKET_SIZE); + return domainPeerServer; } // calls specific to BP @@ -1039,6 +1095,42 @@ public class DataNode extends Configured return info; } + @InterfaceAudience.LimitedPrivate("HDFS") + static public 
class ShortCircuitFdsUnsupportedException extends IOException { + private static final long serialVersionUID = 1L; + public ShortCircuitFdsUnsupportedException(String msg) { + super(msg); + } + } + + @InterfaceAudience.LimitedPrivate("HDFS") + static public class ShortCircuitFdsVersionException extends IOException { + private static final long serialVersionUID = 1L; + public ShortCircuitFdsVersionException(String msg) { + super(msg); + } + } + + FileInputStream[] requestShortCircuitFdsForRead(final ExtendedBlock blk, + final Token token, int maxVersion) + throws ShortCircuitFdsUnsupportedException, + ShortCircuitFdsVersionException, IOException { + if (fileDescriptorPassingDisabledReason != null) { + throw new ShortCircuitFdsUnsupportedException( + fileDescriptorPassingDisabledReason); + } + checkBlockToken(blk, token, BlockTokenSecretManager.AccessMode.READ); + int blkVersion = CURRENT_BLOCK_FORMAT_VERSION; + if (maxVersion < blkVersion) { + throw new ShortCircuitFdsVersionException("Your client is too old " + + "to read this block! Its format version is " + + blkVersion + ", but the highest format version you can read is " + + maxVersion); + } + metrics.incrBlocksGetLocalPathInfo(); + return data.getShortCircuitFdsForRead(blk); + } + @Override public HdfsBlocksMetadata getHdfsBlocksMetadata(List blocks, List> tokens) throws IOException, @@ -1113,32 +1205,45 @@ public class DataNode extends Configured if (dataXceiverServer != null) { ((DataXceiverServer) this.dataXceiverServer.getRunnable()).kill(); this.dataXceiverServer.interrupt(); - - // wait for all data receiver threads to exit - if (this.threadGroup != null) { - int sleepMs = 2; - while (true) { - this.threadGroup.interrupt(); - LOG.info("Waiting for threadgroup to exit, active threads is " + - this.threadGroup.activeCount()); - if (this.threadGroup.activeCount() == 0) { - break; - } - try { - Thread.sleep(sleepMs); - } catch (InterruptedException e) {} - sleepMs = sleepMs * 3 / 2; // exponential backoff - if (sleepMs > 1000) { - sleepMs = 1000; - } + } + if (localDataXceiverServer != null) { + ((DataXceiverServer) this.localDataXceiverServer.getRunnable()).kill(); + this.localDataXceiverServer.interrupt(); + } + // wait for all data receiver threads to exit + if (this.threadGroup != null) { + int sleepMs = 2; + while (true) { + this.threadGroup.interrupt(); + LOG.info("Waiting for threadgroup to exit, active threads is " + + this.threadGroup.activeCount()); + if (this.threadGroup.activeCount() == 0) { + break; + } + try { + Thread.sleep(sleepMs); + } catch (InterruptedException e) {} + sleepMs = sleepMs * 3 / 2; // exponential backoff + if (sleepMs > 1000) { + sleepMs = 1000; } } - // wait for dataXceiveServer to terminate + this.threadGroup = null; + } + if (this.dataXceiverServer != null) { + // wait for dataXceiverServer to terminate try { this.dataXceiverServer.join(); } catch (InterruptedException ie) { } } + if (this.localDataXceiverServer != null) { + // wait for localDataXceiverServer to terminate + try { + this.localDataXceiverServer.join(); + } catch (InterruptedException ie) { + } + } if(blockPoolManager != null) { try { @@ -1523,6 +1628,9 @@ public class DataNode extends Configured // start dataXceiveServer dataXceiverServer.start(); + if (localDataXceiverServer != null) { + localDataXceiverServer.start(); + } ipcServer.start(); startPlugins(conf); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index d618c787d13..2998cc61634 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode; import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR; +import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR_UNSUPPORTED; import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.ERROR_ACCESS_TOKEN; import static org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status.SUCCESS; import static org.apache.hadoop.util.Time.now; @@ -28,6 +29,8 @@ import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.EOFException; +import java.io.FileDescriptor; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; @@ -60,11 +63,14 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.datanode.DataNode.ShortCircuitFdsUnsupportedException; +import org.apache.hadoop.hdfs.server.datanode.DataNode.ShortCircuitFdsVersionException; import org.apache.hadoop.hdfs.server.datanode.fsdataset.LengthInputStream; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; @@ -232,6 +238,68 @@ class DataXceiver extends Receiver implements Runnable { } } + @Override + public void requestShortCircuitFds(final ExtendedBlock blk, + final Token token, + int maxVersion) throws IOException { + updateCurrentThreadName("Passing file descriptors for block " + blk); + BlockOpResponseProto.Builder bld = BlockOpResponseProto.newBuilder(); + FileInputStream fis[] = null; + try { + if (peer.getDomainSocket() == null) { + throw new IOException("You cannot pass file descriptors over " + + "anything but a UNIX domain socket."); + } + fis = datanode.requestShortCircuitFdsForRead(blk, token, maxVersion); + bld.setStatus(SUCCESS); + bld.setShortCircuitAccessVersion(DataNode.CURRENT_BLOCK_FORMAT_VERSION); + } catch (ShortCircuitFdsVersionException e) { + bld.setStatus(ERROR_UNSUPPORTED); + bld.setShortCircuitAccessVersion(DataNode.CURRENT_BLOCK_FORMAT_VERSION); + bld.setMessage(e.getMessage()); + } catch (ShortCircuitFdsUnsupportedException e) { + bld.setStatus(ERROR_UNSUPPORTED); + bld.setMessage(e.getMessage()); + } catch (InvalidToken e) { + bld.setStatus(ERROR_ACCESS_TOKEN); + bld.setMessage(e.getMessage()); + } catch (IOException e) { + bld.setStatus(ERROR); + bld.setMessage(e.getMessage()); + } + try { + bld.build().writeDelimitedTo(socketOut); + if (fis != null) { + FileDescriptor fds[] = new FileDescriptor[fis.length]; + for (int i = 0; i < fds.length; i++) { + fds[i] = fis[i].getFD(); + } + byte buf[] = new 
byte[] { (byte)0 }; + peer.getDomainSocket(). + sendFileDescriptors(fds, buf, 0, buf.length); + } + } finally { + if (ClientTraceLog.isInfoEnabled()) { + DatanodeRegistration dnR = datanode.getDNRegistrationForBP(blk + .getBlockPoolId()); + BlockSender.ClientTraceLog.info(String.format( + String.format( + "src: %s, dest: %s, op: %s, blockid: %s, srvID: %s, " + + "success: %b", + "127.0.0.1", // src IP + "127.0.0.1", // dst IP + "REQUEST_SHORT_CIRCUIT_FDS", // operation + blk.getBlockId(), // block id + dnR.getStorageID(), + (fis != null) + ))); + } + if (fis != null) { + IOUtils.cleanup(LOG, fis); + } + } + } + @Override public void readBlock(final ExtendedBlock block, final Token blockToken, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java index 13ef752750b..57e8887be81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsDatasetSpi.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; @@ -386,4 +387,6 @@ public interface FsDatasetSpi extends FSDatasetMBean { public HdfsBlocksMetadata getHdfsBlocksMetadata(List blocks) throws IOException; + FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block) + throws IOException; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index c65e5faa405..427bfcf23a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.VolumeChoosingPolicy; import org.apache.hadoop.hdfs.server.datanode.metrics.FSDatasetMBean; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock; import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DiskChecker.DiskErrorException; @@ -1668,6 +1669,26 @@ class FsDatasetImpl implements FsDatasetSpi { return info; } + @Override // FsDatasetSpi + public FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block) + throws IOException { + File datafile = getBlockFile(block); + File metafile = FsDatasetUtil.getMetaFile(datafile, + block.getGenerationStamp()); + FileInputStream fis[] = new FileInputStream[2]; + boolean success = false; + try { + fis[0] = new FileInputStream(datafile); + fis[1] = new FileInputStream(metafile); + success = true; + return fis; + } finally { + if (!success) { + IOUtils.cleanup(null, fis); + } + } + } + @Override // FsDatasetSpi public HdfsBlocksMetadata getHdfsBlocksMetadata(List blocks) throws IOException { diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 4dd1e808c54..e66611f8dab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -563,7 +563,7 @@ public class NamenodeFsck { conf, file, block, lblock.getBlockToken(), 0, -1, true, "fsck", TcpPeerServer.peerFromSocketAndKey(s, namenode.getRpcServer(). getDataEncryptionKey()), - chosenNode); + chosenNode, null, false); } catch (IOException ex) { // Put chosen node into dead list, continue diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto index ca24f7a4a44..7659f2ee8fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientDatanodeProtocol.proto @@ -74,6 +74,8 @@ message DeleteBlockPoolResponseProto { * Gets the file information where block and its metadata is stored * block - block for which path information is being requested * token - block token + * + * This message is deprecated in favor of file descriptor passing. */ message GetBlockLocalPathInfoRequestProto { required ExtendedBlockProto block = 1; @@ -84,6 +86,8 @@ message GetBlockLocalPathInfoRequestProto { * block - block for which file path information is being returned * localPath - file path where the block data is stored * localMetaPath - file path where the block meta data is stored + * + * This message is deprecated in favor of file descriptor passing. */ message GetBlockLocalPathInfoResponseProto { required ExtendedBlockProto block = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto index 8ce5fd75661..bba125c4cab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/datatransfer.proto @@ -114,6 +114,16 @@ message OpBlockChecksumProto { required BaseHeaderProto header = 1; } +message OpRequestShortCircuitAccessProto { + required BaseHeaderProto header = 1; + + /** In order to get short-circuit access to block data, clients must set this + * to the highest version of the block data that they can understand. + * Currently 1 is the only version, but more versions may exist in the future + * if the on-disk format changes. + */ + required uint32 maxVersion = 2; +} message PacketHeaderProto { // All fields must be fixed-length! @@ -132,6 +142,7 @@ enum Status { ERROR_EXISTS = 4; ERROR_ACCESS_TOKEN = 5; CHECKSUM_OK = 6; + ERROR_UNSUPPORTED = 7; } message PipelineAckProto { @@ -164,6 +175,16 @@ message BlockOpResponseProto { /** explanatory text which may be useful to log on the client side */ optional string message = 5; + + /** If the server chooses to agree to the request of a client for + * short-circuit access, it will send a response message with the relevant + * file descriptors attached. + * + * In the body of the message, this version number will be set to the + * specific version number of the block data that the client is about to + * read. 
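
[Note, not part of the patch] To tie the pieces together, here is a hedged client-side sketch of the exchange these messages define: send REQUEST_SHORT_CIRCUIT_FDS advertising the highest block format version understood, read the delimited BlockOpResponseProto, then receive the two descriptors over the domain socket. It is illustrative only; in particular, DomainSocket.recvFileInputStreams is assumed here to be the receive-side counterpart of the sendFileDescriptors call used in DataXceiver above.

    package org.apache.hadoop.hdfs;

    import java.io.DataOutputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.hadoop.hdfs.net.Peer;
    import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
    import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
    import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
    import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
    import org.apache.hadoop.security.token.Token;

    class ShortCircuitFdRequestSketch {
      // Sketch only. Assumes `peer` wraps a connected DomainPeer, and that
      // DomainSocket.recvFileInputStreams(FileInputStream[], byte[], int, int)
      // exists as the receive-side counterpart of sendFileDescriptors.
      static FileInputStream[] requestFds(Peer peer, ExtendedBlock blk,
          Token<BlockTokenIdentifier> token) throws IOException {
        // 1. Frame and send OP_REQUEST_SHORT_CIRCUIT_FDS, advertising the
        //    highest block format version this client understands (currently 1).
        new Sender(new DataOutputStream(peer.getOutputStream()))
            .requestShortCircuitFds(blk, token, 1);
        // 2. The DataNode replies with a delimited BlockOpResponseProto
        //    (written with writeDelimitedTo in DataXceiver above).
        BlockOpResponseProto resp =
            BlockOpResponseProto.parseDelimitedFrom(peer.getInputStream());
        if (resp.getStatus() != Status.SUCCESS) {
          throw new IOException("Short-circuit FDs refused: " + resp.getMessage());
        }
        // 3. On success, the data and metadata descriptors arrive as ancillary
        //    data on the UNIX domain socket, alongside a one-byte payload.
        FileInputStream[] fis = new FileInputStream[2];
        byte[] buf = new byte[1];
        peer.getDomainSocket().recvFileInputStreams(fis, buf, 0, buf.length);
        return fis;   // fis[0] = block data, fis[1] = block metadata/checksums
      }
    }
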
+ */ + optional uint32 shortCircuitAccessVersion = 6; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java index 1731b2b5f5d..51b0796572d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.net.unix.DomainSocket; /** * A helper class to setup the cluster, and get to BlockReader and DataNode for a block. @@ -156,7 +157,7 @@ public class BlockReaderTestUtil { testBlock.getBlockToken(), offset, lenToRead, true, "BlockReaderTestUtil", TcpPeerServer.peerFromSocket(sock), - nodes[0]); + nodes[0], null, false); } /** @@ -168,4 +169,12 @@ public class BlockReaderTestUtil { return cluster.getDataNode(ipcport); } + public boolean haveRequiredResources() { + if (conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY) != null) { + // To use UNIX Domain sockets, we must have the native code loaded. + return DomainSocket.getLoadingFailureReason() == null; + } else { + return true; + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 95008348bda..39f417128a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -2189,14 +2189,27 @@ public class MiniDFSCluster { /** * Get file correpsonding to a block * @param storageDir storage directory - * @param blk block to be corrupted - * @return file corresponding to the block + * @param blk the block + * @return data file corresponding to the block */ public static File getBlockFile(File storageDir, ExtendedBlock blk) { return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()), blk.getBlockName()); } + /** + * Get the latest metadata file correpsonding to a block + * @param storageDir storage directory + * @param blk the block + * @return metadata file corresponding to the block + */ + public static File getBlockMetadataFile(File storageDir, ExtendedBlock blk) { + return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()), + blk.getBlockName() + "_" + blk.getGenerationStamp() + + Block.METADATA_EXTENSION); + + } + /** * Shut down a cluster if it is not null * @param cluster cluster reference or null @@ -2224,7 +2237,7 @@ public class MiniDFSCluster { } /** - * Get files related to a block for a given datanode + * Get the block data file for a block from a given datanode * @param dnIndex Index of the datanode to get block files for * @param block block for which corresponding files are needed */ @@ -2239,6 +2252,24 @@ public class MiniDFSCluster { } return null; } + + /** + * Get the block metadata file for a block from a given datanode + * + * @param dnIndex Index of the datanode to get block files for + * @param block block for which corresponding files are needed + */ + public static File getBlockMetadataFile(int dnIndex, ExtendedBlock block) { + // Check for block file in the two storage 
directories of the datanode + for (int i = 0; i <=1 ; i++) { + File storageDir = MiniDFSCluster.getStorageDir(dnIndex, i); + File blockMetaFile = getBlockMetadataFile(storageDir, block); + if (blockMetaFile.exists()) { + return blockMetaFile; + } + } + return null; + } /** * Throw an exception if the MiniDFSCluster is not started with a single diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java index 7ccd5b6f52f..e35da42a7d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java @@ -17,90 +17,333 @@ */ package org.apache.hadoop.hdfs; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.RandomAccessFile; import java.nio.ByteBuffer; +import java.util.concurrent.TimeoutException; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.security.UserGroupInformation; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.apache.hadoop.io.IOUtils; +import org.junit.Assert; import org.junit.Test; public class TestBlockReaderLocal { - static MiniDFSCluster cluster; - static HdfsConfiguration conf; - - @BeforeClass - public static void setupCluster() throws IOException { - conf = new HdfsConfiguration(); - - conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); - conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, - false); - conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, - UserGroupInformation.getCurrentUser().getShortUserName()); - - cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); - } - - @AfterClass - public static void teardownCluster() { - cluster.shutdown(); + public static void assertArrayRegionsEqual(byte []buf1, int off1, byte []buf2, + int off2, int len) { + for (int i = 0; i < len; i++) { + if (buf1[off1 + i] != buf2[off2 + i]) { + Assert.fail("arrays differ at byte " + i + ". " + + "The first array has " + (int)buf1[off1 + i] + + ", but the second array has " + (int)buf2[off2 + i]); + } + } } /** - * Test that, in the case of an error, the position and limit of a ByteBuffer - * are left unchanged. This is not mandated by ByteBufferReadable, but clients - * of this class might immediately issue a retry on failure, so it's polite. + * Similar to IOUtils#readFully(). Reads bytes in a loop. + * + * @param reader The BlockReaderLocal to read bytes from + * @param buf The ByteBuffer to read into + * @param off The offset in the buffer to read into + * @param len The number of bytes to read. 
+ * + * @throws IOException If it could not read the requested number of bytes */ + private static void readFully(BlockReaderLocal reader, + ByteBuffer buf, int off, int len) throws IOException { + int amt = len; + while (amt > 0) { + buf.limit(off + len); + buf.position(off); + long ret = reader.read(buf); + if (ret < 0) { + throw new EOFException( "Premature EOF from BlockReaderLocal " + + "after reading " + (len - amt) + " byte(s)."); + } + amt -= ret; + off += ret; + } + } + + private static interface BlockReaderLocalTest { + final int TEST_LENGTH = 12345; + public void setup(File blockFile, boolean usingChecksums) + throws IOException; + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException; + } + + public void runBlockReaderLocalTest(BlockReaderLocalTest test, + boolean checksum) throws IOException { + MiniDFSCluster cluster = null; + HdfsConfiguration conf = new HdfsConfiguration(); + conf.setBoolean(DFSConfigKeys. + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, !checksum); + conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C"); + FileInputStream dataIn = null, checkIn = null; + final Path TEST_PATH = new Path("/a"); + final long RANDOM_SEED = 4567L; + BlockReaderLocal blockReaderLocal = null; + FSDataInputStream fsIn = null; + byte original[] = new byte[BlockReaderLocalTest.TEST_LENGTH]; + + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); + FileSystem fs = cluster.getFileSystem(); + DFSTestUtil.createFile(fs, TEST_PATH, + BlockReaderLocalTest.TEST_LENGTH, (short)1, RANDOM_SEED); + try { + DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1); + } catch (InterruptedException e) { + Assert.fail("unexpected InterruptedException during " + + "waitReplication: " + e); + } catch (TimeoutException e) { + Assert.fail("unexpected TimeoutException during " + + "waitReplication: " + e); + } + fsIn = fs.open(TEST_PATH); + IOUtils.readFully(fsIn, original, 0, + BlockReaderLocalTest.TEST_LENGTH); + fsIn.close(); + fsIn = null; + ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_PATH); + File dataFile = MiniDFSCluster.getBlockFile(0, block); + File metaFile = MiniDFSCluster.getBlockMetadataFile(0, block); + + DatanodeID datanodeID = cluster.getDataNodes().get(0).getDatanodeId(); + cluster.shutdown(); + cluster = null; + test.setup(dataFile, checksum); + dataIn = new FileInputStream(dataFile); + checkIn = new FileInputStream(metaFile); + blockReaderLocal = new BlockReaderLocal(conf, + TEST_PATH.getName(), block, 0, -1, + dataIn, checkIn, datanodeID, checksum); + dataIn = null; + checkIn = null; + test.doTest(blockReaderLocal, original); + } finally { + if (fsIn != null) fsIn.close(); + if (cluster != null) cluster.shutdown(); + if (dataIn != null) dataIn.close(); + if (checkIn != null) checkIn.close(); + if (blockReaderLocal != null) blockReaderLocal.close(null, null); + } + } + + private static class TestBlockReaderLocalImmediateClose + implements BlockReaderLocalTest { + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { } + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { } + } + @Test - public void testStablePositionAfterCorruptRead() throws Exception { - final short REPL_FACTOR = 1; - final long FILE_LENGTH = 512L; - cluster.waitActive(); - FileSystem fs = cluster.getFileSystem(); + public void testBlockReaderLocalImmediateClose() throws IOException { + runBlockReaderLocalTest(new 
TestBlockReaderLocalImmediateClose(), true); + runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), false); + } + + private static class TestBlockReaderSimpleReads + implements BlockReaderLocalTest { + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { } + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + byte buf[] = new byte[TEST_LENGTH]; + reader.readFully(buf, 0, 512); + assertArrayRegionsEqual(original, 0, buf, 0, 512); + reader.readFully(buf, 512, 512); + assertArrayRegionsEqual(original, 512, buf, 512, 512); + reader.readFully(buf, 1024, 513); + assertArrayRegionsEqual(original, 1024, buf, 1024, 513); + reader.readFully(buf, 1537, 514); + assertArrayRegionsEqual(original, 1537, buf, 1537, 514); + } + } + + @Test + public void testBlockReaderSimpleReads() throws IOException { + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true); + } - Path path = new Path("/corrupted"); + @Test + public void testBlockReaderSimpleReadsNoChecksum() throws IOException { + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), false); + } + + private static class TestBlockReaderLocalArrayReads2 + implements BlockReaderLocalTest { + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { } + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + byte buf[] = new byte[TEST_LENGTH]; + reader.readFully(buf, 0, 10); + assertArrayRegionsEqual(original, 0, buf, 0, 10); + reader.readFully(buf, 10, 100); + assertArrayRegionsEqual(original, 10, buf, 10, 100); + reader.readFully(buf, 110, 700); + assertArrayRegionsEqual(original, 110, buf, 110, 700); + reader.readFully(buf, 810, 1); // from offset 810 to offset 811 + reader.readFully(buf, 811, 5); + assertArrayRegionsEqual(original, 811, buf, 811, 5); + reader.readFully(buf, 816, 900); // skip from offset 816 to offset 1716 + reader.readFully(buf, 1716, 5); + assertArrayRegionsEqual(original, 1716, buf, 1716, 5); + } + } + + @Test + public void testBlockReaderLocalArrayReads2() throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(), + true); + } - DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L); - DFSTestUtil.waitReplication(fs, path, REPL_FACTOR); + @Test + public void testBlockReaderLocalArrayReads2NoChecksum() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(), + false); + } - ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path); - int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block); - assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted); + private static class TestBlockReaderLocalByteBufferReads + implements BlockReaderLocalTest { + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { } + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + ByteBuffer buf = ByteBuffer.wrap(new byte[TEST_LENGTH]); + readFully(reader, buf, 0, 10); + assertArrayRegionsEqual(original, 0, buf.array(), 0, 10); + readFully(reader, buf, 10, 100); + assertArrayRegionsEqual(original, 10, buf.array(), 10, 100); + readFully(reader, buf, 110, 700); + assertArrayRegionsEqual(original, 110, buf.array(), 110, 700); + reader.skip(1); // skip from offset 810 to offset 811 + readFully(reader, buf, 811, 5); + assertArrayRegionsEqual(original, 811, buf.array(), 811, 5); + } + } + + @Test + 
public void testBlockReaderLocalByteBufferReads() + throws IOException { + runBlockReaderLocalTest( + new TestBlockReaderLocalByteBufferReads(), true); + } - FSDataInputStream dis = cluster.getFileSystem().open(path); - ByteBuffer buf = ByteBuffer.allocateDirect((int)FILE_LENGTH); - boolean sawException = false; - try { - dis.read(buf); - } catch (ChecksumException ex) { - sawException = true; + @Test + public void testBlockReaderLocalByteBufferReadsNoChecksum() + throws IOException { + runBlockReaderLocalTest( + new TestBlockReaderLocalByteBufferReads(), false); + } + + private static class TestBlockReaderLocalReadCorruptStart + implements BlockReaderLocalTest { + boolean usingChecksums = false; + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { + RandomAccessFile bf = null; + this.usingChecksums = usingChecksums; + try { + bf = new RandomAccessFile(blockFile, "rw"); + bf.write(new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0}); + } finally { + if (bf != null) bf.close(); + } } - assertTrue(sawException); - assertEquals(0, buf.position()); - assertEquals(buf.capacity(), buf.limit()); - - dis = cluster.getFileSystem().open(path); - buf.position(3); - buf.limit(25); - sawException = false; - try { - dis.read(buf); - } catch (ChecksumException ex) { - sawException = true; + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + byte buf[] = new byte[TEST_LENGTH]; + if (usingChecksums) { + try { + reader.readFully(buf, 0, 10); + Assert.fail("did not detect corruption"); + } catch (IOException e) { + // expected + } + } else { + reader.readFully(buf, 0, 10); + } + } + } + + @Test + public void testBlockReaderLocalReadCorruptStart() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorruptStart(), true); + } + + private static class TestBlockReaderLocalReadCorrupt + implements BlockReaderLocalTest { + boolean usingChecksums = false; + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { + RandomAccessFile bf = null; + this.usingChecksums = usingChecksums; + try { + bf = new RandomAccessFile(blockFile, "rw"); + bf.seek(1539); + bf.write(new byte[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0}); + } finally { + if (bf != null) bf.close(); + } } - assertTrue(sawException); - assertEquals(3, buf.position()); - assertEquals(25, buf.limit()); + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + byte buf[] = new byte[TEST_LENGTH]; + try { + reader.readFully(buf, 0, 10); + assertArrayRegionsEqual(original, 0, buf, 0, 10); + reader.readFully(buf, 10, 100); + assertArrayRegionsEqual(original, 10, buf, 10, 100); + reader.readFully(buf, 110, 700); + assertArrayRegionsEqual(original, 110, buf, 110, 700); + reader.skip(1); // skip from offset 810 to offset 811 + reader.readFully(buf, 811, 5); + assertArrayRegionsEqual(original, 811, buf, 811, 5); + reader.readFully(buf, 816, 900); + if (usingChecksums) { + // We should detect the corruption when using a checksum file. 
+ Assert.fail("did not detect corruption"); + } + } catch (ChecksumException e) { + if (!usingChecksums) { + Assert.fail("didn't expect to get ChecksumException: not " + + "using checksums."); + } + } + } + } + + @Test + public void testBlockReaderLocalReadCorrupt() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), true); + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), false); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java index 2a0e0a85565..4a88e0b1d30 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientBlockVerification.java @@ -61,7 +61,7 @@ public class TestClientBlockVerification { util.getBlockReader(testBlock, 0, FILE_SIZE_K * 1024)); util.readAndCheckEOS(reader, FILE_SIZE_K * 1024, true); verify(reader).sendReadResult(Status.CHECKSUM_OK); - reader.close(null); + reader.close(null, null); } /** @@ -76,7 +76,7 @@ public class TestClientBlockVerification { // We asked the blockreader for the whole file, and only read // half of it, so no CHECKSUM_OK verify(reader, never()).sendReadResult(Status.CHECKSUM_OK); - reader.close(null); + reader.close(null, null); } /** @@ -92,7 +92,7 @@ public class TestClientBlockVerification { // And read half the file util.readAndCheckEOS(reader, FILE_SIZE_K * 1024 / 2, true); verify(reader).sendReadResult(Status.CHECKSUM_OK); - reader.close(null); + reader.close(null, null); } /** @@ -111,7 +111,7 @@ public class TestClientBlockVerification { util.getBlockReader(testBlock, startOffset, length)); util.readAndCheckEOS(reader, length, true); verify(reader).sendReadResult(Status.CHECKSUM_OK); - reader.close(null); + reader.close(null, null); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java new file mode 100644 index 00000000000..7f28a43ceb4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileInputStreamCache.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; + +import junit.framework.Assert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.junit.Test; + +public class TestFileInputStreamCache { + static final Log LOG = LogFactory.getLog(TestFileInputStreamCache.class); + + @Test + public void testCreateAndDestroy() throws Exception { + FileInputStreamCache cache = new FileInputStreamCache(10, 1000); + cache.close(); + } + + private static class TestFileDescriptorPair { + TemporarySocketDirectory dir = new TemporarySocketDirectory(); + FileInputStream fis[]; + + public TestFileDescriptorPair() throws IOException { + fis = new FileInputStream[2]; + for (int i = 0; i < 2; i++) { + String name = dir.getDir() + "/file" + i; + FileOutputStream fos = new FileOutputStream(name); + fos.write(1); + fos.close(); + fis[i] = new FileInputStream(name); + } + } + + public FileInputStream[] getFileInputStreams() { + return fis; + } + + public void close() throws IOException { + IOUtils.cleanup(LOG, fis); + dir.close(); + } + + public boolean compareWith(FileInputStream other[]) { + if ((other == null) || (fis == null)) { + return other == fis; + } + if (fis.length != other.length) return false; + for (int i = 0; i < fis.length; i++) { + if (fis[i] != other[i]) return false; + } + return true; + } + } + + @Test + public void testAddAndRetrieve() throws Exception { + FileInputStreamCache cache = new FileInputStreamCache(1, 1000000); + DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost", + "xyzzy", 8080, 9090, 7070); + ExtendedBlock block = new ExtendedBlock("poolid", 123); + TestFileDescriptorPair pair = new TestFileDescriptorPair(); + cache.put(dnId, block, pair.getFileInputStreams()); + FileInputStream fis[] = cache.get(dnId, block); + Assert.assertTrue(pair.compareWith(fis)); + pair.close(); + cache.close(); + } + + @Test + public void testExpiry() throws Exception { + FileInputStreamCache cache = new FileInputStreamCache(1, 10); + DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost", + "xyzzy", 8080, 9090, 7070); + ExtendedBlock block = new ExtendedBlock("poolid", 123); + TestFileDescriptorPair pair = new TestFileDescriptorPair(); + cache.put(dnId, block, pair.getFileInputStreams()); + Thread.sleep(cache.getExpiryTimeMs() * 100); + FileInputStream fis[] = cache.get(dnId, block); + Assert.assertNull(fis); + pair.close(); + cache.close(); + } + + @Test + public void testEviction() throws Exception { + FileInputStreamCache cache = new FileInputStreamCache(1, 10000000); + DatanodeID dnId = new DatanodeID("127.0.0.1", "localhost", + "xyzzy", 8080, 9090, 7070); + ExtendedBlock block = new ExtendedBlock("poolid", 123); + TestFileDescriptorPair pair = new TestFileDescriptorPair(); + cache.put(dnId, block, pair.getFileInputStreams()); + DatanodeID dnId2 = new DatanodeID("127.0.0.1", "localhost", + "xyzzy", 8081, 9091, 7071); + TestFileDescriptorPair pair2 = new TestFileDescriptorPair(); + cache.put(dnId2, block, pair2.getFileInputStreams()); + FileInputStream fis[] = cache.get(dnId, block); + Assert.assertNull(fis); + FileInputStream fis2[] = cache.get(dnId2, block); + Assert.assertTrue(pair2.compareWith(fis2)); + pair.close(); + cache.close(); + } +} diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java index b4320520354..3ec1a069b0a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java @@ -17,14 +17,10 @@ */ package org.apache.hadoop.hdfs; -import java.io.IOException; - import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Test; public class TestParallelRead extends TestParallelReadUtil { - @BeforeClass static public void setupCluster() throws Exception { setupCluster(DEFAULT_REPLICATION_FACTOR, new HdfsConfiguration()); @@ -34,26 +30,4 @@ public class TestParallelRead extends TestParallelReadUtil { static public void teardownCluster() throws Exception { TestParallelReadUtil.teardownCluster(); } - - /** - * Do parallel read several times with different number of files and threads. - * - * Note that while this is the only "test" in a junit sense, we're actually - * dispatching a lot more. Failures in the other methods (and other threads) - * need to be manually collected, which is inconvenient. - */ - @Test - public void testParallelReadCopying() throws IOException { - runTestWorkload(new CopyingReadWorkerHelper()); - } - - @Test - public void testParallelReadByteBuffer() throws IOException { - runTestWorkload(new DirectReadWorkerHelper()); - } - - @Test - public void testParallelReadMixed() throws IOException { - runTestWorkload(new MixedWorkloadHelper()); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java index 1c59eca871d..6f10804d5db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java @@ -32,12 +32,18 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; import org.apache.log4j.LogManager; +import org.junit.Assume; +import org.junit.Ignore; +import org.junit.Test; /** * Driver class for testing the use of DFSInputStream by multiple concurrent - * readers, using the different read APIs. See subclasses for the actual test - * cases. + * readers, using the different read APIs. + * + * This class is marked as @Ignore so that junit doesn't try to execute the + * tests in here directly. They are executed from subclasses. */ +@Ignore public class TestParallelReadUtil { static final Log LOG = LogFactory.getLog(TestParallelReadUtil.class); @@ -386,4 +392,28 @@ public class TestParallelReadUtil { util.shutdown(); } + /** + * Do parallel read several times with different number of files and threads. + * + * Note that while this is the only "test" in a junit sense, we're actually + * dispatching a lot more. Failures in the other methods (and other threads) + * need to be manually collected, which is inconvenient. 
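The @Ignore note above describes a split that is easy to miss when reading the diff: the shared workload methods stay in the ignored base class, and each concrete subclass re-runs them under its own cluster configuration. A minimal restatement of that pattern, with invented class names and shown as if the two classes lived in separate files:

    import org.junit.BeforeClass;
    import org.junit.Ignore;
    import org.junit.Test;

    // @Ignore keeps JUnit from executing the base class directly; since @Ignore
    // is not an inherited annotation, subclasses still run the inherited @Test
    // methods under whatever cluster they configure in @BeforeClass.
    @Ignore
    public class ParallelReadBase {
      @Test
      public void testWorkload() throws Exception {
        // shared workload; executed once for every concrete subclass
      }
    }

    public class ParallelReadOverTcp extends ParallelReadBase {
      @BeforeClass
      public static void setupCluster() throws Exception {
        // subclass-specific cluster setup, mirroring setupCluster(...) above
      }
    }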
+ */ + @Test + public void testParallelReadCopying() throws IOException { + Assume.assumeTrue(util.haveRequiredResources()); + runTestWorkload(new CopyingReadWorkerHelper()); + } + + @Test + public void testParallelReadByteBuffer() throws IOException { + Assume.assumeTrue(util.haveRequiredResources()); + runTestWorkload(new DirectReadWorkerHelper()); + } + + @Test + public void testParallelReadMixed() throws IOException { + Assume.assumeTrue(util.haveRequiredResources()); + runTestWorkload(new MixedWorkloadHelper()); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java new file mode 100644 index 00000000000..776e5d7e030 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.File; + +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestParallelShortCircuitRead extends TestParallelReadUtil { + private static TemporarySocketDirectory sockDir; + + @BeforeClass + static public void setupCluster() throws Exception { + sockDir = new TemporarySocketDirectory(); + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); + conf.setBoolean(DFSConfigKeys. 
+ DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); + DomainSocket.disableBindPathValidation(); + setupCluster(1, conf); + } + + @AfterClass + static public void teardownCluster() throws Exception { + sockDir.close(); + TestParallelReadUtil.teardownCluster(); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java similarity index 53% rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelLocalRead.java rename to hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java index 05702d2aa4b..2f2a2c6b96b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java @@ -17,52 +17,32 @@ */ package org.apache.hadoop.hdfs; -import java.io.IOException; +import java.io.File; -import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.junit.AfterClass; import org.junit.BeforeClass; -import org.junit.Test; -public class TestParallelLocalRead extends TestParallelReadUtil { +public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil { + private static TemporarySocketDirectory sockDir; @BeforeClass static public void setupCluster() throws Exception { + sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); - + conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); - conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, - false); - conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, - UserGroupInformation.getCurrentUser().getShortUserName()); - + conf.setBoolean(DFSConfigKeys. + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true); + DomainSocket.disableBindPathValidation(); setupCluster(1, conf); } @AfterClass static public void teardownCluster() throws Exception { + sockDir.close(); TestParallelReadUtil.teardownCluster(); } - - /** - * Do parallel read several times with different number of files and threads. - * - * Note that while this is the only "test" in a junit sense, we're actually - * dispatching a lot more. Failures in the other methods (and other threads) - * need to be manually collected, which is inconvenient. 
- */ - @Test - public void testParallelReadCopying() throws IOException { - runTestWorkload(new CopyingReadWorkerHelper()); - } - - @Test - public void testParallelReadByteBuffer() throws IOException { - runTestWorkload(new DirectReadWorkerHelper()); - } - - @Test - public void testParallelReadMixed() throws IOException { - runTestWorkload(new MixedWorkloadHelper()); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java new file mode 100644 index 00000000000..d2170d32bfe --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.File; + +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestParallelUnixDomainRead extends TestParallelReadUtil { + private static TemporarySocketDirectory sockDir; + + @BeforeClass + static public void setupCluster() throws Exception { + sockDir = new TemporarySocketDirectory(); + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false); + DomainSocket.disableBindPathValidation(); + setupCluster(1, conf); + } + + @AfterClass + static public void teardownCluster() throws Exception { + sockDir.close(); + TestParallelReadUtil.teardownCluster(); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index 623c1f656c2..3f21e230e90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -20,9 +20,11 @@ package org.apache.hadoop.hdfs; import static org.junit.Assert.assertTrue; import java.io.EOFException; +import java.io.File; import java.io.IOException; +import java.io.RandomAccessFile; import java.nio.ByteBuffer; -import java.security.PrivilegedExceptionAction; +import java.util.concurrent.TimeoutException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -30,21 +32,22 @@ import org.apache.hadoop.fs.FSDataOutputStream; import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; -import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; -import org.apache.hadoop.hdfs.server.datanode.DataNode; -import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; /** @@ -55,8 +58,18 @@ import org.junit.Test; * system. */ public class TestShortCircuitLocalRead { + private static TemporarySocketDirectory sockDir; - static final String DIR = "/" + TestShortCircuitLocalRead.class.getSimpleName() + "/"; + @BeforeClass + public static void init() { + sockDir = new TemporarySocketDirectory(); + DomainSocket.disableBindPathValidation(); + } + + @AfterClass + public static void shutdown() throws IOException { + sockDir.close(); + } static final long seed = 0xDEADBEEFL; static final int blockSize = 5120; @@ -81,7 +94,9 @@ public class TestShortCircuitLocalRead { for (int idx = 0; idx < len; idx++) { if (expected[from + idx] != actual[idx]) { Assert.fail(message + " byte " + (from + idx) + " differs. 
expected " - + expected[from + idx] + " actual " + actual[idx]); + + expected[from + idx] + " actual " + actual[idx] + + "\nexpected: " + StringUtils.byteToHexString(expected, from, from + len) + + "\nactual: " + StringUtils.byteToHexString(actual, 0, len)); } } } @@ -170,8 +185,9 @@ public class TestShortCircuitLocalRead { conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, ignoreChecksum); - conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, - UserGroupInformation.getCurrentUser().getShortUserName()); + conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + new File(sockDir.getDir(), + "TestShortCircuitLocalRead.__PORT__.sock").getAbsolutePath()); if (simulatedStorage) { SimulatedFSDataset.setFactory(conf); } @@ -229,23 +245,17 @@ public class TestShortCircuitLocalRead { doTestShortCircuitRead(false, 10*blockSize+100, 777); doTestShortCircuitRead(true, 10*blockSize+100, 777); } - + @Test - public void testGetBlockLocalPathInfo() throws IOException, InterruptedException { + public void testDeprecatedGetBlockLocalPathInfoRpc() + throws IOException, InterruptedException { final Configuration conf = new Configuration(); - conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, - "alloweduser1,alloweduser2"); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1) .format(true).build(); cluster.waitActive(); - final DataNode dn = cluster.getDataNodes().get(0); FileSystem fs = cluster.getFileSystem(); try { DFSTestUtil.createFile(fs, new Path("/tmp/x"), 16, (short) 1, 23); - UserGroupInformation aUgi1 = - UserGroupInformation.createRemoteUser("alloweduser1"); - UserGroupInformation aUgi2 = - UserGroupInformation.createRemoteUser("alloweduser2"); LocatedBlocks lb = cluster.getNameNode().getRpcServer() .getBlockLocations("/tmp/x", 0, 16); // Create a new block object, because the block inside LocatedBlock at @@ -253,51 +263,11 @@ public class TestShortCircuitLocalRead { ExtendedBlock blk = new ExtendedBlock(lb.get(0).getBlock()); Token token = lb.get(0).getBlockToken(); final DatanodeInfo dnInfo = lb.get(0).getLocations()[0]; - ClientDatanodeProtocol proxy = aUgi1 - .doAs(new PrivilegedExceptionAction() { - @Override - public ClientDatanodeProtocol run() throws Exception { - return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, - 60000, false); - } - }); - - // This should succeed - BlockLocalPathInfo blpi = proxy.getBlockLocalPathInfo(blk, token); - Assert.assertEquals( - DataNodeTestUtils.getFSDataset(dn).getBlockLocalPathInfo(blk).getBlockPath(), - blpi.getBlockPath()); - - // Try with the other allowed user - proxy = aUgi2 - .doAs(new PrivilegedExceptionAction() { - @Override - public ClientDatanodeProtocol run() throws Exception { - return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, - 60000, false); - } - }); - - // This should succeed as well - blpi = proxy.getBlockLocalPathInfo(blk, token); - Assert.assertEquals( - DataNodeTestUtils.getFSDataset(dn).getBlockLocalPathInfo(blk).getBlockPath(), - blpi.getBlockPath()); - - // Now try with a disallowed user - UserGroupInformation bUgi = UserGroupInformation - .createRemoteUser("notalloweduser"); - proxy = bUgi - .doAs(new PrivilegedExceptionAction() { - @Override - public ClientDatanodeProtocol run() throws Exception { - return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, - 60000, false); - } - }); + ClientDatanodeProtocol proxy = + 
DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, 60000, false); try { proxy.getBlockLocalPathInfo(blk, token); - Assert.fail("The call should have failed as " + bUgi.getShortUserName() + Assert.fail("The call should have failed as this user " + " is not allowed to call getBlockLocalPathInfo"); } catch (IOException ex) { Assert.assertTrue(ex.getMessage().contains( @@ -315,8 +285,6 @@ public class TestShortCircuitLocalRead { Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); - conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, - UserGroupInformation.getCurrentUser().getShortUserName()); if (simulatedStorage) { SimulatedFSDataset.setFactory(conf); } @@ -354,6 +322,86 @@ public class TestShortCircuitLocalRead { cluster.shutdown(); } } + + @Test + public void testHandleTruncatedBlockFile() throws IOException { + MiniDFSCluster cluster = null; + HdfsConfiguration conf = new HdfsConfiguration(); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); + conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C"); + final Path TEST_PATH = new Path("/a"); + final Path TEST_PATH2 = new Path("/b"); + final long RANDOM_SEED = 4567L; + final long RANDOM_SEED2 = 4568L; + FSDataInputStream fsIn = null; + final int TEST_LENGTH = 3456; + + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); + FileSystem fs = cluster.getFileSystem(); + DFSTestUtil.createFile(fs, TEST_PATH, + TEST_LENGTH, (short)1, RANDOM_SEED); + DFSTestUtil.createFile(fs, TEST_PATH2, + TEST_LENGTH, (short)1, RANDOM_SEED2); + fsIn = cluster.getFileSystem().open(TEST_PATH2); + byte original[] = new byte[TEST_LENGTH]; + IOUtils.readFully(fsIn, original, 0, TEST_LENGTH); + fsIn.close(); + fsIn = null; + try { + DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1); + } catch (InterruptedException e) { + Assert.fail("unexpected InterruptedException during " + + "waitReplication: " + e); + } catch (TimeoutException e) { + Assert.fail("unexpected TimeoutException during " + + "waitReplication: " + e); + } + ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_PATH); + File dataFile = MiniDFSCluster.getBlockFile(0, block); + cluster.shutdown(); + cluster = null; + RandomAccessFile raf = null; + try { + raf = new RandomAccessFile(dataFile, "rw"); + raf.setLength(0); + } finally { + if (raf != null) raf.close(); + } + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).format(false).build(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + fsIn = fs.open(TEST_PATH); + try { + byte buf[] = new byte[100]; + fsIn.seek(2000); + fsIn.readFully(buf, 0, buf.length); + Assert.fail("shouldn't be able to read from corrupt 0-length " + + "block file."); + } catch (IOException e) { + DFSClient.LOG.error("caught exception ", e); + } + fsIn.close(); + fsIn = null; + + // We should still be able to read the other file. + // This is important because it indicates that we detected that the + // previous block was corrupt, rather than blaming the problem on + // communication. 
+ fsIn = fs.open(TEST_PATH2); + byte buf[] = new byte[original.length]; + fsIn.readFully(buf, 0, buf.length); + TestBlockReaderLocal.assertArrayRegionsEqual(original, 0, buf, 0, + original.length); + fsIn.close(); + fsIn = null; + } finally { + if (fsIn != null) fsIn.close(); + if (cluster != null) cluster.shutdown(); + } + } /** * Test to run benchmarks between shortcircuit read vs regular read with diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java index fc2c6c67912..d68625de36a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java @@ -148,7 +148,7 @@ public class TestBlockTokenWithDFS { blockReader = BlockReaderFactory.newBlockReader( conf, file, block, lblock.getBlockToken(), 0, -1, true, "TestBlockTokenWithDFS", TcpPeerServer.peerFromSocket(s), - nodes[0]); + nodes[0], null, false); } catch (IOException ex) { if (ex instanceof InvalidBlockTokenException) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java index 26926be0f36..ef73d868151 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -962,6 +963,12 @@ public class SimulatedFSDataset implements FsDatasetSpi { public BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) { throw new UnsupportedOperationException(); } + + @Override + public FileInputStream[] getShortCircuitFdsForRead(ExtendedBlock block) + throws IOException { + throw new UnsupportedOperationException(); + } @Override public HdfsBlocksMetadata getHdfsBlocksMetadata(List blocks) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java index efe0a4cc962..3ba91c4dc1c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java @@ -32,6 +32,7 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.BlockReader; import org.apache.hadoop.hdfs.BlockReaderFactory; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; @@ -281,11 +282,11 @@ public class TestDataNodeVolumeFailure { String file = BlockReaderFactory.getFileName(targetAddr, "test-blockpoolid", block.getBlockId()); - BlockReaderFactory.newBlockReader(conf, file, block, + BlockReader blockReader = + 
BlockReaderFactory.newBlockReader(conf, file, block, lblock.getBlockToken(), 0, -1, true, "TestDataNodeVolumeFailure", - TcpPeerServer.peerFromSocket(s), datanode); - - // nothing - if it fails - it will throw and exception + TcpPeerServer.peerFromSocket(s), datanode, null, false); + blockReader.close(null, null); } /** From 6f8ee865debd830d2b800de88dd150fc049d1e42 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Sat, 12 Jan 2013 00:12:30 +0000 Subject: [PATCH 05/52] HDFS-4388. DomainSocket should throw AsynchronousCloseException when appropriate. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1432339 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/net/unix/DomainSocket.java | 75 +++++++--- .../hadoop/net/unix/TestDomainSocket.java | 139 +++++++++++++++++- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 + 3 files changed, 193 insertions(+), 24 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java index b1f8749f581..9c37db3d4fb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -25,6 +25,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.SocketException; +import java.nio.channels.AsynchronousCloseException; +import java.nio.channels.ClosedChannelException; import java.nio.channels.ReadableByteChannel; import java.nio.ByteBuffer; import java.util.concurrent.atomic.AtomicInteger; @@ -207,10 +209,13 @@ public class DomainSocket implements Closeable { */ public DomainSocket accept() throws IOException { fdRef(); + boolean exc = true; try { - return new DomainSocket(path, accept0(fd)); + DomainSocket ret = new DomainSocket(path, accept0(fd)); + exc = false; + return ret; } finally { - fdUnref(); + fdUnref(exc); } } @@ -235,20 +240,23 @@ public class DomainSocket implements Closeable { * * @throws SocketException If the file descriptor is closed. */ - private void fdRef() throws SocketException { + private void fdRef() throws ClosedChannelException { int bits = status.incrementAndGet(); if ((bits & STATUS_CLOSED_MASK) != 0) { status.decrementAndGet(); - throw new SocketException("Socket is closed."); + throw new ClosedChannelException(); } } /** * Decrement the reference count of the underlying file descriptor. 
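For reference, the scheme behind fdRef()/fdUnref() can be restated on its own: a single AtomicInteger packs an in-use count in its low bits and a closed flag behind STATUS_CLOSED_MASK, so close() can mark the descriptor unusable without locking while other threads are blocked in native I/O. The class below is only a sketch of that bookkeeping with an illustrative mask value, not the DomainSocket code itself.

    import java.nio.channels.AsynchronousCloseException;
    import java.nio.channels.ClosedChannelException;
    import java.util.concurrent.atomic.AtomicInteger;

    class FdStatusSketch {
      private static final int CLOSED = 1 << 30;     // illustrative closed bit
      private final AtomicInteger status = new AtomicInteger(0);

      void ref() throws ClosedChannelException {
        int bits = status.incrementAndGet();
        if ((bits & CLOSED) != 0) {
          status.decrementAndGet();                  // undo our reference
          throw new ClosedChannelException();
        }
      }

      void unref(boolean checkClosed) throws AsynchronousCloseException {
        int newCount = status.decrementAndGet();
        // If the descriptor was closed while this thread was inside an I/O call
        // and the call did not already fail for another reason, surface it as an
        // asynchronous close, matching java.nio semantics.
        if (checkClosed && (newCount & CLOSED) != 0) {
          throw new AsynchronousCloseException();
        }
      }
    }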
*/ - private void fdUnref() { + private void fdUnref(boolean checkClosed) throws AsynchronousCloseException { int newCount = status.decrementAndGet(); - assert newCount >= 0; + assert (newCount & ~STATUS_CLOSED_MASK) >= 0; + if (checkClosed & ((newCount & STATUS_CLOSED_MASK) != 0)) { + throw new AsynchronousCloseException(); + } } /** @@ -298,10 +306,12 @@ public class DomainSocket implements Closeable { public void setAttribute(int type, int size) throws IOException { fdRef(); + boolean exc = true; try { setAttribute0(fd, type, size); + exc = false; } finally { - fdUnref(); + fdUnref(exc); } } @@ -309,10 +319,14 @@ public class DomainSocket implements Closeable { public int getAttribute(int type) throws IOException { fdRef(); + int attribute; + boolean exc = true; try { - return getAttribute0(fd, type); + attribute = getAttribute0(fd, type); + exc = false; + return attribute; } finally { - fdUnref(); + fdUnref(exc); } } @@ -396,10 +410,12 @@ public class DomainSocket implements Closeable { public void sendFileDescriptors(FileDescriptor jfds[], byte jbuf[], int offset, int length) throws IOException { fdRef(); + boolean exc = true; try { sendFileDescriptors0(fd, jfds, jbuf, offset, length); + exc = false; } finally { - fdUnref(); + fdUnref(exc); } } @@ -430,10 +446,13 @@ public class DomainSocket implements Closeable { public int receiveFileDescriptors(FileDescriptor[] jfds, byte jbuf[], int offset, int length) throws IOException { fdRef(); + boolean exc = true; try { - return receiveFileDescriptors0(fd, jfds, jbuf, offset, length); + int nBytes = receiveFileDescriptors0(fd, jfds, jbuf, offset, length); + exc = false; + return nBytes; } finally { - fdUnref(); + fdUnref(exc); } } @@ -462,7 +481,6 @@ public class DomainSocket implements Closeable { success = true; return ret; } finally { - fdUnref(); if (!success) { for (int i = 0; i < fds.length; i++) { if (fds[i] != null) { @@ -481,6 +499,7 @@ public class DomainSocket implements Closeable { } } } + fdUnref(!success); } } @@ -505,32 +524,40 @@ public class DomainSocket implements Closeable { @Override public int read() throws IOException { fdRef(); + boolean exc = true; try { byte b[] = new byte[1]; int ret = DomainSocket.readArray0(DomainSocket.this.fd, b, 0, 1); + exc = false; return (ret >= 0) ? 
b[0] : -1; } finally { - fdUnref(); + fdUnref(exc); } } @Override public int read(byte b[], int off, int len) throws IOException { fdRef(); + boolean exc = true; try { - return DomainSocket.readArray0(DomainSocket.this.fd, b, off, len); + int nRead = DomainSocket.readArray0(DomainSocket.this.fd, b, off, len); + exc = false; + return nRead; } finally { - fdUnref(); + fdUnref(exc); } } @Override public int available() throws IOException { fdRef(); + boolean exc = true; try { - return DomainSocket.available0(DomainSocket.this.fd); + int nAvailable = DomainSocket.available0(DomainSocket.this.fd); + exc = false; + return nAvailable; } finally { - fdUnref(); + fdUnref(exc); } } @@ -553,22 +580,26 @@ public class DomainSocket implements Closeable { @Override public void write(int val) throws IOException { fdRef(); + boolean exc = true; try { byte b[] = new byte[1]; b[0] = (byte)val; DomainSocket.writeArray0(DomainSocket.this.fd, b, 0, 1); + exc = false; } finally { - fdUnref(); + fdUnref(exc); } } @Override public void write(byte[] b, int off, int len) throws IOException { fdRef(); + boolean exc = true; try { DomainSocket.writeArray0(DomainSocket.this.fd, b, off, len); + exc = false; } finally { - fdUnref(); + fdUnref(exc); } } } @@ -588,6 +619,7 @@ public class DomainSocket implements Closeable { @Override public int read(ByteBuffer dst) throws IOException { fdRef(); + boolean exc = true; try { int nread = 0; if (dst.isDirect()) { @@ -605,9 +637,10 @@ public class DomainSocket implements Closeable { if (nread > 0) { dst.position(dst.position() + nread); } + exc = false; return nread; } finally { - fdUnref(); + fdUnref(exc); } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index ab293ff5138..3bba0b1e256 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -25,6 +25,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.SocketTimeoutException; import java.nio.ByteBuffer; +import java.nio.channels.AsynchronousCloseException; +import java.nio.channels.ClosedChannelException; import java.util.Arrays; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Callable; @@ -96,8 +98,47 @@ public class TestDomainSocket { } /** - * Test that if one thread is blocking in accept(), another thread - * can close the socket and stop the accept. + * Test that we get a read result of -1 on EOF. 
+ * + * @throws IOException + */ + @Test(timeout=180000) + public void testSocketReadEof() throws Exception { + final String TEST_PATH = new File(sockDir.getDir(), + "testSocketReadEof").getAbsolutePath(); + final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); + ExecutorService exeServ = Executors.newSingleThreadExecutor(); + Callable callable = new Callable() { + public Void call(){ + DomainSocket conn; + try { + conn = serv.accept(); + } catch (IOException e) { + throw new RuntimeException("unexpected IOException", e); + } + byte buf[] = new byte[100]; + for (int i = 0; i < buf.length; i++) { + buf[i] = 0; + } + try { + Assert.assertEquals(-1, conn.getInputStream().read()); + } catch (IOException e) { + throw new RuntimeException("unexpected IOException", e); + } + return null; + } + }; + Future future = exeServ.submit(callable); + DomainSocket conn = DomainSocket.connect(serv.getPath()); + Thread.sleep(50); + conn.close(); + serv.close(); + future.get(2, TimeUnit.MINUTES); + } + + /** + * Test that if one thread is blocking in a read or write operation, another + * thread can close the socket and stop the accept. * * @throws IOException */ @@ -113,8 +154,10 @@ public class TestDomainSocket { serv.accept(); throw new RuntimeException("expected the accept() to be " + "interrupted and fail"); - } catch (IOException e) { + } catch (AsynchronousCloseException e) { return null; + } catch (IOException e) { + throw new RuntimeException("unexpected IOException", e); } } }; @@ -124,6 +167,96 @@ public class TestDomainSocket { future.get(2, TimeUnit.MINUTES); } + /** + * Test that we get an AsynchronousCloseException when the DomainSocket + * we're using is closed during a read or write operation. + * + * @throws IOException + */ + private void testAsyncCloseDuringIO(final boolean closeDuringWrite) + throws Exception { + final String TEST_PATH = new File(sockDir.getDir(), + "testAsyncCloseDuringIO(" + closeDuringWrite + ")").getAbsolutePath(); + final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); + ExecutorService exeServ = Executors.newFixedThreadPool(2); + Callable serverCallable = new Callable() { + public Void call() { + DomainSocket serverConn = null; + try { + serverConn = serv.accept(); + byte buf[] = new byte[100]; + for (int i = 0; i < buf.length; i++) { + buf[i] = 0; + } + // The server just continues either writing or reading until someone + // asynchronously closes the client's socket. At that point, all our + // reads return EOF, and writes get a socket error. + if (closeDuringWrite) { + try { + while (true) { + serverConn.getOutputStream().write(buf); + } + } catch (IOException e) { + } + } else { + do { ; } while + (serverConn.getInputStream().read(buf, 0, buf.length) != -1); + } + } catch (IOException e) { + throw new RuntimeException("unexpected IOException", e); + } finally { + IOUtils.cleanup(DomainSocket.LOG, serverConn); + } + return null; + } + }; + Future serverFuture = exeServ.submit(serverCallable); + final DomainSocket clientConn = DomainSocket.connect(serv.getPath()); + Callable clientCallable = new Callable() { + public Void call(){ + // The client writes or reads until another thread + // asynchronously closes the socket. At that point, we should + // get ClosedChannelException, or possibly its subclass + // AsynchronousCloseException. 
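On the caller side, the contract exercised by this test is simply that a blocked read or write completes exceptionally once another thread closes the socket. The fragment below is an illustrative consumer only, not taken from this patch; the connect path is assumed.

    final DomainSocket sock = DomainSocket.connect(path);  // path is an assumed socket path
    Runnable reader = new Runnable() {
      @Override
      public void run() {
        byte buf[] = new byte[8192];
        try {
          InputStream in = sock.getInputStream();
          while (in.read(buf, 0, buf.length) != -1) {
            // consume data ...
          }
        } catch (ClosedChannelException e) {
          // Also covers its subclass AsynchronousCloseException: another thread
          // closed the socket while this one was blocked; exit quietly.
        } catch (IOException e) {
          // a genuine transport error
        }
      }
    };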
+ byte buf[] = new byte[100]; + for (int i = 0; i < buf.length; i++) { + buf[i] = 0; + } + try { + if (closeDuringWrite) { + while (true) { + clientConn.getOutputStream().write(buf); + } + } else { + while (true) { + clientConn.getInputStream().read(buf, 0, buf.length); + } + } + } catch (ClosedChannelException e) { + return null; + } catch (IOException e) { + throw new RuntimeException("unexpected IOException", e); + } + } + }; + Future clientFuture = exeServ.submit(clientCallable); + Thread.sleep(500); + clientConn.close(); + serv.close(); + clientFuture.get(2, TimeUnit.MINUTES); + serverFuture.get(2, TimeUnit.MINUTES); + } + + @Test(timeout=180000) + public void testAsyncCloseDuringWrite() throws Exception { + testAsyncCloseDuringIO(true); + } + + @Test(timeout=180000) + public void testAsyncCloseDuringRead() throws Exception { + testAsyncCloseDuringIO(false); + } + /** * Test that attempting to connect to an invalid path doesn't work. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index af90e837028..b5dce88312b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -10,3 +10,6 @@ HDFS-4354. Create DomainSocket and DomainPeer and associated unit tests. HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths. (Colin Patrick McCabe via todd) + +HDFS-4388. DomainSocket should throw AsynchronousCloseException when appropriate. +(Colin Patrick McCabe via todd) From d2737575ad52137d870c62a3bd7a8dcf83b189ac Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Sat, 12 Jan 2013 01:19:27 +0000 Subject: [PATCH 06/52] HDFS-4390. Bypass UNIX domain socket unit tests when they cannot be run. Contributed by Colin Patrick McCabe. 
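
The guard used throughout this patch is a JUnit Assume precondition: each affected test class skips (rather than fails) when the native domain-socket library cannot be loaded. A minimal sketch of that pattern, condensed from the diffs below; the class name and the elided cluster setup are illustrative only:

    import static org.hamcrest.CoreMatchers.equalTo;

    import org.apache.hadoop.net.unix.DomainSocket;
    import org.junit.Assume;
    import org.junit.Before;
    import org.junit.BeforeClass;

    public class ExampleDomainSocketTest {
      @BeforeClass
      public static void setupCluster() throws Exception {
        // Skip the expensive cluster setup entirely when the native library
        // is unavailable on this platform.
        if (DomainSocket.getLoadingFailureReason() != null) return;
        // ... configure the domain socket path and start the cluster here ...
      }

      @Before
      public void before() {
        // Assume.assumeThat marks each test as skipped, not failed, when the
        // precondition does not hold.
        Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null));
      }
    }
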
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1432350 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/net/unix/TestDomainSocket.java | 5 +---- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 4 ++++ .../org/apache/hadoop/hdfs/BlockReaderTestUtil.java | 11 ----------- .../org/apache/hadoop/hdfs/TestParallelReadUtil.java | 3 --- .../hadoop/hdfs/TestParallelShortCircuitRead.java | 12 +++++++++++- .../hdfs/TestParallelShortCircuitReadNoChecksum.java | 10 ++++++++++ .../hadoop/hdfs/TestParallelUnixDomainRead.java | 12 +++++++++++- .../hadoop/hdfs/TestShortCircuitLocalRead.java | 8 ++++++++ 8 files changed, 45 insertions(+), 20 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index 3bba0b1e256..2d31874a0d2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -30,12 +30,10 @@ import java.nio.channels.ClosedChannelException; import java.util.Arrays; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; import org.junit.AfterClass; import org.junit.Assert; @@ -49,7 +47,6 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.unix.DomainSocket.DomainChannel; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; -import org.apache.hadoop.util.Shell.ExitCodeException; import com.google.common.io.Files; @@ -68,7 +65,7 @@ public class TestDomainSocket { } @Before - public void checkPrecondition() { + public void before() { Assume.assumeTrue(DomainSocket.getLoadingFailureReason() == null); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index b5dce88312b..ad9e4abd43f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -13,3 +13,7 @@ HDFS-4356. BlockReaderLocal should use passed file descriptors rather than paths HDFS-4388. DomainSocket should throw AsynchronousCloseException when appropriate. (Colin Patrick McCabe via todd) + +HDFS-4390. Bypass UNIX domain socket unit tests when they cannot be run. 
+(Colin Patrick McCabe via todd) + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java index 51b0796572d..f96849e64cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java @@ -28,7 +28,6 @@ import java.net.Socket; import java.util.List; import java.util.Random; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.net.TcpPeerServer; @@ -38,7 +37,6 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.net.unix.DomainSocket; /** * A helper class to setup the cluster, and get to BlockReader and DataNode for a block. @@ -168,13 +166,4 @@ public class BlockReaderTestUtil { int ipcport = nodes[0].getIpcPort(); return cluster.getDataNode(ipcport); } - - public boolean haveRequiredResources() { - if (conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY) != null) { - // To use UNIX Domain sockets, we must have the native code loaded. - return DomainSocket.getLoadingFailureReason() == null; - } else { - return true; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java index 6f10804d5db..c8d3ca69b74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelReadUtil.java @@ -401,19 +401,16 @@ public class TestParallelReadUtil { */ @Test public void testParallelReadCopying() throws IOException { - Assume.assumeTrue(util.haveRequiredResources()); runTestWorkload(new CopyingReadWorkerHelper()); } @Test public void testParallelReadByteBuffer() throws IOException { - Assume.assumeTrue(util.haveRequiredResources()); runTestWorkload(new DirectReadWorkerHelper()); } @Test public void testParallelReadMixed() throws IOException { - Assume.assumeTrue(util.haveRequiredResources()); runTestWorkload(new MixedWorkloadHelper()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java index 776e5d7e030..f5b0ef6672a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java @@ -22,13 +22,17 @@ import java.io.File; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.Before; import org.junit.BeforeClass; +import static org.hamcrest.CoreMatchers.*; public class TestParallelShortCircuitRead extends TestParallelReadUtil { private static TemporarySocketDirectory sockDir; @BeforeClass static public void setupCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != 
null) return; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, @@ -40,9 +44,15 @@ public class TestParallelShortCircuitRead extends TestParallelReadUtil { setupCluster(1, conf); } + @Before + public void before() { + Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null)); + } + @AfterClass static public void teardownCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; sockDir.close(); TestParallelReadUtil.teardownCluster(); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java index 2f2a2c6b96b..a27016ab702 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java @@ -22,13 +22,17 @@ import java.io.File; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.Before; import org.junit.BeforeClass; +import static org.hamcrest.CoreMatchers.*; public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil { private static TemporarySocketDirectory sockDir; @BeforeClass static public void setupCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, @@ -40,8 +44,14 @@ public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil setupCluster(1, conf); } + @Before + public void before() { + Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null)); + } + @AfterClass static public void teardownCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; sockDir.close(); TestParallelReadUtil.teardownCluster(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java index d2170d32bfe..5113dbf8eb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java @@ -22,13 +22,17 @@ import java.io.File; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.Before; import org.junit.BeforeClass; +import static org.hamcrest.CoreMatchers.*; public class TestParallelUnixDomainRead extends TestParallelReadUtil { private static TemporarySocketDirectory sockDir; @BeforeClass static public void setupCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, @@ -38,9 +42,15 @@ public class TestParallelUnixDomainRead extends TestParallelReadUtil { 
setupCluster(1, conf); } + @Before + public void before() { + Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null)); + } + @AfterClass static public void teardownCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; sockDir.close(); TestParallelReadUtil.teardownCluster(); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index 3f21e230e90..b0a1c4a91ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -47,8 +47,11 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.junit.AfterClass; import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import static org.hamcrest.CoreMatchers.*; /** * Test for short circuit read functionality using {@link BlockReaderLocal}. @@ -71,6 +74,11 @@ public class TestShortCircuitLocalRead { sockDir.close(); } + @Before + public void before() { + Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null)); + } + static final long seed = 0xDEADBEEFL; static final int blockSize = 5120; boolean simulatedStorage = false; From d08b1af26a56b096c7087b46a3ccd4417f4cc7ef Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 14 Jan 2013 21:34:22 +0000 Subject: [PATCH 07/52] HDFS-4400. DFSInputStream#getBlockReader: last retries should ignore the cache. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1433144 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index ad9e4abd43f..1289d9e21a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -17,3 +17,5 @@ HDFS-4388. DomainSocket should throw AsynchronousCloseException when appropriate HDFS-4390. Bypass UNIX domain socket unit tests when they cannot be run. (Colin Patrick McCabe via todd) +HDFS-4400. DFSInputStream#getBlockReader: last retries should ignore the cache +(Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index a33f8f54e7f..0f4b7e350fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -941,7 +941,7 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable // equivalent to declaring the DataNode bad. 
boolean triedNonDomainSocketReader = false; for (int retries = 0; - retries < nCachedConnRetry && (!triedNonDomainSocketReader); + retries < nCachedConnRetry || (!triedNonDomainSocketReader); ++retries) { Peer peer = null; if (retries < nCachedConnRetry) { From 35a145d92f6b8513924616cca2a08e569333cfb2 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 15 Jan 2013 00:12:47 +0000 Subject: [PATCH 08/52] HDFS-4401. Fix bug in DomainSocket path validation. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1433229 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/net/unix/DomainSocket.c | 27 +++++++++++-------- .../hadoop/net/unix/TestDomainSocket.java | 2 +- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 +++ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c index 2daa4866c92..d05499609f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c @@ -232,7 +232,7 @@ Java_org_apache_hadoop_net_unix_DomainSocket_validateSocketPathSecurity0( JNIEnv *env, jclass clazz, jobject jstr, jint skipComponents) { jint utfLength; - char path[PATH_MAX], check[PATH_MAX] = { 0 }, *token, *rest; + char path[PATH_MAX], check[PATH_MAX], *token, *rest; struct stat st; int ret, mode, strlenPath; uid_t uid; @@ -263,21 +263,26 @@ JNIEnv *env, jclass clazz, jobject jstr, jint skipComponents) "must not end in a slash.", path); goto done; } - rest = path; - while ((token = strtok_r(rest, "/", &rest))) { - strcat(check, "/"); + // This loop iterates through all of the path components except for the very + // last one. We don't validate the last component, since it's not supposed to + // be a directory. (If it is a directory, we will fail to create the socket + // later with EISDIR or similar.) + for (check[0] = '/', check[1] = '\0', rest = path, token = ""; + token && rest[0]; + token = strtok_r(rest, "/", &rest)) { + if (strcmp(check, "/") != 0) { + // If the previous directory we checked was '/', we skip appending another + // slash to the end because it would be unncessary. Otherwise we do it. + strcat(check, "/"); + } + // These strcats are safe because the length of 'check' is the same as the + // length of 'path' and we never add more slashes than were in the original + // path. strcat(check, token); if (skipComponents > 0) { skipComponents--; continue; } - if (!index(rest, '/')) { - /* Don't validate the last component, since it's not supposed to be a - * directory. (If it is a directory, we will fail to create the socket - * later with EISDIR or similar.) - */ - break; - } if (stat(check, &st) < 0) { ret = errno; jthr = newIOException(env, "failed to stat a path component: '%s'. 
" diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index 2d31874a0d2..a997f52056a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -698,7 +698,7 @@ public class TestDomainSocket { "component: ", e); } // Root should be secure - DomainSocket.validateSocketPathSecurity0("/foo", 0); + DomainSocket.validateSocketPathSecurity0("/foo", 1); } finally { tmp.close(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 1289d9e21a6..96afb6a9847 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -19,3 +19,6 @@ HDFS-4390. Bypass UNIX domain socket unit tests when they cannot be run. HDFS-4400. DFSInputStream#getBlockReader: last retries should ignore the cache (Colin Patrick McCabe via todd) + +HDFS-4401. Fix bug in DomainSocket path validation +(Colin Patrick McCabe via todd) From d79868d3e5abb8d4f9e7a22fe681946fc263ab33 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 15 Jan 2013 00:20:34 +0000 Subject: [PATCH 09/52] Add branch-specific CHANGES.txt file to RAT excludes list git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1433232 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 6d110e8d704..6cdc0599326 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -500,6 +500,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> CHANGES.txt CHANGES.HDFS-1623.txt + CHANGES.HDFS-347.txt .idea/** src/main/conf/* src/main/docs/** From 2fd41b3b429775a7151e84d971f593d99ef8de14 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 15 Jan 2013 00:31:35 +0000 Subject: [PATCH 10/52] HDFS-4402. Some small DomainSocket fixes: avoid findbugs warning, change log level, etc. Contributed by Colin Patrick McCabe. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1433242 13f79535-47bb-0310-9956-ffa450edef68 --- .../main/java/org/apache/hadoop/net/unix/DomainSocket.java | 2 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 3 +++ .../main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java | 6 +++++- .../java/org/apache/hadoop/hdfs/DomainSocketFactory.java | 6 +++--- .../java/org/apache/hadoop/hdfs/FileInputStreamCache.java | 1 - 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java index 9c37db3d4fb..034ac3b62da 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -254,7 +254,7 @@ public class DomainSocket implements Closeable { private void fdUnref(boolean checkClosed) throws AsynchronousCloseException { int newCount = status.decrementAndGet(); assert (newCount & ~STATUS_CLOSED_MASK) >= 0; - if (checkClosed & ((newCount & STATUS_CLOSED_MASK) != 0)) { + if (checkClosed && ((newCount & STATUS_CLOSED_MASK) != 0)) { throw new AsynchronousCloseException(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 96afb6a9847..afc04b4ff4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -22,3 +22,6 @@ HDFS-4400. DFSInputStream#getBlockReader: last retries should ignore the cache HDFS-4401. Fix bug in DomainSocket path validation (Colin Patrick McCabe via todd) + +HDFS-4402. Some small DomainSocket fixes: avoid findbugs warning, change log level, etc. +(Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index 1c34a71c26d..ca9ce080f64 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs; import java.io.DataInputStream; import org.apache.hadoop.conf.Configuration; + +import java.io.BufferedInputStream; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -118,7 +120,9 @@ class BlockReaderLocal implements BlockReader { // read and handle the common header here. 
For now just a version checksumIn.getChannel().position(0); BlockMetadataHeader header = BlockMetadataHeader - .readHeader(new DataInputStream(checksumIn)); + .readHeader(new DataInputStream( + new BufferedInputStream(checksumIn, + BlockMetadataHeader.getHeaderSize()))); short version = header.getVersion(); if (version != BlockMetadataHeader.VERSION) { throw new IOException("Wrong version (" + version + ") of the " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java index 50b60521070..db9afc1c40c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -62,7 +62,7 @@ class DomainSocketFactory { LOG.warn(feature + " is disabled because you have not set " + DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); } else if (DomainSocket.getLoadingFailureReason() != null) { - LOG.error(feature + " is disabled because " + + LOG.warn(feature + " is disabled because " + DomainSocket.getLoadingFailureReason()); } else { LOG.debug(feature + "is enabled."); @@ -113,7 +113,7 @@ class DomainSocketFactory { sock.setAttribute(DomainSocket.RCV_TIMEO, conf.socketTimeout); success = true; } catch (IOException e) { - LOG.error("error creating DomainSocket", e); + LOG.warn("error creating DomainSocket", e); // fall through } finally { if (!success) { @@ -134,4 +134,4 @@ class DomainSocketFactory { public void disableDomainSocketPath(String path) { pathInfo.put(path, PathStatus.UNUSABLE); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java index d9045f0cf2c..ac0af814362 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/FileInputStreamCache.java @@ -22,7 +22,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map.Entry; import java.util.concurrent.ScheduledThreadPoolExecutor; -import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; From 52c5b8ea9ff2876a047174f50f251c87c71c21c9 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 17 Jan 2013 23:11:30 +0000 Subject: [PATCH 11/52] HDFS-4418. increase default FileInputStreamCache size. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1434953 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index afc04b4ff4c..5ae5750fe2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -25,3 +25,5 @@ HDFS-4401. Fix bug in DomainSocket path validation HDFS-4402. Some small DomainSocket fixes: avoid findbugs warning, change log level, etc. (Colin Patrick McCabe via todd) + +HDFS-4418. 
increase default FileInputStreamCache size (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index d0b29b71aa8..034a0152f19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -343,9 +343,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT = false; public static final String DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY = "dfs.client.read.shortcircuit.buffer.size"; public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY = "dfs.client.read.shortcircuit.streams.cache.size"; - public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 10; + public static final int DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_DEFAULT = 100; public static final String DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_KEY = "dfs.client.read.shortcircuit.streams.cache.expiry.ms"; - public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 60000; + public static final long DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_EXPIRY_MS_DEFAULT = 5000; public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024; public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic"; public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false; From 89bd14913aa5f00506a610ca325db495372c4c87 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 21 Jan 2013 19:29:06 +0000 Subject: [PATCH 12/52] HDFS-4416. Rename dfs.datanode.domain.socket.path to dfs.domain.socket.path. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1436568 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/conf/Configuration.java | 17 ++++++++++++++++- .../apache/hadoop/net/unix/DomainSocket.java | 4 ++-- .../hadoop/net/unix/TestDomainSocket.java | 2 +- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 +++ .../java/org/apache/hadoop/hdfs/DFSClient.java | 3 ++- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 3 ++- .../apache/hadoop/hdfs/DomainSocketFactory.java | 4 ++-- .../hadoop/hdfs/server/datanode/DataNode.java | 7 ++++--- .../src/main/resources/hdfs-default.xml | 11 +++++++++++ .../hdfs/TestParallelShortCircuitRead.java | 2 +- .../TestParallelShortCircuitReadNoChecksum.java | 2 +- .../hadoop/hdfs/TestParallelUnixDomainRead.java | 2 +- .../hadoop/hdfs/TestShortCircuitLocalRead.java | 4 ++-- 13 files changed, 48 insertions(+), 16 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 73faf968628..c44a18f9737 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -743,6 +743,21 @@ public class Configuration implements Iterable>, return value.trim(); } } + + /** + * Get the value of the name property as a trimmed String, + * defaultVal if no such property exists. + * See @{Configuration#getTrimmed} for more details. 
+ * + * @param name the property name. + * @param defaultVal the property default value. + * @return the value of the name or defaultVal + * if it is not set. + */ + public String getTrimmed(String name, String defaultValue) { + String ret = getTrimmed(name); + return ret == null ? defaultValue : ret; + } /** * Get the value of the name property, without doing @@ -877,7 +892,7 @@ public class Configuration implements Iterable>, } return result; } - + /** * Get the value of the name property as an int. * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java index 034ac3b62da..aa4b952e8d1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -119,7 +119,7 @@ public class DomainSocket implements Closeable { /** * Given a path and a port, compute the effective path by replacing - * occurrences of __PORT__ with the port. This is mainly to make it + * occurrences of _PORT with the port. This is mainly to make it * possible to run multiple DataNodes locally for testing purposes. * * @param path The source path @@ -128,7 +128,7 @@ public class DomainSocket implements Closeable { * @return The effective path */ public static String getEffectivePath(String path, int port) { - return path.replace("__PORT__", String.valueOf(port)); + return path.replace("_PORT", String.valueOf(port)); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index a997f52056a..a490fefcac7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -91,7 +91,7 @@ public class TestDomainSocket { @Test(timeout=180000) public void testSocketPathSetGet() throws IOException { Assert.assertEquals("/var/run/hdfs/sock.100", - DomainSocket.getEffectivePath("/var/run/hdfs/sock.__PORT__", 100)); + DomainSocket.getEffectivePath("/var/run/hdfs/sock._PORT", 100)); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 5ae5750fe2b..d1096376bcc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -27,3 +27,6 @@ HDFS-4402. Some small DomainSocket fixes: avoid findbugs warning, change log lev (Colin Patrick McCabe via todd) HDFS-4418. increase default FileInputStreamCache size (todd) + +HDFS-4416. 
Rename dfs.datanode.domain.socket.path to dfs.domain.socket.path +(Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index f68dae7b909..d8e08620072 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -291,7 +291,8 @@ public class DFSClient implements java.io.Closeable { getFileBlockStorageLocationsTimeout = conf.getInt( DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT, DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT); - domainSocketPath = conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + domainSocketPath = conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT); skipShortCircuitChecksums = conf.getBoolean( DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 034a0152f19..c8317c01d7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -399,7 +399,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY = "dfs.web.authentication.kerberos.keytab"; public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user"; - public static final String DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY = "dfs.datanode.domain.socket.path"; + public static final String DFS_DOMAIN_SOCKET_PATH_KEY = "dfs.domain.socket.path"; + public static final String DFS_DOMAIN_SOCKET_PATH_DEFAULT = ""; // HA related configuration public static final String DFS_HA_NAMENODES_KEY_PREFIX = "dfs.ha.namenodes"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java index db9afc1c40c..103c946e5e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -58,9 +58,9 @@ class DomainSocketFactory { feature = "UNIX domain socket data traffic"; } if (feature != null) { - if (conf.domainSocketPath == null) { + if (conf.domainSocketPath.isEmpty()) { LOG.warn(feature + " is disabled because you have not set " + - DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY); } else if (DomainSocket.getLoadingFailureReason() != null) { LOG.warn(feature + " is disabled because " + DomainSocket.getLoadingFailureReason()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index d3e3b5b2d63..99e508b53d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -572,13 +572,14 @@ public class DataNode extends Configured static DomainPeerServer getDomainPeerServer(Configuration conf, int port) throws IOException { String domainSocketPath = - conf.get(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); - if (domainSocketPath == null) { + conf.getTrimmed(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT); + if (domainSocketPath.isEmpty()) { if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) { LOG.warn("Although short-circuit local reads are configured, " + "they are disabled because you didn't configure " + - DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY); + DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY); } return null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 34cd8465fd7..5ec12106c47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1197,4 +1197,15 @@ + + dfs.domain.socket.path + + + Optional. This is a path to a UNIX domain socket that will be used for + communication between the DataNode and local HDFS clients. + If the string "_PORT" is present in this path, it will be replaced by the + TCP port of the DataNode. + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java index f5b0ef6672a..0e21cdcd8f4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java @@ -35,7 +35,7 @@ public class TestParallelShortCircuitRead extends TestParallelReadUtil { if (DomainSocket.getLoadingFailureReason() != null) return; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java index a27016ab702..a704dd30bb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java @@ -35,7 +35,7 @@ public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil if (DomainSocket.getLoadingFailureReason() != null) return; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java index 5113dbf8eb2..84688cb4081 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java @@ -35,7 +35,7 @@ public class TestParallelUnixDomainRead extends TestParallelReadUtil { if (DomainSocket.getLoadingFailureReason() != null) return; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false); DomainSocket.disableBindPathValidation(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index b0a1c4a91ea..e71fc8ce183 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -193,9 +193,9 @@ public class TestShortCircuitLocalRead { conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, ignoreChecksum); - conf.set(DFSConfigKeys.DFS_DATANODE_DOMAIN_SOCKET_PATH_KEY, + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), - "TestShortCircuitLocalRead.__PORT__.sock").getAbsolutePath()); + "TestShortCircuitLocalRead._PORT.sock").getAbsolutePath()); if (simulatedStorage) { SimulatedFSDataset.setFactory(conf); } From d12f465c674b3bb5102671b6d6c2746261602d7e Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 23 Jan 2013 18:38:56 +0000 Subject: [PATCH 13/52] HDFS-4417. Fix case where local reads get disabled incorrectly. Contributed by Colin Patrick McCabe. 
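
At a high level, the rewritten DFSInputStream#getBlockReader in this patch tries sources in a fixed order: cached local file descriptors first, then cached domain-socket peers, then a freshly created domain socket (whose path is marked unusable on failure), then cached TCP peers, and only as a last resort a brand-new TCP connection. A condensed sketch of that control flow; the helper names are illustrative stand-ins for the PeerCache, DomainSocketFactory and BlockReaderFactory calls in the diff, not real HDFS APIs:

    import java.io.IOException;

    /** Sketch of the fallback order only; not the actual HDFS implementation. */
    abstract class ReaderFallbackSketch {
      // Each helper returns a reader on success, or null when that source is
      // unavailable, mirroring the loop-plus-fallthrough structure in the diff.
      abstract Object tryCachedDomainPeer();
      abstract Object tryNewDomainSocket();   // disables the socket path on failure
      abstract Object tryCachedTcpPeer();
      abstract Object newTcpReader() throws IOException;

      Object getReaderWithFallback() throws IOException {
        Object reader = tryCachedDomainPeer();   // 1. cached domain-socket peers
        if (reader != null) return reader;
        reader = tryNewDomainSocket();           // 2. fresh domain socket
        if (reader != null) return reader;
        reader = tryCachedTcpPeer();             // 3. cached TCP peers
        if (reader != null) return reader;
        return newTcpReader();                   // 4. last resort: new TCP connection
      }
    }
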
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1437616 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 + .../hadoop/hdfs/BlockReaderFactory.java | 2 +- .../apache/hadoop/hdfs/DFSInputStream.java | 158 +++++++++--------- .../org/apache/hadoop/hdfs/PeerCache.java | 43 ++++- .../hdfs/server/datanode/DataXceiver.java | 1 - .../hdfs/TestDataTransferKeepalive.java | 2 +- .../TestParallelShortCircuitReadUnCached.java | 73 ++++++++ .../org/apache/hadoop/hdfs/TestPeerCache.java | 81 +++++++-- 8 files changed, 263 insertions(+), 100 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index d1096376bcc..cc81fa36076 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -30,3 +30,6 @@ HDFS-4418. increase default FileInputStreamCache size (todd) HDFS-4416. Rename dfs.datanode.domain.socket.path to dfs.domain.socket.path (Colin Patrick McCabe via todd) + +HDFS-4417. Fix case where local reads get disabled incorrectly +(Colin Patrick McCabe and todd via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index 7dd18436469..89ed7c4859d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -68,7 +68,7 @@ public class BlockReaderFactory { * case. * @param allowShortCircuitLocalReads True if short-circuit local reads * should be allowed. - * @return New BlockReader instance, or null on error. + * @return New BlockReader instance */ @SuppressWarnings("deprecation") public static BlockReader newBlockReader( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 0f4b7e350fc..69198ff6ddf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -35,8 +35,8 @@ import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.io.IOUtils; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.ByteBufferReadable; +import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.hdfs.net.DomainPeer; @@ -56,7 +56,8 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.hdfs.FileInputStreamCache; + +import com.google.common.annotations.VisibleForTesting; /**************************************************************** * DFSInputStream provides bytes from a named file. 
It handles @@ -64,11 +65,11 @@ import org.apache.hadoop.hdfs.FileInputStreamCache; ****************************************************************/ @InterfaceAudience.Private public class DFSInputStream extends FSInputStream implements ByteBufferReadable { + @VisibleForTesting + static boolean tcpReadsDisabledForTesting = false; private final PeerCache peerCache; - private final DFSClient dfsClient; private boolean closed = false; - private final String src; private final long prefetchSize; private BlockReader blockReader = null; @@ -853,33 +854,23 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable } } - private Peer newPeer(InetSocketAddress addr) throws IOException { + private Peer newTcpPeer(InetSocketAddress addr) throws IOException { Peer peer = null; boolean success = false; Socket sock = null; - DomainSocket domSock = null; - try { - domSock = dfsClient.getDomainSocketFactory().create(addr, this); - if (domSock != null) { - // Create a UNIX Domain peer. - peer = new DomainPeer(domSock); - } else { - // Create a conventional TCP-based Peer. - sock = dfsClient.socketFactory.createSocket(); - NetUtils.connect(sock, addr, - dfsClient.getRandomLocalInterfaceAddr(), - dfsClient.getConf().socketTimeout); - peer = TcpPeerServer.peerFromSocketAndKey(sock, - dfsClient.getDataEncryptionKey()); - } + sock = dfsClient.socketFactory.createSocket(); + NetUtils.connect(sock, addr, + dfsClient.getRandomLocalInterfaceAddr(), + dfsClient.getConf().socketTimeout); + peer = TcpPeerServer.peerFromSocketAndKey(sock, + dfsClient.getDataEncryptionKey()); success = true; return peer; } finally { if (!success) { IOUtils.closeQuietly(peer); IOUtils.closeQuietly(sock); - IOUtils.closeQuietly(domSock); } } } @@ -888,6 +879,9 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable * Retrieve a BlockReader suitable for reading. * This method will reuse the cached connection to the DN if appropriate. * Otherwise, it will create a new connection. + * Throwing an IOException from this method is basically equivalent to + * declaring the DataNode bad, so we try to connect a lot of different ways + * before doing that. * * @param dnAddr Address of the datanode * @param chosenNode Chosen datanode information @@ -912,9 +906,6 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable boolean verifyChecksum, String clientName) throws IOException { - - IOException err = null; - // Firstly, we check to see if we have cached any file descriptors for // local blocks. If so, we can just re-use those file descriptors. FileInputStream fis[] = fileInputStreamCache.get(chosenNode, block); @@ -927,67 +918,84 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum); } - // We retry several times here. - // On the first nCachedConnRetry times, we try to fetch a socket from - // the socketCache and use it. This may fail, since the old socket may - // have been closed by the peer. - // After that, we try to create a new socket using newPeer(). - // This may create either a TCP socket or a UNIX domain socket, depending - // on the configuration and whether the peer is remote. - // If we try to create a UNIX domain socket and fail, we will not try that - // again. Instead, we'll try to create a TCP socket. Only after we've - // failed to create a TCP-based BlockReader will we throw an IOException - // from this function. 
Throwing an IOException from here is basically - // equivalent to declaring the DataNode bad. - boolean triedNonDomainSocketReader = false; - for (int retries = 0; - retries < nCachedConnRetry || (!triedNonDomainSocketReader); - ++retries) { - Peer peer = null; - if (retries < nCachedConnRetry) { - peer = peerCache.get(chosenNode); - } - if (peer == null) { - peer = newPeer(dnAddr); - if (peer.getDomainSocket() == null) { - triedNonDomainSocketReader = true; - } - } - boolean success = false; + // Look for cached domain peers. + int cacheTries = 0; + DomainSocketFactory dsFactory = dfsClient.getDomainSocketFactory(); + BlockReader reader = null; + for (; cacheTries < nCachedConnRetry; ++cacheTries) { + Peer peer = peerCache.get(chosenNode, true); + if (peer == null) break; try { - boolean allowShortCircuitLocalReads = - (peer.getDomainSocket() != null) && - dfsClient.getConf().shortCircuitLocalReads && - (!shortCircuitForbidden()); - // Here we will try to send either an OP_READ_BLOCK request or an - // OP_REQUEST_SHORT_CIRCUIT_FDS, depending on what kind of block reader - // we're trying to create. - BlockReader blockReader = BlockReaderFactory.newBlockReader( + boolean allowShortCircuitLocalReads = dfsClient.getConf(). + shortCircuitLocalReads && (!shortCircuitForbidden()); + reader = BlockReaderFactory.newBlockReader( dfsClient.conf, file, block, blockToken, startOffset, len, verifyChecksum, clientName, peer, chosenNode, - dfsClient.getDomainSocketFactory(), allowShortCircuitLocalReads); - success = true; - return blockReader; - } catch (IOException ex) { - // Our socket is no good. - DFSClient.LOG.debug("Error making BlockReader. " + + dsFactory, allowShortCircuitLocalReads); + return reader; + } catch (IOException ex) { + DFSClient.LOG.debug("Error making BlockReader with DomainSocket. " + "Closing stale " + peer, ex); - if (peer.getDomainSocket() != null) { - // If the Peer that we got the error from was a DomainPeer, - // mark the socket path as bad, so that newDataSocket will not try - // to re-open this socket for a while. - dfsClient.getDomainSocketFactory(). - disableDomainSocketPath(peer.getDomainSocket().getPath()); - } - err = ex; } finally { - if (!success) { + if (reader == null) { IOUtils.closeQuietly(peer); } } } - throw err; + // Try to create a DomainPeer. + DomainSocket domSock = dsFactory.create(dnAddr, this); + if (domSock != null) { + Peer peer = new DomainPeer(domSock); + try { + boolean allowShortCircuitLocalReads = dfsClient.getConf(). + shortCircuitLocalReads && (!shortCircuitForbidden()); + reader = BlockReaderFactory.newBlockReader( + dfsClient.conf, file, block, blockToken, startOffset, + len, verifyChecksum, clientName, peer, chosenNode, + dsFactory, allowShortCircuitLocalReads); + return reader; + } catch (IOException e) { + DFSClient.LOG.warn("failed to connect to " + domSock, e); + } finally { + if (reader == null) { + // If the Peer that we got the error from was a DomainPeer, + // mark the socket path as bad, so that newDataSocket will not try + // to re-open this socket for a while. + dsFactory.disableDomainSocketPath(domSock.getPath()); + IOUtils.closeQuietly(peer); + } + } + } + + // Look for cached peers. 
+ for (; cacheTries < nCachedConnRetry; ++cacheTries) { + Peer peer = peerCache.get(chosenNode, false); + if (peer == null) break; + try { + reader = BlockReaderFactory.newBlockReader( + dfsClient.conf, file, block, blockToken, startOffset, + len, verifyChecksum, clientName, peer, chosenNode, + dsFactory, false); + return reader; + } catch (IOException ex) { + DFSClient.LOG.debug("Error making BlockReader. Closing stale " + + peer, ex); + } finally { + if (reader == null) { + IOUtils.closeQuietly(peer); + } + } + } + if (tcpReadsDisabledForTesting) { + throw new IOException("TCP reads are disabled."); + } + // Try to create a new remote peer. + Peer peer = newTcpPeer(dnAddr); + return BlockReaderFactory.newBlockReader( + dfsClient.conf, file, block, blockToken, startOffset, + len, verifyChecksum, clientName, peer, chosenNode, + dsFactory, false); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java index 09b2ef70b14..dcb4bc18331 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java @@ -39,6 +39,30 @@ import org.apache.hadoop.util.Time; class PeerCache { private static final Log LOG = LogFactory.getLog(PeerCache.class); + private static class Key { + final DatanodeID dnID; + final boolean isDomain; + + Key(DatanodeID dnID, boolean isDomain) { + this.dnID = dnID; + this.isDomain = isDomain; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) { + return false; + } + Key other = (Key)o; + return dnID.equals(other.dnID) && isDomain == other.isDomain; + } + + @Override + public int hashCode() { + return dnID.hashCode() ^ (isDomain ? 1 : 0); + } + } + private static class Value { private final Peer peer; private final long time; @@ -59,7 +83,7 @@ class PeerCache { private Daemon daemon; /** A map for per user per datanode. */ - private static LinkedListMultimap multimap = + private static LinkedListMultimap multimap = LinkedListMultimap.create(); private static int capacity; private static long expiryPeriod; @@ -124,16 +148,18 @@ class PeerCache { /** * Get a cached peer connected to the given DataNode. * @param dnId The DataNode to get a Peer for. + * @param isDomain Whether to retrieve a DomainPeer or not. + * * @return An open Peer connected to the DN, or null if none * was found. 
*/ - public synchronized Peer get(DatanodeID dnId) { + public synchronized Peer get(DatanodeID dnId, boolean isDomain) { if (capacity <= 0) { // disabled return null; } - List sockStreamList = multimap.get(dnId); + List sockStreamList = multimap.get(new Key(dnId, isDomain)); if (sockStreamList == null) { return null; } @@ -168,7 +194,8 @@ class PeerCache { if (capacity == multimap.size()) { evictOldest(); } - multimap.put(dnId, new Value(peer, Time.monotonicNow())); + multimap.put(new Key(dnId, peer.getDomainSocket() != null), + new Value(peer, Time.monotonicNow())); } public synchronized int size() { @@ -180,9 +207,9 @@ class PeerCache { */ private synchronized void evictExpired(long expiryPeriod) { while (multimap.size() != 0) { - Iterator> iter = + Iterator> iter = multimap.entries().iterator(); - Entry entry = iter.next(); + Entry entry = iter.next(); // if oldest socket expired, remove it if (entry == null || Time.monotonicNow() - entry.getValue().getTime() < @@ -201,13 +228,13 @@ class PeerCache { // We can get the oldest element immediately, because of an interesting // property of LinkedListMultimap: its iterator traverses entries in the // order that they were added. - Iterator> iter = + Iterator> iter = multimap.entries().iterator(); if (!iter.hasNext()) { throw new IllegalStateException("Cannot evict from empty cache! " + "capacity: " + capacity); } - Entry entry = iter.next(); + Entry entry = iter.next(); IOUtils.cleanup(LOG, entry.getValue().getPeer()); iter.remove(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 54433e0463c..466635b5e88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -70,7 +70,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java index 9ef0f093ffa..4e785110476 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java @@ -114,7 +114,7 @@ public class TestDataTransferKeepalive { // Take it out of the cache - reading should // give an EOF. 
- Peer peer = dfsClient.peerCache.get(dn.getDatanodeId()); + Peer peer = dfsClient.peerCache.get(dn.getDatanodeId(), false); assertNotNull(peer); assertEquals(-1, peer.getInputStream().read()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java new file mode 100644 index 00000000000..634eef8a7e2 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadUnCached.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.io.File; +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.junit.AfterClass; +import org.junit.Assume; +import org.junit.Before; +import org.junit.BeforeClass; +import static org.hamcrest.CoreMatchers.*; + +/** + * This class tests short-circuit local reads without any FileInputStream or + * Socket caching. This is a regression test for HDFS-4417. + */ +public class TestParallelShortCircuitReadUnCached extends TestParallelReadUtil { + private static TemporarySocketDirectory sockDir; + + @BeforeClass + static public void setupCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; + sockDir = new TemporarySocketDirectory(); + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + new File(sockDir.getDir(), + "TestParallelShortCircuitReadUnCached._PORT.sock").getAbsolutePath()); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); + conf.setBoolean(DFSConfigKeys. + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); + conf.setBoolean(DFSConfigKeys. + DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, true); + // We want to test reading from stale sockets. + conf.setInt(DFSConfigKeys.DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 1); + conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, + 5 * 60 * 1000); + conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY, 32); + // Avoid using the FileInputStreamCache. + conf.setInt(DFSConfigKeys. 
+ DFS_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_KEY, 0); + DomainSocket.disableBindPathValidation(); + DFSInputStream.tcpReadsDisabledForTesting = true; + setupCluster(1, conf); + } + + @Before + public void before() { + Assume.assumeThat(DomainSocket.getLoadingFailureReason(), equalTo(null)); + } + + @AfterClass + static public void teardownCluster() throws Exception { + if (DomainSocket.getLoadingFailureReason() != null) return; + sockDir.close(); + TestParallelReadUtil.teardownCluster(); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java index bb580bc9534..3c8db549ab9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java @@ -33,22 +33,25 @@ import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.net.unix.DomainSocket; import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; public class TestPeerCache { static final Log LOG = LogFactory.getLog(TestPeerCache.class); private static final int CAPACITY = 3; private static final int EXPIRY_PERIOD = 20; - private static PeerCache cache = - PeerCache.getInstance(CAPACITY, EXPIRY_PERIOD); private static class FakePeer implements Peer { private boolean closed = false; + private final boolean hasDomain; private DatanodeID dnId; - public FakePeer(DatanodeID dnId) { + public FakePeer(DatanodeID dnId, boolean hasDomain) { this.dnId = dnId; + this.hasDomain = hasDomain; } @Override @@ -118,39 +121,50 @@ public class TestPeerCache { @Override public DomainSocket getDomainSocket() { - return null; + if (!hasDomain) return null; + // Return a mock which throws an exception whenever any function is + // called. 
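The statement that follows builds the throwing mock with Mockito's default-Answer overload of mock(): any unstubbed call runs the supplied Answer. A standalone sketch of the same pattern is shown here; the Widget interface and FailFastMocks class are made up for illustration.

import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

interface Widget {
  String describe();
}

class FailFastMocks {
  // Every method invoked on the returned mock throws, so any accidental use
  // of the object fails the test immediately instead of silently succeeding.
  static Widget unusableWidget() {
    return Mockito.mock(Widget.class, new Answer<Object>() {
      @Override
      public Object answer(InvocationOnMock invocation) {
        throw new RuntimeException(
            "injected fault: " + invocation.getMethod().getName());
      }
    });
  }
}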
+ return Mockito.mock(DomainSocket.class, + new Answer() { + @Override + public Object answer(InvocationOnMock invocation) + throws Throwable { + throw new RuntimeException("injected fault."); + } }); } } @Test public void testAddAndRetrieve() throws Exception { + PeerCache cache = PeerCache.getInstance(3, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", 100, 101, 102); - FakePeer peer = new FakePeer(dnId); + FakePeer peer = new FakePeer(dnId, false); cache.put(dnId, peer); assertTrue(!peer.isClosed()); assertEquals(1, cache.size()); - assertEquals(peer, cache.get(dnId)); + assertEquals(peer, cache.get(dnId, false)); assertEquals(0, cache.size()); cache.clear(); } @Test public void testExpiry() throws Exception { + final int CAPACITY = 3; + final int EXPIRY_PERIOD = 10; + PeerCache cache = PeerCache.getInstance(CAPACITY, EXPIRY_PERIOD); DatanodeID dnIds[] = new DatanodeID[CAPACITY]; FakePeer peers[] = new FakePeer[CAPACITY]; for (int i = 0; i < CAPACITY; ++i) { dnIds[i] = new DatanodeID("192.168.0.1", "fakehostname_" + i, "fake_storage_id", 100, 101, 102); - peers[i] = new FakePeer(dnIds[i]); + peers[i] = new FakePeer(dnIds[i], false); } for (int i = 0; i < CAPACITY; ++i) { cache.put(dnIds[i], peers[i]); } - // Check that the peers are cached - assertEquals(CAPACITY, cache.size()); // Wait for the peers to expire Thread.sleep(EXPIRY_PERIOD * 50); @@ -169,13 +183,15 @@ public class TestPeerCache { @Test public void testEviction() throws Exception { + final int CAPACITY = 3; + PeerCache cache = PeerCache.getInstance(CAPACITY, 100000); DatanodeID dnIds[] = new DatanodeID[CAPACITY + 1]; FakePeer peers[] = new FakePeer[CAPACITY + 1]; for (int i = 0; i < dnIds.length; ++i) { dnIds[i] = new DatanodeID("192.168.0.1", "fakehostname_" + i, "fake_storage_id_" + i, 100, 101, 102); - peers[i] = new FakePeer(dnIds[i]); + peers[i] = new FakePeer(dnIds[i], false); } for (int i = 0; i < CAPACITY; ++i) { cache.put(dnIds[i], peers[i]); @@ -186,11 +202,11 @@ public class TestPeerCache { // Add another entry and check that the first entry was evicted cache.put(dnIds[CAPACITY], peers[CAPACITY]); assertEquals(CAPACITY, cache.size()); - assertSame(null, cache.get(dnIds[0])); + assertSame(null, cache.get(dnIds[0], false)); // Make sure that the other entries are still there for (int i = 1; i < CAPACITY; ++i) { - Peer peer = cache.get(dnIds[i]); + Peer peer = cache.get(dnIds[i], false); assertSame(peers[i], peer); assertTrue(!peer.isClosed()); peer.close(); @@ -201,19 +217,56 @@ public class TestPeerCache { @Test public void testMultiplePeersWithSameDnId() throws Exception { + final int CAPACITY = 3; + PeerCache cache = PeerCache.getInstance(CAPACITY, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", 100, 101, 102); HashSet peers = new HashSet(CAPACITY); for (int i = 0; i < CAPACITY; ++i) { - FakePeer peer = new FakePeer(dnId); + FakePeer peer = new FakePeer(dnId, false); peers.add(peer); cache.put(dnId, peer); } // Check that all of the peers ended up in the cache assertEquals(CAPACITY, cache.size()); while (!peers.isEmpty()) { - Peer peer = cache.get(dnId); + Peer peer = cache.get(dnId, false); + assertTrue(peer != null); + assertTrue(!peer.isClosed()); + peers.remove(peer); + } + assertEquals(0, cache.size()); + cache.clear(); + } + + @Test + public void testDomainSocketPeers() throws Exception { + final int CAPACITY = 3; + PeerCache cache = PeerCache.getInstance(CAPACITY, 100000); + DatanodeID dnId = new 
DatanodeID("192.168.0.1", + "fakehostname", "fake_storage_id", + 100, 101, 102); + HashSet peers = new HashSet(CAPACITY); + for (int i = 0; i < CAPACITY; ++i) { + FakePeer peer = new FakePeer(dnId, i == CAPACITY - 1); + peers.add(peer); + cache.put(dnId, peer); + } + // Check that all of the peers ended up in the cache + assertEquals(CAPACITY, cache.size()); + // Test that get(requireDomainPeer=true) finds the peer with the + // domain socket. + Peer peer = cache.get(dnId, true); + assertTrue(peer.getDomainSocket() != null); + peers.remove(peer); + // Test that get(requireDomainPeer=true) returns null when there are + // no more peers with domain sockets. + peer = cache.get(dnId, true); + assertTrue(peer == null); + // Check that all of the other peers ended up in the cache. + while (!peers.isEmpty()) { + peer = cache.get(dnId, false); assertTrue(peer != null); assertTrue(!peer.isClosed()); peers.remove(peer); From 4e74c52e60213a8ddceb53bb3cc40221c65d8bf3 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 23 Jan 2013 20:01:33 +0000 Subject: [PATCH 14/52] HDFS-4433. Make TestPeerCache not flaky. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1437680 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 2 + .../org/apache/hadoop/hdfs/PeerCache.java | 60 ++++++++++++------- .../org/apache/hadoop/hdfs/TestPeerCache.java | 45 ++++++++------ 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index cc81fa36076..fcc860fc91c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -33,3 +33,5 @@ HDFS-4416. Rename dfs.datanode.domain.socket.path to dfs.domain.socket.path HDFS-4417. Fix case where local reads get disabled incorrectly (Colin Patrick McCabe and todd via todd) + +HDFS-4433. Make TestPeerCache not flaky (Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java index dcb4bc18331..424b641c8c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/PeerCache.java @@ -83,32 +83,35 @@ class PeerCache { private Daemon daemon; /** A map for per user per datanode. 
*/ - private static LinkedListMultimap multimap = + private final LinkedListMultimap multimap = LinkedListMultimap.create(); - private static int capacity; - private static long expiryPeriod; - private static PeerCache instance = new PeerCache(); - private static boolean isInitedOnce = false; + private final int capacity; + private final long expiryPeriod; + private static PeerCache instance = null; + + @VisibleForTesting + PeerCache(int c, long e) { + this.capacity = c; + this.expiryPeriod = e; + + if (capacity == 0 ) { + LOG.info("SocketCache disabled."); + } + else if (expiryPeriod == 0) { + throw new IllegalStateException("Cannot initialize expiryPeriod to " + + expiryPeriod + "when cache is enabled."); + } + } public static synchronized PeerCache getInstance(int c, long e) { // capacity is only initialized once - if (isInitedOnce == false) { - capacity = c; - expiryPeriod = e; - - if (capacity == 0 ) { - LOG.info("SocketCache disabled."); - } - else if (expiryPeriod == 0) { - throw new IllegalStateException("Cannot initialize expiryPeriod to " + - expiryPeriod + "when cache is enabled."); - } - isInitedOnce = true; + if (instance == null) { + instance = new PeerCache(c, e); } else { //already initialized once - if (capacity != c || expiryPeriod != e) { - LOG.info("capacity and expiry periods already set to " + capacity + - " and " + expiryPeriod + " respectively. Cannot set it to " + c + - " and " + e); + if (instance.capacity != c || instance.expiryPeriod != e) { + LOG.info("capacity and expiry periods already set to " + + instance.capacity + " and " + instance.expiryPeriod + + " respectively. Cannot set it to " + c + " and " + e); } } @@ -267,5 +270,18 @@ class PeerCache { } multimap.clear(); } - + + @VisibleForTesting + void close() { + clear(); + if (daemon != null) { + daemon.interrupt(); + try { + daemon.join(); + } catch (InterruptedException e) { + throw new RuntimeException("failed to join thread"); + } + } + daemon = null; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java index 3c8db549ab9..7836bc66805 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPeerCache.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.channels.ReadableByteChannel; -import java.util.HashSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -37,12 +36,11 @@ import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import com.google.common.collect.HashMultiset; + public class TestPeerCache { static final Log LOG = LogFactory.getLog(TestPeerCache.class); - private static final int CAPACITY = 3; - private static final int EXPIRY_PERIOD = 20; - private static class FakePeer implements Peer { private boolean closed = false; private final boolean hasDomain; @@ -132,11 +130,24 @@ public class TestPeerCache { throw new RuntimeException("injected fault."); } }); } + + @Override + public boolean equals(Object o) { + if (!(o instanceof FakePeer)) return false; + FakePeer other = (FakePeer)o; + return hasDomain == other.hasDomain && + dnId.equals(other.dnId); + } + + @Override + public int hashCode() { + return dnId.hashCode() ^ (hasDomain ? 
1 : 0); + } } @Test public void testAddAndRetrieve() throws Exception { - PeerCache cache = PeerCache.getInstance(3, 100000); + PeerCache cache = new PeerCache(3, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", 100, 101, 102); @@ -146,14 +157,14 @@ public class TestPeerCache { assertEquals(1, cache.size()); assertEquals(peer, cache.get(dnId, false)); assertEquals(0, cache.size()); - cache.clear(); + cache.close(); } @Test public void testExpiry() throws Exception { final int CAPACITY = 3; final int EXPIRY_PERIOD = 10; - PeerCache cache = PeerCache.getInstance(CAPACITY, EXPIRY_PERIOD); + PeerCache cache = new PeerCache(CAPACITY, EXPIRY_PERIOD); DatanodeID dnIds[] = new DatanodeID[CAPACITY]; FakePeer peers[] = new FakePeer[CAPACITY]; for (int i = 0; i < CAPACITY; ++i) { @@ -178,13 +189,13 @@ public class TestPeerCache { // sleep for another second and see if // the daemon thread runs fine on empty cache Thread.sleep(EXPIRY_PERIOD * 50); - cache.clear(); + cache.close(); } @Test public void testEviction() throws Exception { final int CAPACITY = 3; - PeerCache cache = PeerCache.getInstance(CAPACITY, 100000); + PeerCache cache = new PeerCache(CAPACITY, 100000); DatanodeID dnIds[] = new DatanodeID[CAPACITY + 1]; FakePeer peers[] = new FakePeer[CAPACITY + 1]; for (int i = 0; i < dnIds.length; ++i) { @@ -212,17 +223,17 @@ public class TestPeerCache { peer.close(); } assertEquals(1, cache.size()); - cache.clear(); + cache.close(); } @Test - public void testMultiplePeersWithSameDnId() throws Exception { + public void testMultiplePeersWithSameKey() throws Exception { final int CAPACITY = 3; - PeerCache cache = PeerCache.getInstance(CAPACITY, 100000); + PeerCache cache = new PeerCache(CAPACITY, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", 100, 101, 102); - HashSet peers = new HashSet(CAPACITY); + HashMultiset peers = HashMultiset.create(CAPACITY); for (int i = 0; i < CAPACITY; ++i) { FakePeer peer = new FakePeer(dnId, false); peers.add(peer); @@ -237,17 +248,17 @@ public class TestPeerCache { peers.remove(peer); } assertEquals(0, cache.size()); - cache.clear(); + cache.close(); } @Test public void testDomainSocketPeers() throws Exception { final int CAPACITY = 3; - PeerCache cache = PeerCache.getInstance(CAPACITY, 100000); + PeerCache cache = new PeerCache(CAPACITY, 100000); DatanodeID dnId = new DatanodeID("192.168.0.1", "fakehostname", "fake_storage_id", 100, 101, 102); - HashSet peers = new HashSet(CAPACITY); + HashMultiset peers = HashMultiset.create(CAPACITY); for (int i = 0; i < CAPACITY; ++i) { FakePeer peer = new FakePeer(dnId, i == CAPACITY - 1); peers.add(peer); @@ -272,6 +283,6 @@ public class TestPeerCache { peers.remove(peer); } assertEquals(0, cache.size()); - cache.clear(); + cache.close(); } } From f5ab88b5b71dadf88f02c0810f63955ac90436a0 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 23 Jan 2013 23:18:57 +0000 Subject: [PATCH 15/52] Fix trivial javadoc warning on branch (javadoc references parameter 'defaultVal' but it is actually 'defaultValue') git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1437797 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/main/java/org/apache/hadoop/conf/Configuration.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 
c44a18f9737..67befcc323c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -746,12 +746,12 @@ public class Configuration implements Iterable>, /** * Get the value of the name property as a trimmed String, - * defaultVal if no such property exists. + * defaultValue if no such property exists. * See @{Configuration#getTrimmed} for more details. * * @param name the property name. - * @param defaultVal the property default value. - * @return the value of the name or defaultVal + * @param defaultValue the property default value. + * @return the value of the name or defaultValue * if it is not set. */ public String getTrimmed(String name, String defaultValue) { From 554774c6d111d90082791b400d068cc90d5ca604 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 25 Jan 2013 00:39:02 +0000 Subject: [PATCH 16/52] HDFS-4438. TestDomainSocket fails when system umask is set to 0002. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1438279 13f79535-47bb-0310-9956-ffa450edef68 --- .../test/java/org/apache/hadoop/net/unix/TestDomainSocket.java | 2 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index a490fefcac7..3347655cf6f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -646,7 +646,7 @@ public class TestDomainSocket { */ private static void testValidateSocketPath(String str, String prefix) throws IOException { - int skipComponents = 0; + int skipComponents = 1; File prefixFile = new File(prefix); while (true) { prefixFile = prefixFile.getParentFile(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index fcc860fc91c..b067e0c1065 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -35,3 +35,5 @@ HDFS-4417. Fix case where local reads get disabled incorrectly (Colin Patrick McCabe and todd via todd) HDFS-4433. Make TestPeerCache not flaky (Colin Patrick McCabe via todd) + +HDFS-4438. TestDomainSocket fails when system umask is set to 0002. (Colin Patrick McCabe via atm) From c0e16efe91a5de374a55e272ee97d568def60b6b Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 25 Jan 2013 00:44:37 +0000 Subject: [PATCH 17/52] HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not set. Contributed by Colin Patrick McCabe. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1438280 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ .../main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index b067e0c1065..faf82389ce4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -37,3 +37,5 @@ HDFS-4417. Fix case where local reads get disabled incorrectly HDFS-4433. Make TestPeerCache not flaky (Colin Patrick McCabe via todd) HDFS-4438. TestDomainSocket fails when system umask is set to 0002. (Colin Patrick McCabe via atm) + +HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not set. (Colin Patrick McCabe via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java index 103c946e5e2..69a6416dfc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -84,7 +84,7 @@ class DomainSocketFactory { DomainSocket create(InetSocketAddress addr, DFSInputStream stream) { // If there is no domain socket path configured, we can't use domain // sockets. - if (conf.domainSocketPath == null) return null; + if (conf.domainSocketPath.isEmpty()) return null; // UNIX domain sockets can only be used to talk to local peers if (!DFSClient.isLocalAddress(addr)) return null; // If the DomainSocket code is not loaded, we can't create From 3d46863e12824103306902bbe62ee1f1535bd2a7 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 7 Feb 2013 00:02:12 +0000 Subject: [PATCH 18/52] HDFS-4473. Don't create domain socket unless we need it. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1443283 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ .../hadoop/hdfs/server/datanode/DataNode.java | 19 ++++++++++++------- .../apache/hadoop/hdfs/TestParallelRead.java | 12 +++++++++++- .../hdfs/TestShortCircuitLocalRead.java | 7 +++++++ 4 files changed, 32 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index faf82389ce4..8ca55f814d7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -39,3 +39,5 @@ HDFS-4433. Make TestPeerCache not flaky (Colin Patrick McCabe via todd) HDFS-4438. TestDomainSocket fails when system umask is set to 0002. (Colin Patrick McCabe via atm) HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not set. (Colin Patrick McCabe via atm) + +HDFS-4473. Don't create domain socket unless we need it. 
(Colin Patrick McCabe via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 99e508b53d2..10a1f8dd7e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -559,13 +559,18 @@ public class DataNode extends Configured new DataXceiverServer(tcpPeerServer, conf, this)); this.threadGroup.setDaemon(true); // auto destroy when empty - DomainPeerServer domainPeerServer = - getDomainPeerServer(conf, streamingAddr.getPort()); - if (domainPeerServer != null) { - this.localDataXceiverServer = new Daemon(threadGroup, - new DataXceiverServer(domainPeerServer, conf, this)); - LOG.info("Listening on UNIX domain socket: " + - domainPeerServer.getBindPath()); + if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT) || + conf.getBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, + DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT)) { + DomainPeerServer domainPeerServer = + getDomainPeerServer(conf, streamingAddr.getPort()); + if (domainPeerServer != null) { + this.localDataXceiverServer = new Daemon(threadGroup, + new DataXceiverServer(domainPeerServer, conf, this)); + LOG.info("Listening on UNIX domain socket: " + + domainPeerServer.getBindPath()); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java index 769551c99dd..c06fab50d11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelRead.java @@ -26,7 +26,17 @@ import org.junit.BeforeClass; public class TestParallelRead extends TestParallelReadUtil { @BeforeClass static public void setupCluster() throws Exception { - setupCluster(DEFAULT_REPLICATION_FACTOR, new HdfsConfiguration()); + // This is a test of the normal (TCP) read path. For this reason, we turn + // off both short-circuit local reads and UNIX domain socket data traffic. + HdfsConfiguration conf = new HdfsConfiguration(); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, + false); + // dfs.domain.socket.path should be ignored because the previous two keys + // were set to false. This is a regression test for HDFS-4473. 
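The line that follows deliberately points dfs.domain.socket.path at a placeholder; because both features above are off, the DataNode must never create anything there. A hypothetical helper that a test could call after cluster startup to pin that down is sketched here (the class and method names are invented for illustration).

import java.io.File;
import static org.junit.Assert.assertFalse;

class DomainSocketPathChecks {
  // With short-circuit reads and domain-socket data traffic both disabled,
  // the configured socket path should never come into existence.
  static void assertNeverCreated(String path) {
    assertFalse("unexpected domain socket at " + path, new File(path).exists());
  }
}

For example, a test might call DomainSocketPathChecks.assertNeverCreated("/will/not/be/created") once the mini cluster is up.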
+ conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, "/will/not/be/created"); + + setupCluster(DEFAULT_REPLICATION_FACTOR, conf); } @AfterClass diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index e71fc8ce183..94931ca4806 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -293,6 +293,9 @@ public class TestShortCircuitLocalRead { Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + "/tmp/testSkipWithVerifyChecksum._PORT"); + DomainSocket.disableBindPathValidation(); if (simulatedStorage) { SimulatedFSDataset.setFactory(conf); } @@ -337,6 +340,8 @@ public class TestShortCircuitLocalRead { HdfsConfiguration conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + "/tmp/testHandleTruncatedBlockFile._PORT"); conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C"); final Path TEST_PATH = new Path("/a"); final Path TEST_PATH2 = new Path("/b"); @@ -432,6 +437,8 @@ public class TestShortCircuitLocalRead { // Setup create a file Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, shortcircuit); + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + "/tmp/TestShortCircuitLocalRead._PORT"); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, checksum); From 1132f51a9cd81bf3f5c1864d53efdda16a271300 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 9 Feb 2013 00:58:46 +0000 Subject: [PATCH 19/52] HDFS-4485. DN should chmod socket path a+w. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1444304 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/hadoop/net/unix/DomainSocket.c | 15 +++++++++++++++ .../hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ 2 files changed, 17 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c index d05499609f5..b8056b654a9 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c @@ -196,6 +196,21 @@ static jthrowable setup(JNIEnv *env, int *ofd, jobject jpath, int doConnect) terror(ret), addr.sun_path); goto done; } + /* We need to make the socket readable and writable for all users in the + * system. + * + * If the system administrator doesn't want the socket to be accessible to + * all users, he can simply adjust the +x permissions on one of the socket's + * parent directories. + * + * See HDFS-4485 for more discussion. 
+ */ + if (chmod(addr.sun_path, 0666)) { + ret = errno; + jthr = newException(env, "java/net/BindException", + "chmod(%s, 0666) failed: %s", addr.sun_path, terror(ret)); + goto done; + } if (listen(fd, LISTEN_BACKLOG) < 0) { ret = errno; jthr = newException(env, "java/net/BindException", diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 8ca55f814d7..37c93d9589c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -41,3 +41,5 @@ HDFS-4438. TestDomainSocket fails when system umask is set to 0002. (Colin Patri HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not set. (Colin Patrick McCabe via atm) HDFS-4473. Don't create domain socket unless we need it. (Colin Patrick McCabe via atm) + +HDFS-4485. DN should chmod socket path a+w. (Colin Patrick McCabe via atm) From aa92072b54e308e74b082329bedef49af3d79e16 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 11 Feb 2013 21:10:23 +0000 Subject: [PATCH 20/52] HDFS-4453. Make a simple doc to describe the usage and design of the shortcircuit read feature. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1444963 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 2 + .../site/apt/ShortCircuitLocalReads.apt.vm | 68 +++++++++++++++++++ hadoop-project/src/site/site.xml | 2 + 3 files changed, 72 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 37c93d9589c..5be58b3f013 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -43,3 +43,5 @@ HDFS-4440. Avoid annoying log message when dfs.domain.socket.path is not set. (C HDFS-4473. Don't create domain socket unless we need it. (Colin Patrick McCabe via atm) HDFS-4485. DN should chmod socket path a+w. (Colin Patrick McCabe via atm) + +HDFS-4453. Make a simple doc to describe the usage and design of the shortcircuit read feature. (Colin Patrick McCabe via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm new file mode 100644 index 00000000000..6792f156b38 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/ShortCircuitLocalReads.apt.vm @@ -0,0 +1,68 @@ + +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop Distributed File System-${project.version} - Short-Circuit Local Reads + --- + --- + ${maven.build.timestamp} + +HDFS Short-Circuit Local Reads + + \[ {{{./index.html}Go Back}} \] + +%{toc|section=1|fromDepth=0} + +* {Background} + + In <<>>, reads normally go through the <<>>. 
Thus, when the + client asks the <<>> to read a file, the <<>> reads that + file off of the disk and sends the data to the client over a TCP socket. + So-called "short-circuit" reads bypass the <<>>, allowing the client + to read the file directly. Obviously, this is only possible in cases where + the client is co-located with the data. Short-circuit reads provide a + substantial performance boost to many applications. + +* {Configuration} + + To configure short-circuit local reads, you will need to enable + <<>>. See + {{{../hadoop-common/NativeLibraries.html}Native + Libraries}} for details on enabling this library. + + Short-circuit reads make use of a UNIX domain socket. This is a special path + in the filesystem that allows the client and the DataNodes to communicate. + You will need to set a path to this socket. The DataNode needs to be able to + create this path. On the other hand, it should not be possible for any user + except the hdfs user or root to create this path. For this reason, paths + under <<>> or <<>> are often used. + + Short-circuit local reads need to be configured on both the <<>> + and the client. + +* {Example Configuration} + + Here is an example configuration. + +---- + + + dfs.client.read.shortcircuit + true + + + dfs.domain.socket.path + /var/lib/hadoop-hdfs/dn_socket + + +---- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 2cfe2e8c456..31620b90eff 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -61,6 +61,8 @@ + From 27e158362c6e0147f375ea3d73ff87d886ce6575 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 13 Feb 2013 07:26:42 +0000 Subject: [PATCH 21/52] HDFS-4496. DFSClient: don't create a domain socket unless we need it. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1445491 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt | 2 ++ .../main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 5be58b3f013..94b5c0ce949 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -45,3 +45,5 @@ HDFS-4473. Don't create domain socket unless we need it. (Colin Patrick McCabe v HDFS-4485. DN should chmod socket path a+w. (Colin Patrick McCabe via atm) HDFS-4453. Make a simple doc to describe the usage and design of the shortcircuit read feature. (Colin Patrick McCabe via atm) + +HDFS-4496. DFSClient: don't create a domain socket unless we need it (Colin Patrick McCabe via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java index 69a6416dfc5..0e2025bea21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -85,6 +85,10 @@ class DomainSocketFactory { // If there is no domain socket path configured, we can't use domain // sockets. if (conf.domainSocketPath.isEmpty()) return null; + // If we can't do anything with the domain socket, don't create it. 
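The guard added just below returns null unless at least one of the two client-side features is enabled, so the factory never builds sockets nobody will use. For reference, here is a minimal client configuration sketch that satisfies the guard, using the same keys as the tests earlier in this branch; the ShortCircuitClientConf class name is invented and the socket path is only an example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

class ShortCircuitClientConf {
  // Sketch only: the minimum client settings under which the guard below
  // lets DomainSocketFactory create a domain socket for a local DataNode.
  static Configuration create() {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
    conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
        "/var/lib/hadoop-hdfs/dn_socket");
    // Alternatively (or additionally), allow ordinary data traffic
    // over the domain socket:
    // conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, true);
    return conf;
  }
}

Either feature flag satisfies this guard, but the path check earlier in create() still requires dfs.domain.socket.path to be non-empty; setting only the path, as the HDFS-4473 regression test above demonstrates, is not enough.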
+ if (!(conf.domainSocketDataTraffic || conf.shortCircuitLocalReads)) { + return null; + } // UNIX domain sockets can only be used to talk to local peers if (!DFSClient.isLocalAddress(addr)) return null; // If the DomainSocket code is not loaded, we can't create From 3a417cbf1d4bfc249f1f9fbd3c2b792c5e78bf5f Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 16 Feb 2013 00:59:01 +0000 Subject: [PATCH 22/52] HDFS-347: style cleanups. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1446830 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/net/unix/DomainSocket.java | 282 +++++++++++------- .../org/apache/hadoop/net/unix/DomainSocket.c | 32 +- .../hadoop/net/unix/TestDomainSocket.java | 10 +- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 2 + .../hadoop/hdfs/DomainSocketFactory.java | 2 +- .../apache/hadoop/hdfs/net/DomainPeer.java | 6 +- .../hadoop/hdfs/net/DomainPeerServer.java | 2 +- 7 files changed, 195 insertions(+), 141 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java index aa4b952e8d1..4c6ae0592c2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/unix/DomainSocket.java @@ -51,7 +51,7 @@ public class DomainSocket implements Closeable { } else if (!NativeCodeLoader.isNativeCodeLoaded()) { loadingFailureReason = "libhadoop cannot be loaded."; } else { - String problem = "DomainSocket#anchorNative got error: unknown"; + String problem; try { anchorNative(); problem = null; @@ -132,17 +132,99 @@ public class DomainSocket implements Closeable { } /** - * Status bits - * - * Bit 30: 0 = DomainSocket open, 1 = DomainSocket closed - * Bits 29 to 0: the reference count. + * Tracks the reference count of the file descriptor, and also whether it is + * open or closed. */ - private final AtomicInteger status; + private static class Status { + /** + * Bit mask representing a closed domain socket. + */ + private static final int STATUS_CLOSED_MASK = 1 << 30; + + /** + * Status bits + * + * Bit 30: 0 = DomainSocket open, 1 = DomainSocket closed + * Bits 29 to 0: the reference count. + */ + private final AtomicInteger bits = new AtomicInteger(0); + + Status() { } + + /** + * Increment the reference count of the underlying file descriptor. + * + * @throws ClosedChannelException If the file descriptor is closed. + */ + void reference() throws ClosedChannelException { + int curBits = bits.incrementAndGet(); + if ((curBits & STATUS_CLOSED_MASK) != 0) { + bits.decrementAndGet(); + throw new ClosedChannelException(); + } + } + + /** + * Decrement the reference count of the underlying file descriptor. + * + * @param checkClosed Whether to throw an exception if the file + * descriptor is closed. + * + * @throws AsynchronousCloseException If the file descriptor is closed and + * checkClosed is set. + */ + void unreference(boolean checkClosed) throws AsynchronousCloseException { + int newCount = bits.decrementAndGet(); + assert (newCount & ~STATUS_CLOSED_MASK) >= 0; + if (checkClosed && ((newCount & STATUS_CLOSED_MASK) != 0)) { + throw new AsynchronousCloseException(); + } + } + + /** + * Return true if the file descriptor is currently open. + * + * @return True if the file descriptor is currently open. 
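Condensed out of the surrounding diff for readability, the idiom the new Status class implements is a single AtomicInteger whose top usable bit marks "closed" while the low bits count in-flight users of the file descriptor. A compact sketch of that idiom follows; it is not the exact HDFS class, and names and error handling are simplified.

import java.nio.channels.AsynchronousCloseException;
import java.nio.channels.ClosedChannelException;
import java.util.concurrent.atomic.AtomicInteger;

// Bit 30 marks the descriptor closed; bits 0-29 count active references.
class RefCountedStatus {
  private static final int CLOSED = 1 << 30;
  private final AtomicInteger bits = new AtomicInteger(0);

  // Take a reference before using the descriptor; fails if already closed.
  void reference() throws ClosedChannelException {
    if ((bits.incrementAndGet() & CLOSED) != 0) {
      bits.decrementAndGet();  // roll back the optimistic increment
      throw new ClosedChannelException();
    }
  }

  // Drop a reference; optionally report an asynchronous close to the caller.
  void unreference(boolean checkClosed) throws AsynchronousCloseException {
    int now = bits.decrementAndGet();
    if (checkClosed && (now & CLOSED) != 0) {
      throw new AsynchronousCloseException();
    }
  }

  boolean isOpen() {
    return (bits.get() & CLOSED) == 0;
  }

  // Atomically set the closed bit and return how many references remain,
  // so the closer knows when it is safe to call close(2) on the descriptor.
  int setClosed() throws ClosedChannelException {
    while (true) {
      int cur = bits.get();
      if ((cur & CLOSED) != 0) {
        throw new ClosedChannelException();
      }
      if (bits.compareAndSet(cur, cur | CLOSED)) {
        return cur & ~CLOSED;
      }
    }
  }
}

The close() path later in this diff then waits for the remaining references to drain, calling shutdown once so blocked readers wake up, before finally invoking close(2) on the descriptor.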
+ */ + boolean isOpen() { + return ((bits.get() & STATUS_CLOSED_MASK) == 0); + } + + /** + * Mark the file descriptor as closed. + * + * Once the file descriptor is closed, it cannot be reopened. + * + * @return The current reference count. + * @throws ClosedChannelException If someone else closes the file + * descriptor before we do. + */ + int setClosed() throws ClosedChannelException { + while (true) { + int curBits = bits.get(); + if ((curBits & STATUS_CLOSED_MASK) != 0) { + throw new ClosedChannelException(); + } + if (bits.compareAndSet(curBits, curBits | STATUS_CLOSED_MASK)) { + return curBits & (~STATUS_CLOSED_MASK); + } + } + } + + /** + * Get the current reference count. + * + * @return The current reference count. + */ + int getReferenceCount() { + return bits.get() & (~STATUS_CLOSED_MASK); + } + } /** - * Bit mask representing a closed domain socket. + * The socket status. */ - private static final int STATUS_CLOSED_MASK = 1 << 30; + private final Status status; /** * The file descriptor associated with this UNIX domain socket. @@ -170,7 +252,7 @@ public class DomainSocket implements Closeable { private final DomainChannel channel = new DomainChannel(); private DomainSocket(String path, int fd) { - this.status = new AtomicInteger(0); + this.status = new Status(); this.fd = fd; this.path = path; } @@ -208,14 +290,14 @@ public class DomainSocket implements Closeable { * @throws SocketTimeoutException If the accept timed out. */ public DomainSocket accept() throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { DomainSocket ret = new DomainSocket(path, accept0(fd)); exc = false; return ret; } finally { - fdUnref(exc); + status.unreference(exc); } } @@ -235,38 +317,14 @@ public class DomainSocket implements Closeable { return new DomainSocket(path, fd); } - /** - * Increment the reference count of the underlying file descriptor. - * - * @throws SocketException If the file descriptor is closed. - */ - private void fdRef() throws ClosedChannelException { - int bits = status.incrementAndGet(); - if ((bits & STATUS_CLOSED_MASK) != 0) { - status.decrementAndGet(); - throw new ClosedChannelException(); - } - } - - /** - * Decrement the reference count of the underlying file descriptor. - */ - private void fdUnref(boolean checkClosed) throws AsynchronousCloseException { - int newCount = status.decrementAndGet(); - assert (newCount & ~STATUS_CLOSED_MASK) >= 0; - if (checkClosed && ((newCount & STATUS_CLOSED_MASK) != 0)) { - throw new AsynchronousCloseException(); - } - } - - /** - * Return true if the file descriptor is currently open. - * - * @return True if the file descriptor is currently open. - */ - public boolean isOpen() { - return ((status.get() & STATUS_CLOSED_MASK) == 0); - } + /** + * Return true if the file descriptor is currently open. + * + * @return True if the file descriptor is currently open. + */ + public boolean isOpen() { + return status.isOpen(); + } /** * @return The socket path. 
@@ -296,29 +354,29 @@ public class DomainSocket implements Closeable { return channel; } - public static final int SND_BUF_SIZE = 1; - public static final int RCV_BUF_SIZE = 2; - public static final int SND_TIMEO = 3; - public static final int RCV_TIMEO = 4; + public static final int SEND_BUFFER_SIZE = 1; + public static final int RECEIVE_BUFFER_SIZE = 2; + public static final int SEND_TIMEOUT = 3; + public static final int RECEIVE_TIMEOUT = 4; private static native void setAttribute0(int fd, int type, int val) throws IOException; public void setAttribute(int type, int size) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { setAttribute0(fd, type, size); exc = false; } finally { - fdUnref(exc); + status.unreference(exc); } } private native int getAttribute0(int fd, int type) throws IOException; public int getAttribute(int type) throws IOException { - fdRef(); + status.reference(); int attribute; boolean exc = true; try { @@ -326,7 +384,7 @@ public class DomainSocket implements Closeable { exc = false; return attribute; } finally { - fdUnref(exc); + status.unreference(exc); } } @@ -343,20 +401,17 @@ public class DomainSocket implements Closeable { @Override public void close() throws IOException { // Set the closed bit on this DomainSocket - int bits; - while (true) { - bits = status.get(); - if ((bits & STATUS_CLOSED_MASK) != 0) { - return; // already closed - } - if (status.compareAndSet(bits, bits | STATUS_CLOSED_MASK)) { - break; - } + int refCount; + try { + refCount = status.setClosed(); + } catch (ClosedChannelException e) { + // Someone else already closed the DomainSocket. + return; } // Wait for all references to go away boolean didShutdown = false; boolean interrupted = false; - while ((bits & (~STATUS_CLOSED_MASK)) > 0) { + while (refCount > 0) { if (!didShutdown) { try { // Calling shutdown on the socket will interrupt blocking system @@ -373,60 +428,57 @@ public class DomainSocket implements Closeable { } catch (InterruptedException e) { interrupted = true; } - bits = status.get(); + refCount = status.getReferenceCount(); } - // Close the file descriptor. After this point, the file descriptor - // number will be reused by something else. Although this DomainSocket - // object continues to hold the old file descriptor number (it's a final - // field), we never use it again because we look at the closed bit and - // realize that this DomainSocket is not usable. + // At this point, nobody has a reference to the file descriptor, + // and nobody will be able to get one in the future either. + // We now call close(2) on the file descriptor. + // After this point, the file descriptor number will be reused by + // something else. Although this DomainSocket object continues to hold + // the old file descriptor number (it's a final field), we never use it + // again because this DomainSocket is closed. close0(fd); if (interrupted) { Thread.currentThread().interrupt(); } } - /* - * Clean up if the user forgets to close the socket. - */ - protected void finalize() throws IOException { - close(); - } - - private native static void sendFileDescriptors0(int fd, FileDescriptor jfds[], + private native static void sendFileDescriptors0(int fd, + FileDescriptor descriptors[], byte jbuf[], int offset, int length) throws IOException; /** * Send some FileDescriptor objects to the process on the other side of this * socket. * - * @param jfds The file descriptors to send. + * @param descriptors The file descriptors to send. * @param jbuf Some bytes to send. 
You must send at least * one byte. * @param offset The offset in the jbuf array to start at. * @param length Length of the jbuf array to use. */ - public void sendFileDescriptors(FileDescriptor jfds[], + public void sendFileDescriptors(FileDescriptor descriptors[], byte jbuf[], int offset, int length) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { - sendFileDescriptors0(fd, jfds, jbuf, offset, length); + sendFileDescriptors0(fd, descriptors, jbuf, offset, length); exc = false; } finally { - fdUnref(exc); + status.unreference(exc); } } - private static native int receiveFileDescriptors0(int fd, FileDescriptor[] jfds, + private static native int receiveFileDescriptors0(int fd, + FileDescriptor[] descriptors, byte jbuf[], int offset, int length) throws IOException; /** * Receive some FileDescriptor objects from the process on the other side of * this socket. * - * @param jfds (output parameter) Array of FileDescriptors. + * @param descriptors (output parameter) Array of FileDescriptors. * We will fill as many slots as possible with file * descriptors passed from the remote process. The * other slots will contain NULL. @@ -443,16 +495,16 @@ public class DomainSocket implements Closeable { * otherwise, it will be positive. * @throws IOException if there was an I/O error. */ - public int receiveFileDescriptors(FileDescriptor[] jfds, + public int receiveFileDescriptors(FileDescriptor[] descriptors, byte jbuf[], int offset, int length) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { - int nBytes = receiveFileDescriptors0(fd, jfds, jbuf, offset, length); + int nBytes = receiveFileDescriptors0(fd, descriptors, jbuf, offset, length); exc = false; return nBytes; } finally { - fdUnref(exc); + status.unreference(exc); } } @@ -462,44 +514,44 @@ public class DomainSocket implements Closeable { * * See {@link DomainSocket#recvFileInputStreams(ByteBuffer)} */ - public int recvFileInputStreams(FileInputStream[] fis, byte buf[], + public int recvFileInputStreams(FileInputStream[] streams, byte buf[], int offset, int length) throws IOException { - FileDescriptor fds[] = new FileDescriptor[fis.length]; + FileDescriptor descriptors[] = new FileDescriptor[streams.length]; boolean success = false; - for (int i = 0; i < fis.length; i++) { - fis[i] = null; + for (int i = 0; i < streams.length; i++) { + streams[i] = null; } - fdRef(); + status.reference(); try { - int ret = receiveFileDescriptors0(fd, fds, buf, offset, length); - for (int i = 0, j = 0; i < fds.length; i++) { - if (fds[i] != null) { - fis[j++] = new FileInputStream(fds[i]); - fds[i] = null; + int ret = receiveFileDescriptors0(fd, descriptors, buf, offset, length); + for (int i = 0, j = 0; i < descriptors.length; i++) { + if (descriptors[i] != null) { + streams[j++] = new FileInputStream(descriptors[i]); + descriptors[i] = null; } } success = true; return ret; } finally { if (!success) { - for (int i = 0; i < fds.length; i++) { - if (fds[i] != null) { + for (int i = 0; i < descriptors.length; i++) { + if (descriptors[i] != null) { try { - closeFileDescriptor0(fds[i]); + closeFileDescriptor0(descriptors[i]); } catch (Throwable t) { LOG.warn(t); } - } else if (fis[i] != null) { + } else if (streams[i] != null) { try { - fis[i].close(); + streams[i].close(); } catch (Throwable t) { LOG.warn(t); } finally { - fis[i] = null; } + streams[i] = null; } } } } - fdUnref(!success); + status.unreference(!success); } } @@ -523,7 +575,7 @@ public class DomainSocket implements Closeable { public class 
DomainInputStream extends InputStream { @Override public int read() throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { byte b[] = new byte[1]; @@ -531,33 +583,33 @@ public class DomainSocket implements Closeable { exc = false; return (ret >= 0) ? b[0] : -1; } finally { - fdUnref(exc); + status.unreference(exc); } } @Override public int read(byte b[], int off, int len) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { int nRead = DomainSocket.readArray0(DomainSocket.this.fd, b, off, len); exc = false; return nRead; } finally { - fdUnref(exc); + status.unreference(exc); } } @Override public int available() throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { int nAvailable = DomainSocket.available0(DomainSocket.this.fd); exc = false; return nAvailable; } finally { - fdUnref(exc); + status.unreference(exc); } } @@ -579,7 +631,7 @@ public class DomainSocket implements Closeable { @Override public void write(int val) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { byte b[] = new byte[1]; @@ -587,19 +639,19 @@ public class DomainSocket implements Closeable { DomainSocket.writeArray0(DomainSocket.this.fd, b, 0, 1); exc = false; } finally { - fdUnref(exc); + status.unreference(exc); } } @Override public void write(byte[] b, int off, int len) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { DomainSocket.writeArray0(DomainSocket.this.fd, b, off, len); exc = false; } finally { - fdUnref(exc); + status.unreference(exc); } } } @@ -618,7 +670,7 @@ public class DomainSocket implements Closeable { @Override public int read(ByteBuffer dst) throws IOException { - fdRef(); + status.reference(); boolean exc = true; try { int nread = 0; @@ -640,7 +692,7 @@ public class DomainSocket implements Closeable { exc = false; return nread; } finally { - fdUnref(exc); + status.unreference(exc); } } } diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c index b8056b654a9..1c2ea698a00 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/net/unix/DomainSocket.c @@ -38,13 +38,13 @@ #include #include -#define SND_BUF_SIZE org_apache_hadoop_net_unix_DomainSocket_SND_BUF_SIZE -#define RCV_BUF_SIZE org_apache_hadoop_net_unix_DomainSocket_RCV_BUF_SIZE -#define SND_TIMEO org_apache_hadoop_net_unix_DomainSocket_SND_TIMEO -#define RCV_TIMEO org_apache_hadoop_net_unix_DomainSocket_RCV_TIMEO +#define SEND_BUFFER_SIZE org_apache_hadoop_net_unix_DomainSocket_SEND_BUFFER_SIZE +#define RECEIVE_BUFFER_SIZE org_apache_hadoop_net_unix_DomainSocket_RECEIVE_BUFFER_SIZE +#define SEND_TIMEOUT org_apache_hadoop_net_unix_DomainSocket_SEND_TIMEOUT +#define RECEIVE_TIMEOUT org_apache_hadoop_net_unix_DomainSocket_RECEIVE_TIMEOUT -#define DEFAULT_RCV_TIMEO 120000 -#define DEFAULT_SND_TIMEO 120000 +#define DEFAULT_RECEIVE_TIMEOUT 120000 +#define DEFAULT_SEND_TIMEOUT 120000 #define LISTEN_BACKLOG 128 /** @@ -391,8 +391,8 @@ JNIEnv *env, jclass clazz, jstring path) (*env)->Throw(env, jthr); return -1; } - if (((jthr = setAttribute0(env, fd, SND_TIMEO, DEFAULT_SND_TIMEO))) || - ((jthr = setAttribute0(env, fd, RCV_TIMEO, DEFAULT_RCV_TIMEO)))) { + if (((jthr = setAttribute0(env, fd, SEND_TIMEOUT, DEFAULT_SEND_TIMEOUT))) 
|| + ((jthr = setAttribute0(env, fd, RECEIVE_TIMEOUT, DEFAULT_RECEIVE_TIMEOUT)))) { RETRY_ON_EINTR(ret, close(fd)); (*env)->Throw(env, jthr); return -1; @@ -412,7 +412,7 @@ static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val) int ret, buf; switch (type) { - case SND_BUF_SIZE: + case SEND_BUFFER_SIZE: buf = val; if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &buf, sizeof(buf))) { ret = errno; @@ -420,7 +420,7 @@ static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val) "setsockopt(SO_SNDBUF) error: %s", terror(ret)); } return NULL; - case RCV_BUF_SIZE: + case RECEIVE_BUFFER_SIZE: buf = val; if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &buf, sizeof(buf))) { ret = errno; @@ -428,7 +428,7 @@ static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val) "setsockopt(SO_RCVBUF) error: %s", terror(ret)); } return NULL; - case SND_TIMEO: + case SEND_TIMEOUT: javaMillisToTimeVal(val, &tv); if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (struct timeval *)&tv, sizeof(tv))) { @@ -437,7 +437,7 @@ static jthrowable setAttribute0(JNIEnv *env, jint fd, jint type, jint val) "setsockopt(SO_SNDTIMEO) error: %s", terror(ret)); } return NULL; - case RCV_TIMEO: + case RECEIVE_TIMEOUT: javaMillisToTimeVal(val, &tv); if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (struct timeval *)&tv, sizeof(tv))) { @@ -487,7 +487,7 @@ JNIEnv *env, jclass clazz, jint fd, jint type) int ret, rval = 0; switch (type) { - case SND_BUF_SIZE: + case SEND_BUFFER_SIZE: len = sizeof(rval); if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &rval, &len)) { ret = errno; @@ -496,7 +496,7 @@ JNIEnv *env, jclass clazz, jint fd, jint type) return -1; } return getSockOptBufSizeToJavaBufSize(rval); - case RCV_BUF_SIZE: + case RECEIVE_BUFFER_SIZE: len = sizeof(rval); if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rval, &len)) { ret = errno; @@ -505,7 +505,7 @@ JNIEnv *env, jclass clazz, jint fd, jint type) return -1; } return getSockOptBufSizeToJavaBufSize(rval); - case SND_TIMEO: + case SEND_TIMEOUT: memset(&tv, 0, sizeof(tv)); len = sizeof(struct timeval); if (getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &len)) { @@ -515,7 +515,7 @@ JNIEnv *env, jclass clazz, jint fd, jint type) return -1; } return timeValToJavaMillis(&tv); - case RCV_TIMEO: + case RECEIVE_TIMEOUT: memset(&tv, 0, sizeof(tv)); len = sizeof(struct timeval); if (getsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, &len)) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java index 3347655cf6f..d512027d45d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/unix/TestDomainSocket.java @@ -282,15 +282,15 @@ public class TestDomainSocket { DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH); try { // Let's set a new receive buffer size - int bufSize = serv.getAttribute(DomainSocket.RCV_BUF_SIZE); + int bufSize = serv.getAttribute(DomainSocket.RECEIVE_BUFFER_SIZE); int newBufSize = bufSize / 2; - serv.setAttribute(DomainSocket.RCV_BUF_SIZE, newBufSize); - int nextBufSize = serv.getAttribute(DomainSocket.RCV_BUF_SIZE); + serv.setAttribute(DomainSocket.RECEIVE_BUFFER_SIZE, newBufSize); + int nextBufSize = serv.getAttribute(DomainSocket.RECEIVE_BUFFER_SIZE); Assert.assertEquals(newBufSize, nextBufSize); // Let's set a server timeout int newTimeout = 1000; - 
serv.setAttribute(DomainSocket.RCV_TIMEO, newTimeout); - int nextTimeout = serv.getAttribute(DomainSocket.RCV_TIMEO); + serv.setAttribute(DomainSocket.RECEIVE_TIMEOUT, newTimeout); + int nextTimeout = serv.getAttribute(DomainSocket.RECEIVE_TIMEOUT); Assert.assertEquals(newTimeout, nextTimeout); try { serv.accept(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 94b5c0ce949..343dcb18767 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -47,3 +47,5 @@ HDFS-4485. DN should chmod socket path a+w. (Colin Patrick McCabe via atm) HDFS-4453. Make a simple doc to describe the usage and design of the shortcircuit read feature. (Colin Patrick McCabe via atm) HDFS-4496. DFSClient: don't create a domain socket unless we need it (Colin Patrick McCabe via todd) + +HDFS-347: style cleanups (Colin Patrick McCabe via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java index 0e2025bea21..a248f0b6718 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -114,7 +114,7 @@ class DomainSocketFactory { DomainSocket sock = null; try { sock = DomainSocket.connect(escapedPath); - sock.setAttribute(DomainSocket.RCV_TIMEO, conf.socketTimeout); + sock.setAttribute(DomainSocket.RECEIVE_TIMEOUT, conf.socketTimeout); success = true; } catch (IOException e) { LOG.warn("error creating DomainSocket", e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java index abe9b7d52df..46279b62e66 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeer.java @@ -50,12 +50,12 @@ public class DomainPeer implements Peer { @Override public void setReadTimeout(int timeoutMs) throws IOException { - socket.setAttribute(DomainSocket.RCV_TIMEO, timeoutMs); + socket.setAttribute(DomainSocket.RECEIVE_TIMEOUT, timeoutMs); } @Override public int getReceiveBufferSize() throws IOException { - return socket.getAttribute(DomainSocket.RCV_BUF_SIZE); + return socket.getAttribute(DomainSocket.RECEIVE_BUFFER_SIZE); } @Override @@ -66,7 +66,7 @@ public class DomainPeer implements Peer { @Override public void setWriteTimeout(int timeoutMs) throws IOException { - socket.setAttribute(DomainSocket.SND_TIMEO, timeoutMs); + socket.setAttribute(DomainSocket.SEND_TIMEOUT, timeoutMs); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java index bded892e8ac..dce64621482 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DomainPeerServer.java @@ -48,7 +48,7 @@ public class DomainPeerServer implements PeerServer { @Override public void setReceiveBufferSize(int size) throws IOException { - sock.setAttribute(DomainSocket.RCV_BUF_SIZE, size); + 
sock.setAttribute(DomainSocket.RECEIVE_BUFFER_SIZE, size); } @Override From 694a6721316aea14c1244447974231abc8dff0cb Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 27 Mar 2013 19:28:12 +0000 Subject: [PATCH 23/52] HDFS-4538. Allow use of legacy blockreader. Contributed by Colin Patrick McCabe. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1461818 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-347.txt | 3 + .../hadoop/hdfs/BlockReaderFactory.java | 27 +- .../apache/hadoop/hdfs/BlockReaderLocal.java | 5 +- .../hadoop/hdfs/BlockReaderLocalLegacy.java | 688 ++++++++++++++++++ .../org/apache/hadoop/hdfs/DFSClient.java | 24 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 + .../apache/hadoop/hdfs/DFSInputStream.java | 17 + .../hadoop/hdfs/DomainSocketFactory.java | 5 +- .../hadoop/hdfs/server/datanode/DataNode.java | 4 +- .../hdfs/TestBlockReaderLocalLegacy.java | 154 ++++ .../TestParallelShortCircuitLegacyRead.java | 46 ++ .../hdfs/TestParallelShortCircuitRead.java | 1 + ...estParallelShortCircuitReadNoChecksum.java | 1 + .../hdfs/TestParallelUnixDomainRead.java | 2 + 14 files changed, 967 insertions(+), 12 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt index 343dcb18767..378d435944d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-347.txt @@ -49,3 +49,6 @@ HDFS-4453. Make a simple doc to describe the usage and design of the shortcircui HDFS-4496. DFSClient: don't create a domain socket unless we need it (Colin Patrick McCabe via todd) HDFS-347: style cleanups (Colin Patrick McCabe via atm) + +HDFS-4538. 
Allow use of legacy blockreader (Colin Patrick McCabe via todd) + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index 89ed7c4859d..527291b82e5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -28,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.datatransfer.Sender; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto; @@ -36,7 +37,10 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; @@ -89,7 +93,9 @@ public class BlockReaderFactory { peer.setWriteTimeout(HdfsServerConstants.WRITE_TIMEOUT); if (peer.getDomainSocket() != null) { - if (allowShortCircuitLocalReads) { + if (allowShortCircuitLocalReads && + (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, + DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT))) { // If this is a domain socket, and short-circuit local reads are // enabled, try to set up a BlockReaderLocal. BlockReader reader = newShortCircuitBlockReader(conf, file, @@ -230,4 +236,23 @@ public class BlockReaderFactory { final String poolId, final long blockId) { return s.toString() + ":" + poolId + ":" + blockId; } + + /** + * Get {@link BlockReaderLocalLegacy} for short circuited local reads. + * This block reader implements the path-based style of local reads + * first introduced in HDFS-2246. 
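+ *
+ * A minimal caller-side sketch (not part of this patch; local variables such as
+ * conf, src, blk, token, dn, socketTimeout, startOffset and
+ * connectToDnViaHostname are assumed to be in scope, mirroring the call that
+ * DFSInputStream makes later in this patch):
+ * <pre>
+ * BlockReader reader = BlockReaderFactory.getLegacyBlockReaderLocal(
+ *     conf, src, blk, token, dn, socketTimeout, startOffset,
+ *     connectToDnViaHostname);
+ * </pre>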
+ */ + static BlockReader getLegacyBlockReaderLocal(Configuration conf, + String src, ExtendedBlock blk, Token accessToken, + DatanodeInfo chosenNode, int socketTimeout, long offsetIntoBlock, + boolean connectToDnViaHostname) throws InvalidToken, IOException { + try { + return BlockReaderLocalLegacy.newBlockReader(conf, src, blk, accessToken, + chosenNode, socketTimeout, offsetIntoBlock, blk.getNumBytes() + - offsetIntoBlock, connectToDnViaHostname); + } catch (RemoteException re) { + throw re.unwrapRemoteException(InvalidToken.class, + AccessControlException.class); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index ca9ce080f64..ea22a9888f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -51,7 +51,7 @@ import org.apache.hadoop.util.DataChecksum; * */ class BlockReaderLocal implements BlockReader { - static final Log LOG = LogFactory.getLog(DFSClient.class); + static final Log LOG = LogFactory.getLog(BlockReaderLocal.class); private final FileInputStream dataIn; // reader for the data file private final FileInputStream checksumIn; // reader for the checksum file @@ -499,8 +499,7 @@ class BlockReaderLocal implements BlockReader { fisCache.put(datanodeID, block, new FileInputStream[] {dataIn, checksumIn}); } else { LOG.debug("closing FileInputStream for " + filename); - dataIn.close(); - checksumIn.close(); + IOUtils.cleanup(LOG, dataIn, checksumIn); } if (slowReadBuff != null) { bufferPool.returnBuffer(slowReadBuff); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java new file mode 100644 index 00000000000..6d6096064bb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java @@ -0,0 +1,688 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.net.Socket; +import java.nio.ByteBuffer; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; +import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; +import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; +import org.apache.hadoop.hdfs.util.DirectBufferPool; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.DataChecksum; + +/** + * BlockReaderLocalLegacy enables local short circuited reads. If the DFS client is on + * the same machine as the datanode, then the client can read files directly + * from the local file system rather than going through the datanode for better + * performance.
+ * + * This is the legacy implementation based on HDFS-2246, which requires + * permissions on the datanode to be set so that clients can directly access the + * blocks. The new implementation based on HDFS-347 should be preferred on UNIX + * systems where the required native code has been implemented.
+ *
+ * {@link BlockReaderLocalLegacy} works as follows:
+ * <ul>
+ * <li>The client performing short circuit reads must be configured at the
+ * datanode.</li>
+ * <li>The client gets the path to the file where block is stored using
+ * {@link org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol#getBlockLocalPathInfo(ExtendedBlock, Token)}
+ * RPC call</li>
+ * <li>Client uses kerberos authentication to connect to the datanode over RPC,
+ * if security is enabled.</li>
+ * </ul>
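+ *
+ * A hedged configuration sketch for enabling this legacy code path; the keys and
+ * values below simply mirror what TestBlockReaderLocalLegacy sets elsewhere in
+ * this patch and are illustrative rather than normative:
+ * <pre>
+ * Configuration conf = new HdfsConfiguration();
+ * conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
+ * conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
+ * conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
+ *     UserGroupInformation.getCurrentUser().getShortUserName());
+ * </pre>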
+ */ +class BlockReaderLocalLegacy implements BlockReader { + private static final Log LOG = LogFactory.getLog(DFSClient.class); + + //Stores the cache and proxy for a local datanode. + private static class LocalDatanodeInfo { + private ClientDatanodeProtocol proxy = null; + private final Map cache; + + LocalDatanodeInfo() { + final int cacheSize = 10000; + final float hashTableLoadFactor = 0.75f; + int hashTableCapacity = (int) Math.ceil(cacheSize / hashTableLoadFactor) + 1; + cache = Collections + .synchronizedMap(new LinkedHashMap( + hashTableCapacity, hashTableLoadFactor, true) { + private static final long serialVersionUID = 1; + + @Override + protected boolean removeEldestEntry( + Map.Entry eldest) { + return size() > cacheSize; + } + }); + } + + private synchronized ClientDatanodeProtocol getDatanodeProxy( + DatanodeInfo node, Configuration conf, int socketTimeout, + boolean connectToDnViaHostname) throws IOException { + if (proxy == null) { + proxy = DFSUtil.createClientDatanodeProtocolProxy(node, conf, + socketTimeout, connectToDnViaHostname); + } + return proxy; + } + + private synchronized void resetDatanodeProxy() { + if (null != proxy) { + RPC.stopProxy(proxy); + proxy = null; + } + } + + private BlockLocalPathInfo getBlockLocalPathInfo(ExtendedBlock b) { + return cache.get(b); + } + + private void setBlockLocalPathInfo(ExtendedBlock b, BlockLocalPathInfo info) { + cache.put(b, info); + } + + private void removeBlockLocalPathInfo(ExtendedBlock b) { + cache.remove(b); + } + } + + // Multiple datanodes could be running on the local machine. Store proxies in + // a map keyed by the ipc port of the datanode. + private static Map localDatanodeInfoMap = new HashMap(); + + private final FileInputStream dataIn; // reader for the data file + private final FileInputStream checksumIn; // reader for the checksum file + + /** + * Offset from the most recent chunk boundary at which the next read should + * take place. Is only set to non-zero at construction time, and is + * decremented (usually to 0) by subsequent reads. This avoids having to do a + * checksum read at construction to position the read cursor correctly. + */ + private int offsetFromChunkBoundary; + + private byte[] skipBuf = null; + + /** + * Used for checksummed reads that need to be staged before copying to their + * output buffer because they are either a) smaller than the checksum chunk + * size or b) issued by the slower read(byte[]...) path + */ + private ByteBuffer slowReadBuff = null; + private ByteBuffer checksumBuff = null; + private DataChecksum checksum; + private final boolean verifyChecksum; + + private static DirectBufferPool bufferPool = new DirectBufferPool(); + + private final int bytesPerChecksum; + private final int checksumSize; + + /** offset in block where reader wants to actually read */ + private long startOffset; + private final String filename; + + /** + * The only way this object can be instantiated. + */ + static BlockReaderLocalLegacy newBlockReader(Configuration conf, String file, + ExtendedBlock blk, Token token, DatanodeInfo node, + int socketTimeout, long startOffset, long length, + boolean connectToDnViaHostname) throws IOException { + + LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node + .getIpcPort()); + // check the cache first + BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk); + if (pathinfo == null) { + pathinfo = getBlockPathInfo(blk, node, conf, socketTimeout, token, + connectToDnViaHostname); + } + + // check to see if the file exists. 
It may so happen that the + // HDFS file has been deleted and this block-lookup is occurring + // on behalf of a new HDFS file. This time, the block file could + // be residing in a different portion of the fs.data.dir directory. + // In this case, we remove this entry from the cache. The next + // call to this method will re-populate the cache. + FileInputStream dataIn = null; + FileInputStream checksumIn = null; + BlockReaderLocalLegacy localBlockReader = null; + boolean skipChecksumCheck = skipChecksumCheck(conf); + try { + // get a local file system + File blkfile = new File(pathinfo.getBlockPath()); + dataIn = new FileInputStream(blkfile); + + if (LOG.isDebugEnabled()) { + LOG.debug("New BlockReaderLocalLegacy for file " + blkfile + " of size " + + blkfile.length() + " startOffset " + startOffset + " length " + + length + " short circuit checksum " + !skipChecksumCheck); + } + + if (!skipChecksumCheck) { + // get the metadata file + File metafile = new File(pathinfo.getMetaPath()); + checksumIn = new FileInputStream(metafile); + + // read and handle the common header here. For now just a version + BlockMetadataHeader header = BlockMetadataHeader + .readHeader(new DataInputStream(checksumIn)); + short version = header.getVersion(); + if (version != BlockMetadataHeader.VERSION) { + LOG.warn("Wrong version (" + version + ") for metadata file for " + + blk + " ignoring ..."); + } + DataChecksum checksum = header.getChecksum(); + long firstChunkOffset = startOffset + - (startOffset % checksum.getBytesPerChecksum()); + localBlockReader = new BlockReaderLocalLegacy(conf, file, blk, token, + startOffset, length, pathinfo, checksum, true, dataIn, + firstChunkOffset, checksumIn); + } else { + localBlockReader = new BlockReaderLocalLegacy(conf, file, blk, token, + startOffset, length, pathinfo, dataIn); + } + } catch (IOException e) { + // remove from cache + localDatanodeInfo.removeBlockLocalPathInfo(blk); + DFSClient.LOG.warn("BlockReaderLocalLegacy: Removing " + blk + + " from cache because local file " + pathinfo.getBlockPath() + + " could not be opened."); + throw e; + } finally { + if (localBlockReader == null) { + if (dataIn != null) { + dataIn.close(); + } + if (checksumIn != null) { + checksumIn.close(); + } + } + } + return localBlockReader; + } + + private static synchronized LocalDatanodeInfo getLocalDatanodeInfo(int port) { + LocalDatanodeInfo ldInfo = localDatanodeInfoMap.get(port); + if (ldInfo == null) { + ldInfo = new LocalDatanodeInfo(); + localDatanodeInfoMap.put(port, ldInfo); + } + return ldInfo; + } + + private static BlockLocalPathInfo getBlockPathInfo(ExtendedBlock blk, + DatanodeInfo node, Configuration conf, int timeout, + Token token, boolean connectToDnViaHostname) + throws IOException { + LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort()); + BlockLocalPathInfo pathinfo = null; + ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(node, + conf, timeout, connectToDnViaHostname); + try { + // make RPC to local datanode to find local pathnames of blocks + pathinfo = proxy.getBlockLocalPathInfo(blk, token); + if (pathinfo != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("Cached location of block " + blk + " as " + pathinfo); + } + localDatanodeInfo.setBlockLocalPathInfo(blk, pathinfo); + } + } catch (IOException e) { + localDatanodeInfo.resetDatanodeProxy(); // Reset proxy on error + throw e; + } + return pathinfo; + } + + private static boolean skipChecksumCheck(Configuration conf) { + return conf.getBoolean( + 
DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_DEFAULT); + } + + private static int getSlowReadBufferNumChunks(Configuration conf, int bytesPerChecksum) { + int bufferSizeBytes = conf.getInt(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY, + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT); + + if (bufferSizeBytes < bytesPerChecksum) { + throw new IllegalArgumentException("Configured BlockReaderLocalLegacy buffer size (" + bufferSizeBytes + ") " + + "is not large enough to hold a single chunk (" + bytesPerChecksum + "). Please configure " + + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately"); + } + + // Round down to nearest chunk size + return bufferSizeBytes / bytesPerChecksum; + } + + private BlockReaderLocalLegacy(Configuration conf, String hdfsfile, + ExtendedBlock block, Token token, long startOffset, + long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn) + throws IOException { + this(conf, hdfsfile, block, token, startOffset, length, pathinfo, + DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false, + dataIn, startOffset, null); + } + + private BlockReaderLocalLegacy(Configuration conf, String hdfsfile, + ExtendedBlock block, Token token, long startOffset, + long length, BlockLocalPathInfo pathinfo, DataChecksum checksum, + boolean verifyChecksum, FileInputStream dataIn, long firstChunkOffset, + FileInputStream checksumIn) throws IOException { + this.filename = hdfsfile; + this.checksum = checksum; + this.verifyChecksum = verifyChecksum; + this.startOffset = Math.max(startOffset, 0); + + bytesPerChecksum = this.checksum.getBytesPerChecksum(); + checksumSize = this.checksum.getChecksumSize(); + + this.dataIn = dataIn; + this.checksumIn = checksumIn; + this.offsetFromChunkBoundary = (int) (startOffset-firstChunkOffset); + + int chunksPerChecksumRead = getSlowReadBufferNumChunks(conf, bytesPerChecksum); + slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead); + checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead); + // Initially the buffers have nothing to read. + slowReadBuff.flip(); + checksumBuff.flip(); + boolean success = false; + try { + // Skip both input streams to beginning of the chunk containing startOffset + IOUtils.skipFully(dataIn, firstChunkOffset); + if (checksumIn != null) { + long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize; + IOUtils.skipFully(checksumIn, checkSumOffset); + } + success = true; + } finally { + if (!success) { + bufferPool.returnBuffer(slowReadBuff); + bufferPool.returnBuffer(checksumBuff); + } + } + } + + /** + * Reads bytes into a buffer until EOF or the buffer's limit is reached + */ + private int fillBuffer(FileInputStream stream, ByteBuffer buf) + throws IOException { + int bytesRead = stream.getChannel().read(buf); + if (bytesRead < 0) { + //EOF + return bytesRead; + } + while (buf.remaining() > 0) { + int n = stream.getChannel().read(buf); + if (n < 0) { + //EOF + return bytesRead; + } + bytesRead += n; + } + return bytesRead; + } + + /** + * Utility method used by read(ByteBuffer) to partially copy a ByteBuffer into + * another. 
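+ * For example (illustrative): writeSlice(from, to, 10) copies ten bytes,
+ * advances the positions of both buffers by ten, and restores the original
+ * limit of from before returning.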
+ */ + private void writeSlice(ByteBuffer from, ByteBuffer to, int length) { + int oldLimit = from.limit(); + from.limit(from.position() + length); + try { + to.put(from); + } finally { + from.limit(oldLimit); + } + } + + @Override + public synchronized int read(ByteBuffer buf) throws IOException { + int nRead = 0; + if (verifyChecksum) { + // A 'direct' read actually has three phases. The first drains any + // remaining bytes from the slow read buffer. After this the read is + // guaranteed to be on a checksum chunk boundary. If there are still bytes + // to read, the fast direct path is used for as many remaining bytes as + // possible, up to a multiple of the checksum chunk size. Finally, any + // 'odd' bytes remaining at the end of the read cause another slow read to + // be issued, which involves an extra copy. + + // Every 'slow' read tries to fill the slow read buffer in one go for + // efficiency's sake. As described above, all non-checksum-chunk-aligned + // reads will be served from the slower read path. + + if (slowReadBuff.hasRemaining()) { + // There are remaining bytes from a small read available. This usually + // means this read is unaligned, which falls back to the slow path. + int fromSlowReadBuff = Math.min(buf.remaining(), slowReadBuff.remaining()); + writeSlice(slowReadBuff, buf, fromSlowReadBuff); + nRead += fromSlowReadBuff; + } + + if (buf.remaining() >= bytesPerChecksum && offsetFromChunkBoundary == 0) { + // Since we have drained the 'small read' buffer, we are guaranteed to + // be chunk-aligned + int len = buf.remaining() - (buf.remaining() % bytesPerChecksum); + + // There's only enough checksum buffer space available to checksum one + // entire slow read buffer. This saves keeping the number of checksum + // chunks around. + len = Math.min(len, slowReadBuff.capacity()); + int oldlimit = buf.limit(); + buf.limit(buf.position() + len); + int readResult = 0; + try { + readResult = doByteBufferRead(buf); + } finally { + buf.limit(oldlimit); + } + if (readResult == -1) { + return nRead; + } else { + nRead += readResult; + buf.position(buf.position() + readResult); + } + } + + // offsetFromChunkBoundary > 0 => unaligned read, use slow path to read + // until chunk boundary + if ((buf.remaining() > 0 && buf.remaining() < bytesPerChecksum) || offsetFromChunkBoundary > 0) { + int toRead = Math.min(buf.remaining(), bytesPerChecksum - offsetFromChunkBoundary); + int readResult = fillSlowReadBuffer(toRead); + if (readResult == -1) { + return nRead; + } else { + int fromSlowReadBuff = Math.min(readResult, buf.remaining()); + writeSlice(slowReadBuff, buf, fromSlowReadBuff); + nRead += fromSlowReadBuff; + } + } + } else { + // Non-checksummed reads are much easier; we can just fill the buffer directly. + nRead = doByteBufferRead(buf); + if (nRead > 0) { + buf.position(buf.position() + nRead); + } + } + return nRead; + } + + /** + * Tries to read as many bytes as possible into supplied buffer, checksumming + * each chunk if needed. + * + * Preconditions: + *
+ * <ul>
+ * <li>
+ * If checksumming is enabled, buf.remaining must be a multiple of
+ * bytesPerChecksum. Note that this is not a requirement for clients of
+ * read(ByteBuffer) - in the case of non-checksum-sized read requests,
+ * read(ByteBuffer) will substitute a suitably sized buffer to pass to this
+ * method.
+ * </li>
+ * </ul>
+ *
+ * Postconditions:
+ * <ul>
+ * <li>buf.limit and buf.mark are unchanged.</li>
+ * <li>buf.position += min(offsetFromChunkBoundary, totalBytesRead) - so the
+ * requested bytes can be read straight from the buffer</li>
+ * </ul>
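+ *
+ * Worked example (illustrative numbers only): with bytesPerChecksum = 512 and
+ * offsetFromChunkBoundary = 100, a call that fills 612 chunk-aligned bytes
+ * advances buf.position by 100 and returns 512 - the number of bytes the
+ * caller can consume from the buffer.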
+ * + * @param buf + * byte buffer to write bytes to. If checksums are not required, buf + * can have any number of bytes remaining, otherwise there must be a + * multiple of the checksum chunk size remaining. + * @return max(min(totalBytesRead, len) - offsetFromChunkBoundary, 0) + * that is, the the number of useful bytes (up to the amount + * requested) readable from the buffer by the client. + */ + private synchronized int doByteBufferRead(ByteBuffer buf) throws IOException { + if (verifyChecksum) { + assert buf.remaining() % bytesPerChecksum == 0; + } + int dataRead = -1; + + int oldpos = buf.position(); + // Read as much as we can into the buffer. + dataRead = fillBuffer(dataIn, buf); + + if (dataRead == -1) { + return -1; + } + + if (verifyChecksum) { + ByteBuffer toChecksum = buf.duplicate(); + toChecksum.position(oldpos); + toChecksum.limit(oldpos + dataRead); + + checksumBuff.clear(); + // Equivalent to (int)Math.ceil(toChecksum.remaining() * 1.0 / bytesPerChecksum ); + int numChunks = + (toChecksum.remaining() + bytesPerChecksum - 1) / bytesPerChecksum; + checksumBuff.limit(checksumSize * numChunks); + + fillBuffer(checksumIn, checksumBuff); + checksumBuff.flip(); + + checksum.verifyChunkedSums(toChecksum, checksumBuff, filename, + this.startOffset); + } + + if (dataRead >= 0) { + buf.position(oldpos + Math.min(offsetFromChunkBoundary, dataRead)); + } + + if (dataRead < offsetFromChunkBoundary) { + // yikes, didn't even get enough bytes to honour offset. This can happen + // even if we are verifying checksums if we are at EOF. + offsetFromChunkBoundary -= dataRead; + dataRead = 0; + } else { + dataRead -= offsetFromChunkBoundary; + offsetFromChunkBoundary = 0; + } + + return dataRead; + } + + /** + * Ensures that up to len bytes are available and checksummed in the slow read + * buffer. The number of bytes available to read is returned. If the buffer is + * not already empty, the number of remaining bytes is returned and no actual + * read happens. + * + * @param len + * the maximum number of bytes to make available. After len bytes + * are read, the underlying bytestream must be at a checksum + * boundary, or EOF. That is, (len + currentPosition) % + * bytesPerChecksum == 0. + * @return the number of bytes available to read, or -1 if EOF. + */ + private synchronized int fillSlowReadBuffer(int len) throws IOException { + int nRead = -1; + if (slowReadBuff.hasRemaining()) { + // Already got data, good to go. + nRead = Math.min(len, slowReadBuff.remaining()); + } else { + // Round a complete read of len bytes (plus any implicit offset) to the + // next chunk boundary, since we try and read in multiples of a chunk + int nextChunk = len + offsetFromChunkBoundary + + (bytesPerChecksum - ((len + offsetFromChunkBoundary) % bytesPerChecksum)); + int limit = Math.min(nextChunk, slowReadBuff.capacity()); + assert limit % bytesPerChecksum == 0; + + slowReadBuff.clear(); + slowReadBuff.limit(limit); + + nRead = doByteBufferRead(slowReadBuff); + + if (nRead > 0) { + // So that next time we call slowReadBuff.hasRemaining(), we don't get a + // false positive. 
+ slowReadBuff.limit(nRead + slowReadBuff.position()); + } + } + return nRead; + } + + @Override + public synchronized int read(byte[] buf, int off, int len) throws IOException { + if (LOG.isTraceEnabled()) { + LOG.trace("read off " + off + " len " + len); + } + if (!verifyChecksum) { + return dataIn.read(buf, off, len); + } + + int nRead = fillSlowReadBuffer(slowReadBuff.capacity()); + + if (nRead > 0) { + // Possible that buffer is filled with a larger read than we need, since + // we tried to read as much as possible at once + nRead = Math.min(len, nRead); + slowReadBuff.get(buf, off, nRead); + } + + return nRead; + } + + @Override + public synchronized long skip(long n) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("skip " + n); + } + if (n <= 0) { + return 0; + } + if (!verifyChecksum) { + return dataIn.skip(n); + } + + // caller made sure newPosition is not beyond EOF. + int remaining = slowReadBuff.remaining(); + int position = slowReadBuff.position(); + int newPosition = position + (int)n; + + // if the new offset is already read into dataBuff, just reposition + if (n <= remaining) { + assert offsetFromChunkBoundary == 0; + slowReadBuff.position(newPosition); + return n; + } + + // for small gap, read through to keep the data/checksum in sync + if (n - remaining <= bytesPerChecksum) { + slowReadBuff.position(position + remaining); + if (skipBuf == null) { + skipBuf = new byte[bytesPerChecksum]; + } + int ret = read(skipBuf, 0, (int)(n - remaining)); + return ret; + } + + // optimize for big gap: discard the current buffer, skip to + // the beginning of the appropriate checksum chunk and then + // read to the middle of that chunk to be in sync with checksums. + + // We can't use this.offsetFromChunkBoundary because we need to know how + // many bytes of the offset were really read. Calling read(..) with a + // positive this.offsetFromChunkBoundary causes that many bytes to get + // silently skipped. + int myOffsetFromChunkBoundary = newPosition % bytesPerChecksum; + long toskip = n - remaining - myOffsetFromChunkBoundary; + + slowReadBuff.position(slowReadBuff.limit()); + checksumBuff.position(checksumBuff.limit()); + + IOUtils.skipFully(dataIn, toskip); + long checkSumOffset = (toskip / bytesPerChecksum) * checksumSize; + IOUtils.skipFully(checksumIn, checkSumOffset); + + // read into the middle of the chunk + if (skipBuf == null) { + skipBuf = new byte[bytesPerChecksum]; + } + assert skipBuf.length == bytesPerChecksum; + assert myOffsetFromChunkBoundary < bytesPerChecksum; + + int ret = read(skipBuf, 0, myOffsetFromChunkBoundary); + + if (ret == -1) { // EOS + return toskip; + } else { + return (toskip + ret); + } + } + + @Override + public synchronized void close(PeerCache peerCache, + FileInputStreamCache fisCache) throws IOException { + IOUtils.cleanup(LOG, dataIn, checksumIn); + if (slowReadBuff != null) { + bufferPool.returnBuffer(slowReadBuff); + slowReadBuff = null; + } + if (checksumBuff != null) { + bufferPool.returnBuffer(checksumBuff); + checksumBuff = null; + } + startOffset = -1; + checksum = null; + } + + @Override + public int readAll(byte[] buf, int offset, int len) throws IOException { + return BlockReaderUtil.readAll(this, buf, offset, len); + } + + @Override + public void readFully(byte[] buf, int off, int len) throws IOException { + BlockReaderUtil.readFully(this, buf, off, len); + } + + @Override + public int available() throws IOException { + // We never do network I/O in BlockReaderLocalLegacy. 
+ return Integer.MAX_VALUE; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 345fef41d85..a2b4e35626a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -195,7 +195,8 @@ public class DFSClient implements java.io.Closeable { private Random r = new Random(); private SocketAddress[] localInterfaceAddrs; private DataEncryptionKey encryptionKey; - + private boolean shouldUseLegacyBlockReaderLocal; + /** * DFSClient configuration */ @@ -221,7 +222,7 @@ public class DFSClient implements java.io.Closeable { final short defaultReplication; final String taskId; final FsPermission uMask; - final boolean useLegacyBlockReader; + final boolean useLegacyBlockReaderLocal; final boolean connectToDnViaHostname; final boolean getHdfsBlocksMetadataEnabled; final int getFileBlockStorageLocationsNumThreads; @@ -278,9 +279,9 @@ public class DFSClient implements java.io.Closeable { .getInt(DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY, DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_DEFAULT); uMask = FsPermission.getUMask(conf); - useLegacyBlockReader = conf.getBoolean( - DFS_CLIENT_USE_LEGACY_BLOCKREADER, - DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT); + useLegacyBlockReaderLocal = conf.getBoolean( + DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, + DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT); connectToDnViaHostname = conf.getBoolean(DFS_CLIENT_USE_DN_HOSTNAME, DFS_CLIENT_USE_DN_HOSTNAME_DEFAULT); getHdfsBlocksMetadataEnabled = conf.getBoolean( @@ -407,6 +408,11 @@ public class DFSClient implements java.io.Closeable { throws IOException { // Copy only the required DFSClient configuration this.dfsClientConf = new Conf(conf); + this.shouldUseLegacyBlockReaderLocal = + this.dfsClientConf.useLegacyBlockReaderLocal; + if (this.dfsClientConf.useLegacyBlockReaderLocal) { + LOG.debug("Using legacy short-circuit local reads."); + } this.conf = conf; this.stats = stats; this.socketFactory = NetUtils.getSocketFactory(conf, ClientProtocol.class); @@ -2218,4 +2224,12 @@ public class DFSClient implements java.io.Closeable { public DomainSocketFactory getDomainSocketFactory() { return domainSocketFactory; } + + public void disableLegacyBlockReaderLocal() { + shouldUseLegacyBlockReaderLocal = false; + } + + public boolean useLegacyBlockReaderLocal() { + return shouldUseLegacyBlockReaderLocal; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 269ea9d844d..302ec83e420 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -261,6 +261,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT = 3; public static final String DFS_CLIENT_USE_LEGACY_BLOCKREADER = "dfs.client.use.legacy.blockreader"; public static final boolean DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT = false; + public static final String DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL = "dfs.client.use.legacy.blockreader.local"; + public static final boolean 
DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT = false; public static final String DFS_BALANCER_MOVEDWINWIDTH_KEY = "dfs.balancer.movedWinWidth"; public static final long DFS_BALANCER_MOVEDWINWIDTH_DEFAULT = 5400*1000L; public static final String DFS_DATANODE_ADDRESS_KEY = "dfs.datanode.address"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 69198ff6ddf..7a6557c3952 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -917,6 +917,23 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable return new BlockReaderLocal(dfsClient.conf, file, block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum); } + + // If the legacy local block reader is enabled and we are reading a local + // block, try to create a BlockReaderLocalLegacy. The legacy local block + // reader implements local reads in the style first introduced by HDFS-2246. + if ((dfsClient.useLegacyBlockReaderLocal()) && + DFSClient.isLocalAddress(dnAddr) && + (!shortCircuitForbidden())) { + try { + return BlockReaderFactory.getLegacyBlockReaderLocal(dfsClient.conf, + clientName, block, blockToken, chosenNode, + dfsClient.hdfsTimeout, startOffset,dfsClient.connectToDnViaHostname()); + } catch (IOException e) { + DFSClient.LOG.warn("error creating legacy BlockReaderLocal. " + + "Disabling legacy local reads.", e); + dfsClient.disableLegacyBlockReaderLocal(); + } + } // Look for cached domain peers. int cacheTries = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java index a248f0b6718..ebcb84a810e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DomainSocketFactory.java @@ -52,7 +52,7 @@ class DomainSocketFactory { this.conf = conf; String feature = null; - if (conf.shortCircuitLocalReads) { + if (conf.shortCircuitLocalReads && (!conf.useLegacyBlockReaderLocal)) { feature = "The short-circuit local reads feature"; } else if (conf.domainSocketDataTraffic) { feature = "UNIX domain socket data traffic"; @@ -86,7 +86,8 @@ class DomainSocketFactory { // sockets. if (conf.domainSocketPath.isEmpty()) return null; // If we can't do anything with the domain socket, don't create it. 
- if (!(conf.domainSocketDataTraffic || conf.shortCircuitLocalReads)) { + if ((conf.domainSocketDataTraffic == false) && + ((!conf.shortCircuitLocalReads) || conf.useLegacyBlockReaderLocal)) { return null; } // UNIX domain sockets can only be used to talk to local peers diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 10a1f8dd7e7..999ef8daa87 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -581,7 +581,9 @@ public class DataNode extends Configured DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_DEFAULT); if (domainSocketPath.isEmpty()) { if (conf.getBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT)) { + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_DEFAULT) && + (!conf.getBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, + DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT))) { LOG.warn("Although short-circuit local reads are configured, " + "they are disabled because you didn't configure " + DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java new file mode 100644 index 00000000000..718b349c5bb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocalLegacy.java @@ -0,0 +1,154 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import org.apache.hadoop.fs.ChecksumException; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestBlockReaderLocalLegacy { + @BeforeClass + public static void setupCluster() throws IOException { + DFSInputStream.tcpReadsDisabledForTesting = true; + DomainSocket.disableBindPathValidation(); + } + + private static HdfsConfiguration getConfiguration( + TemporarySocketDirectory socketDir) throws IOException { + HdfsConfiguration conf = new HdfsConfiguration(); + if (socketDir == null) { + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, ""); + } else { + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, + new File(socketDir.getDir(), "TestBlockReaderLocalLegacy.%d.sock"). + getAbsolutePath()); + } + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, + false); + conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, + UserGroupInformation.getCurrentUser().getShortUserName()); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false); + return conf; + } + + /** + * Test that, in the case of an error, the position and limit of a ByteBuffer + * are left unchanged. This is not mandated by ByteBufferReadable, but clients + * of this class might immediately issue a retry on failure, so it's polite. 
+ */ + @Test + public void testStablePositionAfterCorruptRead() throws Exception { + final short REPL_FACTOR = 1; + final long FILE_LENGTH = 512L; + + HdfsConfiguration conf = getConfiguration(null); + MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); + FileSystem fs = cluster.getFileSystem(); + + Path path = new Path("/corrupted"); + + DFSTestUtil.createFile(fs, path, FILE_LENGTH, REPL_FACTOR, 12345L); + DFSTestUtil.waitReplication(fs, path, REPL_FACTOR); + + ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, path); + int blockFilesCorrupted = cluster.corruptBlockOnDataNodes(block); + assertEquals("All replicas not corrupted", REPL_FACTOR, blockFilesCorrupted); + + FSDataInputStream dis = cluster.getFileSystem().open(path); + ByteBuffer buf = ByteBuffer.allocateDirect((int)FILE_LENGTH); + boolean sawException = false; + try { + dis.read(buf); + } catch (ChecksumException ex) { + sawException = true; + } + + assertTrue(sawException); + assertEquals(0, buf.position()); + assertEquals(buf.capacity(), buf.limit()); + + dis = cluster.getFileSystem().open(path); + buf.position(3); + buf.limit(25); + sawException = false; + try { + dis.read(buf); + } catch (ChecksumException ex) { + sawException = true; + } + + assertTrue(sawException); + assertEquals(3, buf.position()); + assertEquals(25, buf.limit()); + cluster.shutdown(); + } + + @Test + public void testBothOldAndNewShortCircuitConfigured() throws Exception { + final short REPL_FACTOR = 1; + final int FILE_LENGTH = 512; + Assume.assumeTrue(null == DomainSocket.getLoadingFailureReason()); + TemporarySocketDirectory socketDir = new TemporarySocketDirectory(); + HdfsConfiguration conf = getConfiguration(socketDir); + MiniDFSCluster cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); + socketDir.close(); + FileSystem fs = cluster.getFileSystem(); + + Path path = new Path("/foo"); + byte orig[] = new byte[FILE_LENGTH]; + for (int i = 0; i < orig.length; i++) { + orig[i] = (byte)(i%10); + } + FSDataOutputStream fos = fs.create(path, (short)1); + fos.write(orig); + fos.close(); + DFSTestUtil.waitReplication(fs, path, REPL_FACTOR); + FSDataInputStream fis = cluster.getFileSystem().open(path); + byte buf[] = new byte[FILE_LENGTH]; + IOUtils.readFully(fis, buf, 0, FILE_LENGTH); + fis.close(); + Assert.assertArrayEquals(orig, buf); + Arrays.equals(orig, buf); + cluster.shutdown(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java new file mode 100644 index 00000000000..4ba31bdbb61 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitLegacyRead.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import org.apache.hadoop.net.unix.DomainSocket; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +public class TestParallelShortCircuitLegacyRead extends TestParallelReadUtil { + @BeforeClass + static public void setupCluster() throws Exception { + DFSInputStream.tcpReadsDisabledForTesting = true; + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, ""); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, false); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); + conf.setBoolean(DFSConfigKeys. + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false); + conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, + UserGroupInformation.getCurrentUser().getShortUserName()); + DomainSocket.disableBindPathValidation(); + setupCluster(1, conf); + } + + @AfterClass + static public void teardownCluster() throws Exception { + TestParallelReadUtil.teardownCluster(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java index 0e21cdcd8f4..a0450fa1a23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitRead.java @@ -33,6 +33,7 @@ public class TestParallelShortCircuitRead extends TestParallelReadUtil { @BeforeClass static public void setupCluster() throws Exception { if (DomainSocket.getLoadingFailureReason() != null) return; + DFSInputStream.tcpReadsDisabledForTesting = true; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java index a704dd30bb2..172af31ce17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelShortCircuitReadNoChecksum.java @@ -33,6 +33,7 @@ public class TestParallelShortCircuitReadNoChecksum extends TestParallelReadUtil @BeforeClass static public void setupCluster() throws Exception { if (DomainSocket.getLoadingFailureReason() != null) return; + DFSInputStream.tcpReadsDisabledForTesting = true; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java index 84688cb4081..00c65f9e61e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestParallelUnixDomainRead.java @@ -33,11 +33,13 @@ public class TestParallelUnixDomainRead extends TestParallelReadUtil { @BeforeClass static public void setupCluster() throws Exception { if (DomainSocket.getLoadingFailureReason() != null) return; + DFSInputStream.tcpReadsDisabledForTesting = true; sockDir = new TemporarySocketDirectory(); HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "TestParallelLocalRead.%d.sock").getAbsolutePath()); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC, true); DomainSocket.disableBindPathValidation(); setupCluster(1, conf); } From bbb24fbf5d220fbe137d43651ba3802a9806b1a3 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 29 Mar 2013 21:33:35 +0000 Subject: [PATCH 24/52] Merge trunk into branch. Conflicts resolved: C hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java ! C hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/SocketCache.java C hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java C hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java C hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java C hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java (thanks to Colin for help resolving) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-347@1462652 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 180 +++++- .../hadoop-hdfs/src/CMakeLists.txt | 5 +- .../BookKeeperEditLogInputStream.java | 5 + .../src/contrib/libwebhdfs/CMakeLists.txt | 3 + .../libwebhdfs/src/test_libwebhdfs_ops.c | 11 +- .../hadoop-hdfs/src/main/bin/hdfs | 36 +- .../hadoop-hdfs/src/main/bin/hdfs-config.cmd | 43 ++ .../hadoop-hdfs/src/main/bin/hdfs.cmd | 171 ++++++ .../hadoop-hdfs/src/main/bin/start-dfs.cmd | 41 ++ .../hadoop-hdfs/src/main/bin/stop-dfs.cmd | 41 ++ .../src/documentation/content/xdocs/site.xml | 1 - .../hadoop/hdfs/BlockReaderFactory.java | 14 +- .../hadoop/hdfs/BlockReaderLocalLegacy.java | 48 +- .../org/apache/hadoop/hdfs/DFSClient.java | 9 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 10 +- .../apache/hadoop/hdfs/DFSInputStream.java | 17 +- .../apache/hadoop/hdfs/DFSOutputStream.java | 34 +- .../java/org/apache/hadoop/hdfs/DFSUtil.java | 18 +- .../hadoop/hdfs/DistributedFileSystem.java | 24 +- .../apache/hadoop/hdfs/HftpFileSystem.java | 5 + .../qjournal/client/IPCLoggerChannel.java | 8 + .../hdfs/qjournal/client/QuorumCall.java | 28 + .../hadoop/hdfs/qjournal/server/Journal.java | 10 +- .../hdfs/qjournal/server/JournalNode.java | 16 +- .../DelegationTokenSecretManager.java | 1 + .../server/blockmanagement/BlockManager.java | 5 +- .../server/blockmanagement/BlocksMap.java | 31 +- .../blockmanagement/DatanodeManager.java | 203 ++++--- .../hadoop/hdfs/server/common/Storage.java | 5 +- .../hdfs/server/datanode/BPOfferService.java | 2 +- .../hdfs/server/datanode/BlockReceiver.java | 8 +- 
.../hdfs/server/datanode/BlockSender.java | 11 +- .../hadoop/hdfs/server/datanode/DataNode.java | 44 +- .../fsdataset/impl/FsDatasetImpl.java | 39 +- .../hdfs/server/namenode/CheckpointConf.java | 8 + .../namenode/CheckpointFaultInjector.java | 2 + .../namenode/EditLogBackupInputStream.java | 5 + .../namenode/EditLogFileInputStream.java | 13 +- .../server/namenode/EditLogInputStream.java | 5 + .../hdfs/server/namenode/FSDirectory.java | 5 + .../hdfs/server/namenode/FSEditLogOp.java | 19 +- .../hadoop/hdfs/server/namenode/FSImage.java | 89 ++- .../hdfs/server/namenode/FSImageFormat.java | 4 +- ...ImagePreTransactionalStorageInspector.java | 8 +- .../namenode/FSImageStorageInspector.java | 4 +- .../FSImageTransactionalStorageInspector.java | 30 +- .../hdfs/server/namenode/FSNamesystem.java | 556 +++++++++--------- .../server/namenode/FSPermissionChecker.java | 56 +- .../hadoop/hdfs/server/namenode/INode.java | 6 +- .../server/namenode/NameNodeHttpServer.java | 12 +- .../server/namenode/NameNodeRpcServer.java | 6 - .../hdfs/server/namenode/Namesystem.java | 4 + .../namenode/RedundantEditLogInputStream.java | 7 + .../server/namenode/SecondaryNameNode.java | 62 +- .../hdfs/server/namenode/TransferFsImage.java | 1 - .../hdfs/server/namenode/ha/HAContext.java | 14 +- .../web/resources/NamenodeWebHdfsMethods.java | 2 - .../OfflineEditsXmlLoader.java | 3 +- .../hadoop/hdfs/util/LightWeightGSet.java | 55 +- .../org/apache/hadoop/hdfs/util/XMLUtils.java | 138 ++++- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 4 +- .../hadoop/hdfs/web/WebHdfsFileSystem.java | 156 +++-- .../web/resources/ConcatSourcesParam.java | 19 +- .../hdfs/web/resources/DeleteOpParam.java | 5 + .../hadoop/hdfs/web/resources/GetOpParam.java | 14 +- .../hdfs/web/resources/HttpOpParam.java | 8 + .../hdfs/web/resources/PostOpParam.java | 5 + .../hadoop/hdfs/web/resources/PutOpParam.java | 16 +- .../hadoop-hdfs/src/main/java/overview.html | 20 +- .../src/main/native/tests/test-libhdfs.sh | 2 +- .../src/main/resources/hdfs-default.xml | 47 +- .../src/site/apt/HdfsUserGuide.apt.vm | 4 +- .../hadoop-hdfs/src/site/apt/WebHDFS.apt.vm | 15 +- .../hadoop/hdfs/LogVerificationAppender.java | 64 ++ .../hdfs/TestDFSClientExcludedNodes.java | 114 +++- .../hadoop/hdfs/TestDFSClientFailover.java | 77 +++ .../org/apache/hadoop/hdfs/TestDFSShell.java | 256 ++++---- .../hadoop/hdfs/TestDFSUpgradeFromImage.java | 11 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 20 + .../hdfs/TestDataTransferKeepalive.java | 1 + .../hadoop/hdfs/TestFileConcurrentReader.java | 45 +- .../hadoop/hdfs/TestHftpURLTimeouts.java | 4 +- .../hadoop/hdfs/TestMiniDFSCluster.java | 9 +- .../hdfs/TestShortCircuitLocalRead.java | 156 ++++- .../hadoop/hdfs/qjournal/TestNNWithQJM.java | 19 +- .../hdfs/qjournal/server/TestJournal.java | 30 +- .../hdfs/qjournal/server/TestJournalNode.java | 30 +- .../server/blockmanagement/TestNodeCount.java | 2 +- .../TestReplicationPolicy.java | 26 +- .../hdfs/server/datanode/TestDataDirs.java | 31 +- .../hdfs/server/namenode/FSImageTestUtil.java | 14 +- .../namenode/OfflineEditsViewerHelper.java | 2 +- .../hdfs/server/namenode/TestAuditLogs.java | 9 +- .../hdfs/server/namenode/TestCheckpoint.java | 161 +++++ .../hdfs/server/namenode/TestEditLog.java | 5 + .../namenode/TestFSImageStorageInspector.java | 2 +- .../hdfs/server/namenode/TestHostsFiles.java | 7 +- .../hdfs/server/namenode/TestINodeFile.java | 129 ++-- .../server/namenode/TestNameNodeRecovery.java | 60 +- .../namenode/TestProcessCorruptBlocks.java | 13 +- 
.../server/namenode/TestSaveNamespace.java | 42 +- .../hdfs/server/namenode/TestStartup.java | 115 +++- .../namenode/ha/TestStandbyCheckpoints.java | 51 ++ .../server/namenode/ha/TestStandbyIsHot.java | 1 + .../namenode/metrics/TestNameNodeMetrics.java | 20 +- .../apache/hadoop/hdfs/tools/TestGetConf.java | 17 +- .../org/apache/hadoop/hdfs/util/TestGSet.java | 78 +++ .../apache/hadoop/hdfs/util/TestXMLUtils.java | 70 +++ .../hadoop/hdfs/web/TestWebHdfsTokens.java | 202 +++++++ .../hadoop/hdfs/web/TestWebHdfsUrl.java | 306 ++++++++-- .../hadoop/hdfs/web/resources/TestParam.java | 26 +- .../hadoop/net/TestNetworkTopology.java | 72 +++ .../src/test/resources/testHDFSConf.xml | 33 ++ 113 files changed, 3721 insertions(+), 1165 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.cmd create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.cmd create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.cmd create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/LogVerificationAppender.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTokens.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 9e6cadcd2e5..7a9e504c434 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -22,6 +22,10 @@ Trunk (Unreleased) HDFS-4296. Reserve layout version for release 1.2.0. (suresh) + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + IMPROVEMENTS HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants -> @@ -177,6 +181,9 @@ Trunk (Unreleased) HDFS-4346. Add SequentialNumber as a base class for INodeId and GenerationStamp. (szetszwo) + HDFS-4633 TestDFSClientExcludedNodes fails sporadically if excluded nodes + cache expires too quickly (Chris Nauroth via Sanjay) + OPTIMIZATIONS BUG FIXES @@ -299,7 +306,44 @@ Trunk (Unreleased) HDFS-4340. Update addBlock() to inculde inode id as additional argument. (Brandon Li via suresh) -Release 2.0.4-beta - UNRELEASED + HDFS-4502. JsonUtil.toFileStatus(..) should check if the fileId property + exists. (Brandon Li via suresh) + + HDFS-4391. TestDataTransferKeepalive fails when tests are executed in a + certain order. (Andrew Wang via atm) + + HDFS-4586. TestDataDirs.testGetDataDirsFromURIs fails with all directories + in dfs.datanode.data.dir are invalid. (Ivan Mitic via atm) + + BREAKDOWN OF HADOOP-8562 SUBTASKS AND RELATED JIRAS + + HDFS-4145. Merge hdfs cmd line scripts from branch-1-win. (David Lao, + Bikas Saha, Lauren Yang, Chuan Liu, Thejas M Nair and Ivan Mitic via suresh) + + HDFS-4163. HDFS distribution build fails on Windows. (Chris Nauroth via + suresh) + + HDFS-4316. branch-trunk-win contains test code accidentally added during + work on fixing tests on Windows. (Chris Nauroth via suresh) + + HDFS-4297. Fix issues related to datanode concurrent reading and writing on + Windows. (Arpit Agarwal, Chuan Liu via suresh) + + HDFS-4573. Fix TestINodeFile on Windows. (Arpit Agarwal via suresh) + + HDFS-4572. Fix TestJournal failures on Windows. (Arpit Agarwal via suresh) + + HDFS-4287. 
HTTPFS tests fail on Windows. (Chris Nauroth via suresh) + + HDFS-4593. TestSaveNamespace fails on Windows. (Arpit Agarwal via suresh) + + HDFS-4582. TestHostsFiles fails on Windows. (Ivan Mitic via suresh) + + HDFS-4603. TestMiniDFSCluster fails on Windows. (Ivan Mitic via suresh) + + HDFS-4604. TestJournalNode fails on Windows. (Ivan Mitic via suresh) + +Release 2.0.5-beta - UNRELEASED INCOMPATIBLE CHANGES @@ -307,6 +351,33 @@ Release 2.0.4-beta - UNRELEASED IMPROVEMENTS + HDFS-4222. NN is unresponsive and loses heartbeats from DNs when + configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh) + + HDFS-4304. Make FSEditLogOp.MAX_OP_SIZE configurable. (Colin Patrick + McCabe via atm) + + HDFS-4518. Finer grained metrics for HDFS capacity. + (Arpit Agarwal via suresh) + + HDFS-4519. Support overriding jsvc binary and log file locations + when launching secure datanode. (Chris Nauroth via suresh) + + HDFS-4569. Small image transfer related cleanups. + (Andrew Wang via suresh) + + HDFS-4521. Invalid network toploogies should not be cached. (Colin Patrick + McCabe via atm) + + HDFS-4246. The exclude node list should be more forgiving, for each output + stream. (harsh via atm) + + HDFS-4635. Move BlockManager#computeCapacity to LightWeightGSet. (suresh) + + HDFS-4621. Additional logging to help diagnose slow QJM syncs. (todd) + + HDFS-4618. Default transaction interval for checkpoints is too low. (todd) + OPTIMIZATIONS BUG FIXES @@ -321,6 +392,86 @@ Release 2.0.4-beta - UNRELEASED but not in dfs.namenode.edits.dir are silently ignored. (Arpit Agarwal via szetszwo) + HDFS-4482. ReplicationMonitor thread can exit with NPE due to the race + between delete and replication of same file. (umamahesh) + + HDFS-4235. When outputting XML, OfflineEditsViewer can't handle some edits + containing non-ASCII strings. (Colin Patrick McCabe via atm) + + HDFS-4541. Set hadoop.log.dir and hadoop.id.str when starting secure + datanode to write the logs to right dir by default. (Arpit Gupta via + suresh) + + HDFS-4540. Namenode http server should use the web authentication + keytab for spnego principal. (Arpit Gupta via suresh) + + HDFS-4544. Error in deleting blocks should not do check disk, for + all types of errors. (Arpit Agarwal via suresh) + + HDFS-4565. Use DFSUtil.getSpnegoKeytabKey() to get the spnego keytab key + in secondary namenode and namenode http server. (Arpit Gupta via suresh) + + HDFS-4571. WebHDFS should not set the service hostname on the server side. + (tucu) + + HDFS-4013. TestHftpURLTimeouts throws NPE. (Chao Shi via suresh) + + HDFS-4592. Default values for access time precision are out of sync between + hdfs-default.xml and the code. (atm) + + HDFS-4522. LightWeightGSet expects incrementing a volatile to be atomic. + (Colin Patrick McCabe via atm) + + HDFS-4484. libwebhdfs compilation broken with gcc 4.6.2. (Colin Patrick + McCabe via atm) + + HDFS-4595. When short circuit read is fails, DFSClient does not fallback + to regular reads. (suresh) + + HDFS-4583. TestNodeCount fails. (Ivan Mitic via suresh) + + HDFS-4591. HA clients can fail to fail over while Standby NN is performing + long checkpoint. (atm) + + HDFS-3277. fail over to loading a different FSImage if the first one we + try to load is corrupt. (Colin Patrick McCabe and Andrew Wang via atm) + + HDFS-4596. Shutting down namenode during checkpointing can lead to md5sum + error. (Andrew Wang via atm) + + HDFS-4614. FSNamesystem#getContentSummary should use getPermissionChecker + helper method. (atm) + + HDFS-4620. 
Documentation for dfs.namenode.rpc-address specifies wrong + format. (Sandy Ryza via atm) + + HDFS-4607. In TestGetConf.testGetSpecificKey(), use a platform-specific + line separator; otherwise, it fails on Windows. (Ivan Mitic via szetszwo) + + HDFS-4609. TestAuditLogs should release log handles between tests. + (Ivan Mitic via szetszwo) + + HDFS-4615. Fix TestDFSShell failures on Windows. (Arpit Agarwal + via szetszwo) + + HDFS-4584. Skip TestNNWithQJM.testNewNamenodeTakesOverWriter() on Windows. + (Arpit Agarwal via szetszwo) + + HDFS-4598. Fix the default value of ConcatSourcesParam and the WebHDFS doc. + (szetszwo) + +Release 2.0.4-alpha - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES @@ -2292,6 +2443,8 @@ Release 0.23.7 - UNRELEASED OPTIMIZATIONS + HDFS-4532. RPC call queue may fill due to current user lookup (daryn) + BUG FIXES HDFS-4288. NN accepts incremental BR as IBR in safemode (daryn via kihwal) @@ -2299,6 +2452,31 @@ Release 0.23.7 - UNRELEASED HDFS-4495. Allow client-side lease renewal to be retried beyond soft-limit (kihwal) + HDFS-4128. 2NN gets stuck in inconsistent state if edit log replay fails + in the middle (kihwal via daryn) + + HDFS-4542. Webhdfs doesn't support secure proxy users (Daryn Sharp via + kihwal) + + HDFS-4560. Webhdfs cannot use tokens obtained by another user (daryn) + + HDFS-4566. Webdhfs token cancelation should use authentication (daryn) + + HDFS-4567. Webhdfs does not need a token for token operations (daryn via + kihwal) + + HDFS-4577. Webhdfs operations should declare if authentication is required + (daryn via kihwal) + + HDFS-3344. Unreliable corrupt blocks counting in TestProcessCorruptBlocks + (kihwal) + + HDFS-3367. WebHDFS doesn't use the logged in user when opening + connections (daryn) + + HDFS-4581. 
checkDiskError should not be called on network errors (Rohit + Kochar via kihwal) + Release 0.23.6 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt index d0b3c060194..7b1f1b2175e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/src/CMakeLists.txt @@ -68,8 +68,9 @@ if (NOT GENERATED_JAVAH) MESSAGE(FATAL_ERROR "You must set the CMake variable GENERATED_JAVAH") endif (NOT GENERATED_JAVAH) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2 -D_GNU_SOURCE") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_FILE_OFFSET_BITS=64") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g -Wall -O2") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_REENTRANT -D_GNU_SOURCE") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64") include_directories( ${GENERATED_JAVAH} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java index 35c09eb1127..a3d4237314a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java @@ -163,6 +163,11 @@ class BookKeeperEditLogInputStream extends EditLogInputStream { return ("BookKeeperEditLogInputStream {" + this.getName() + "}"); } + @Override + public void setMaxOpSize(int maxOpSize) { + reader.setMaxOpSize(maxOpSize); + } + /** * Input stream implementation which can be used by * FSEditLogOp.Reader diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/CMakeLists.txt index 4529cdfe410..25f95b2b2fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/CMakeLists.txt @@ -48,6 +48,7 @@ add_executable(test_libwebhdfs_ops ) target_link_libraries(test_libwebhdfs_ops webhdfs + native_mini_dfs ) add_executable(test_libwebhdfs_read @@ -69,4 +70,6 @@ add_executable(test_libwebhdfs_threaded ) target_link_libraries(test_libwebhdfs_threaded webhdfs + native_mini_dfs + pthread ) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/src/test_libwebhdfs_ops.c b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/src/test_libwebhdfs_ops.c index 8c16b396239..87550ae8f51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/src/test_libwebhdfs_ops.c +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/libwebhdfs/src/test_libwebhdfs_ops.c @@ -257,7 +257,8 @@ int main(int argc, char **argv) const char* path[] = {"/foo", "/foo/bar", "foobar", "//foo/bar//foobar", "foo//bar", "foo/bar///", "/", "////"}; - for (int i = 0; i < 8; i++) { + int i; + for (i = 0; i < 8; i++) { fprintf(stderr, "hdfsSetWorkingDirectory: %s, %s\n", ((result = hdfsSetWorkingDirectory(fs, path[i])) ? "Failed!" 
: "Success!"), @@ -281,8 +282,8 @@ int main(int argc, char **argv) fprintf(stderr, "Name: %s, ", fileInfo->mName); fprintf(stderr, "Type: %c, ", (char)(fileInfo->mKind)); fprintf(stderr, "Replication: %d, ", fileInfo->mReplication); - fprintf(stderr, "BlockSize: %lld, ", fileInfo->mBlockSize); - fprintf(stderr, "Size: %lld, ", fileInfo->mSize); + fprintf(stderr, "BlockSize: %"PRId64", ", fileInfo->mBlockSize); + fprintf(stderr, "Size: %"PRId64", ", fileInfo->mSize); fprintf(stderr, "LastMod: %s", ctime(&fileInfo->mLastMod)); fprintf(stderr, "Owner: %s, ", fileInfo->mOwner); fprintf(stderr, "Group: %s, ", fileInfo->mGroup); @@ -305,8 +306,8 @@ int main(int argc, char **argv) fprintf(stderr, "Name: %s, ", fileList[i].mName); fprintf(stderr, "Type: %c, ", (char)fileList[i].mKind); fprintf(stderr, "Replication: %d, ", fileList[i].mReplication); - fprintf(stderr, "BlockSize: %lld, ", fileList[i].mBlockSize); - fprintf(stderr, "Size: %lld, ", fileList[i].mSize); + fprintf(stderr, "BlockSize: %"PRId64", ", fileList[i].mBlockSize); + fprintf(stderr, "Size: %"PRId64", ", fileList[i].mSize); fprintf(stderr, "LastMod: %s", ctime(&fileList[i].mLastMod)); fprintf(stderr, "Owner: %s, ", fileList[i].mOwner); fprintf(stderr, "Group: %s, ", fileList[i].mGroup); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index d7aef66c2ee..709520f4a7c 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -15,6 +15,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Environment Variables +# +# JSVC_HOME home directory of jsvc binary. Required for starting secure +# datanode. +# +# JSVC_OUTFILE path to jsvc output file. Defaults to +# $HADOOP_LOG_DIR/jsvc.out. +# +# JSVC_ERRFILE path to jsvc error file. Defaults to $HADOOP_LOG_DIR/jsvc.err. + bin=`which $0` bin=`dirname ${bin}` bin=`cd "$bin" > /dev/null; pwd` @@ -56,6 +66,14 @@ fi COMMAND=$1 shift +case $COMMAND in + # usage flags + --help|-help|-h) + print_usage + exit + ;; +esac + # Determine if we're starting a secure datanode, and if so, redefine appropriate variables if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_USER" ]; then if [ -n "$JSVC_HOME" ]; then @@ -65,9 +83,11 @@ if [ "$COMMAND" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HADOOP_SECURE_DN_ if [ -n "$HADOOP_SECURE_DN_LOG_DIR" ]; then HADOOP_LOG_DIR=$HADOOP_SECURE_DN_LOG_DIR + HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR" fi HADOOP_IDENT_STRING=$HADOOP_SECURE_DN_USER + HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING" starting_secure_dn="true" else echo "It looks like you're trying to start a secure DN, but \$JSVC_HOME"\ @@ -126,9 +146,6 @@ else CLASS="$COMMAND" fi -if $cygwin; then - CLASSPATH=`cygpath -p -w "$CLASSPATH"` -fi export CLASSPATH=$CLASSPATH HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}" @@ -148,9 +165,18 @@ if [ "$starting_secure_dn" = "true" ]; then "and set JSVC_HOME to the directory containing the jsvc binary." exit fi + + if [[ ! $JSVC_OUTFILE ]]; then + JSVC_OUTFILE="$HADOOP_LOG_DIR/jsvc.out" + fi + + if [[ ! 
$JSVC_ERRFILE ]]; then + JSVC_ERRFILE="$HADOOP_LOG_DIR/jsvc.err" + fi + exec "$JSVC" \ - -Dproc_$COMMAND -outfile "$HADOOP_LOG_DIR/jsvc.out" \ - -errfile "$HADOOP_LOG_DIR/jsvc.err" \ + -Dproc_$COMMAND -outfile "$JSVC_OUTFILE" \ + -errfile "$JSVC_ERRFILE" \ -pidfile "$HADOOP_SECURE_DN_PID" \ -nodetach \ -user "$HADOOP_SECURE_DN_USER" \ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.cmd b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.cmd new file mode 100644 index 00000000000..f3aa7338eeb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs-config.cmd @@ -0,0 +1,43 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@rem included in all the hdfs scripts with source command +@rem should not be executed directly + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +if exist %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd ( + call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +) else if exist %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_COMMON_HOME%\libexec\hadoop-config.cmd %* +) else if exist %HADOOP_HOME%\libexec\hadoop-config.cmd ( + call %HADOOP_HOME%\libexec\hadoop-config.cmd %* +) else ( + echo Hadoop common not found. +) + +:eof diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd new file mode 100644 index 00000000000..70af80c7d54 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs.cmd @@ -0,0 +1,171 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hdfs-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +:main + if exist %HADOOP_CONF_DIR%\hadoop-env.cmd ( + call %HADOOP_CONF_DIR%\hadoop-env.cmd + ) + + set hdfs-command=%1 + call :make_command_arguments %* + + if not defined hdfs-command ( + goto print_usage + ) + + call :%hdfs-command% %hdfs-command-arguments% + set java_arguments=%JAVA_HEAP_MAX% %HADOOP_OPTS% -classpath %CLASSPATH% %CLASS% %hdfs-command-arguments% + call %JAVA% %java_arguments% + +goto :eof + +:namenode + set CLASS=org.apache.hadoop.hdfs.server.namenode.NameNode + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_NAMENODE_OPTS% + goto :eof + +:zkfc + set CLASS=org.apache.hadoop.hdfs.tools.DFSZKFailoverController + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_ZKFC_OPTS% + goto :eof + +:secondarynamenode + set CLASS=org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_SECONDARYNAMENODE_OPTS% + goto :eof + +:datanode + set CLASS=org.apache.hadoop.hdfs.server.datanode.DataNode + set HADOOP_OPTS=%HADOOP_OPTS% -server %HADOOP_DATANODE_OPTS% + goto :eof + +:dfs + set CLASS=org.apache.hadoop.fs.FsShell + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:dfsadmin + set CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:haadmin + set CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin + set CLASSPATH=%CLASSPATH%;%TOOL_PATH% + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:fsck + set CLASS=org.apache.hadoop.hdfs.tools.DFSck + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_CLIENT_OPTS% + goto :eof + +:balancer + set CLASS=org.apache.hadoop.hdfs.server.balancer.Balancer + set HADOOP_OPTS=%HADOOP_OPTS% %HADOOP_BALANCER_OPTS% + goto :eof + +:jmxget + set CLASS=org.apache.hadoop.hdfs.tools.JMXGet + goto :eof + +:oiv + set CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer + goto :eof + +:oev + set CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer + goto :eof + +:fetchdt + set CLASS=org.apache.hadoop.hdfs.tools.DelegationTokenFetcher + goto :eof + +:getconf + set CLASS=org.apache.hadoop.hdfs.tools.GetConf + goto :eof + +:groups + set CLASS=org.apache.hadoop.hdfs.tools.GetGroups + goto :eof + +@rem This changes %1, %2 etc. Hence those cannot be used after calling this. +:make_command_arguments + if "%1" == "--config" ( + shift + shift + ) + if [%2] == [] goto :eof + shift + set _hdfsarguments= + :MakeCmdArgsLoop + if [%1]==[] goto :EndLoop + + if not defined _hdfsarguments ( + set _hdfsarguments=%1 + ) else ( + set _hdfsarguments=!_hdfsarguments! %1 + ) + shift + goto :MakeCmdArgsLoop + :EndLoop + set hdfs-command-arguments=%_hdfsarguments% + goto :eof + +:print_usage + @echo Usage: hdfs [--config confdir] COMMAND + @echo where COMMAND is one of: + @echo dfs run a filesystem command on the file systems supported in Hadoop. 
+ @echo namenode -format format the DFS filesystem + @echo secondarynamenode run the DFS secondary namenode + @echo namenode run the DFS namenode + @echo zkfc run the ZK Failover Controller daemon + @echo datanode run a DFS datanode + @echo dfsadmin run a DFS admin client + @echo fsck run a DFS filesystem checking utility + @echo balancer run a cluster balancing utility + @echo jmxget get JMX exported values from NameNode or DataNode. + @echo oiv apply the offline fsimage viewer to an fsimage + @echo oev apply the offline edits viewer to an edits file + @echo fetchdt fetch a delegation token from the NameNode + @echo getconf get config values from configuration + @echo groups get the groups which users belong to + @echo Use -help to see options + @echo. + @echo Most commands print help when invoked w/o parameters. + +endlocal diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.cmd b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.cmd new file mode 100644 index 00000000000..9f20e5afa31 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/start-dfs.cmd @@ -0,0 +1,41 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hdfs-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +start "Apache Hadoop Distribution" hadoop namenode +start "Apache Hadoop Distribution" hadoop datanode + +endlocal diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.cmd b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.cmd new file mode 100644 index 00000000000..f0cf0150802 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/stop-dfs.cmd @@ -0,0 +1,41 @@ +@echo off +@rem Licensed to the Apache Software Foundation (ASF) under one or more +@rem contributor license agreements. See the NOTICE file distributed with +@rem this work for additional information regarding copyright ownership. +@rem The ASF licenses this file to You under the Apache License, Version 2.0 +@rem (the "License"); you may not use this file except in compliance with +@rem the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +setlocal enabledelayedexpansion + +if not defined HADOOP_BIN_PATH ( + set HADOOP_BIN_PATH=%~dp0 +) + +if "%HADOOP_BIN_PATH:~-1%" == "\" ( + set HADOOP_BIN_PATH=%HADOOP_BIN_PATH:~0,-1% +) + +set DEFAULT_LIBEXEC_DIR=%HADOOP_BIN_PATH%\..\libexec +if not defined HADOOP_LIBEXEC_DIR ( + set HADOOP_LIBEXEC_DIR=%DEFAULT_LIBEXEC_DIR% +) + +call %HADOOP_LIBEXEC_DIR%\hadoop-config.cmd %* +if "%1" == "--config" ( + shift + shift +) + +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - hadoop namenode" +Taskkill /FI "WINDOWTITLE eq Apache Hadoop Distribution - hadoop datanode" + +endlocal diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/site.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/site.xml index 19cc1b592bb..ffb32198333 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/site.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/src/documentation/content/xdocs/site.xml @@ -76,7 +76,6 @@ See http://forrest.apache.org/docs/linking.html for more info. - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index 527291b82e5..7fbb1a01d59 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -40,6 +40,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; @@ -242,14 +243,15 @@ public class BlockReaderFactory { * This block reader implements the path-based style of local reads * first introduced in HDFS-2246. 
*/ - static BlockReader getLegacyBlockReaderLocal(Configuration conf, - String src, ExtendedBlock blk, Token accessToken, - DatanodeInfo chosenNode, int socketTimeout, long offsetIntoBlock, + static BlockReader getLegacyBlockReaderLocal(UserGroupInformation ugi, + Configuration conf, String src, ExtendedBlock blk, + Token accessToken, DatanodeInfo chosenNode, + int socketTimeout, long offsetIntoBlock, boolean connectToDnViaHostname) throws InvalidToken, IOException { try { - return BlockReaderLocalLegacy.newBlockReader(conf, src, blk, accessToken, - chosenNode, socketTimeout, offsetIntoBlock, blk.getNumBytes() - - offsetIntoBlock, connectToDnViaHostname); + return BlockReaderLocalLegacy.newBlockReader(ugi, conf, src, + blk, accessToken, chosenNode, socketTimeout, offsetIntoBlock, + blk.getNumBytes() - offsetIntoBlock, connectToDnViaHostname); } catch (RemoteException re) { throw re.unwrapRemoteException(InvalidToken.class, AccessControlException.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java index 6d6096064bb..11fb4922fcb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java @@ -23,6 +23,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.net.Socket; import java.nio.ByteBuffer; +import java.security.PrivilegedExceptionAction; import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; @@ -31,6 +32,7 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.protocol.BlockLocalPathInfo; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -41,6 +43,7 @@ import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; import org.apache.hadoop.hdfs.util.DirectBufferPool; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; @@ -92,11 +95,21 @@ class BlockReaderLocalLegacy implements BlockReader { } private synchronized ClientDatanodeProtocol getDatanodeProxy( - DatanodeInfo node, Configuration conf, int socketTimeout, - boolean connectToDnViaHostname) throws IOException { + UserGroupInformation ugi, final DatanodeInfo node, + final Configuration conf, final int socketTimeout, + final boolean connectToDnViaHostname) throws IOException { if (proxy == null) { - proxy = DFSUtil.createClientDatanodeProtocolProxy(node, conf, - socketTimeout, connectToDnViaHostname); + try { + proxy = ugi.doAs(new PrivilegedExceptionAction() { + @Override + public ClientDatanodeProtocol run() throws Exception { + return DFSUtil.createClientDatanodeProtocolProxy(node, conf, + socketTimeout, connectToDnViaHostname); + } + }); + } catch (InterruptedException e) { + LOG.warn("encountered exception ", e); + } } return proxy; } @@ -160,17 +173,18 @@ class BlockReaderLocalLegacy implements BlockReader { /** * The only way this object can be instantiated. 
*/ - static BlockReaderLocalLegacy newBlockReader(Configuration conf, String file, - ExtendedBlock blk, Token token, DatanodeInfo node, - int socketTimeout, long startOffset, long length, - boolean connectToDnViaHostname) throws IOException { + static BlockReaderLocalLegacy newBlockReader(UserGroupInformation ugi, + Configuration conf, String file, ExtendedBlock blk, + Token token, DatanodeInfo node, int socketTimeout, + long startOffset, long length, boolean connectToDnViaHostname) + throws IOException { LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node .getIpcPort()); // check the cache first BlockLocalPathInfo pathinfo = localDatanodeInfo.getBlockLocalPathInfo(blk); if (pathinfo == null) { - pathinfo = getBlockPathInfo(blk, node, conf, socketTimeout, token, + pathinfo = getBlockPathInfo(ugi, blk, node, conf, socketTimeout, token, connectToDnViaHostname); } @@ -247,13 +261,13 @@ class BlockReaderLocalLegacy implements BlockReader { return ldInfo; } - private static BlockLocalPathInfo getBlockPathInfo(ExtendedBlock blk, - DatanodeInfo node, Configuration conf, int timeout, + private static BlockLocalPathInfo getBlockPathInfo(UserGroupInformation ugi, + ExtendedBlock blk, DatanodeInfo node, Configuration conf, int timeout, Token token, boolean connectToDnViaHostname) - throws IOException { + throws IOException { LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node.getIpcPort()); BlockLocalPathInfo pathinfo = null; - ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(node, + ClientDatanodeProtocol proxy = localDatanodeInfo.getDatanodeProxy(ugi, node, conf, timeout, connectToDnViaHostname); try { // make RPC to local datanode to find local pathnames of blocks @@ -282,9 +296,11 @@ class BlockReaderLocalLegacy implements BlockReader { DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT); if (bufferSizeBytes < bytesPerChecksum) { - throw new IllegalArgumentException("Configured BlockReaderLocalLegacy buffer size (" + bufferSizeBytes + ") " + - "is not large enough to hold a single chunk (" + bytesPerChecksum + "). Please configure " + - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately"); + throw new IllegalArgumentException("Configured BlockReaderLocalLegacy " + + "buffer size (" + bufferSizeBytes + ") is not large enough to hold " + + "a single chunk (" + bytesPerChecksum + "). 
Please configure " + + DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + + " appropriately"); } // Round down to nearest chunk size diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index a2b4e35626a..016405a714a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -41,6 +41,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPAC import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER_DEFAULT; @@ -212,6 +214,7 @@ public class DFSClient implements java.io.Closeable { final int socketTimeout; final int socketCacheCapacity; final long socketCacheExpiry; + final long excludedNodesCacheExpiry; /** Wait time window (in msec) if BlockMissingException is caught */ final int timeWindow; final int nCachedConnRetry; @@ -267,6 +270,9 @@ public class DFSClient implements java.io.Closeable { DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT); socketCacheExpiry = conf.getLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT); + excludedNodesCacheExpiry = conf.getLong( + DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL, + DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT); prefetchSize = conf.getLong(DFS_CLIENT_READ_PREFETCH_SIZE_KEY, 10 * defaultBlockSize); timeWindow = conf @@ -436,6 +442,7 @@ public class DFSClient implements java.io.Closeable { "null URI"); NameNodeProxies.ProxyAndInfo proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri, ClientProtocol.class); + this.dtService = proxyInfo.getDelegationTokenService(); this.namenode = proxyInfo.getProxy(); } @@ -1614,7 +1621,7 @@ public class DFSClient implements java.io.Closeable { * @param socketFactory to create sockets to connect to DNs * @param socketTimeout timeout to use when connecting and waiting for a response * @param encryptionKey the key needed to communicate with DNs in this cluster - * @param connectToDnViaHostname {@see #connectToDnViaHostname()} + * @param connectToDnViaHostname {@link #connectToDnViaHostname()} * @return The checksum */ static MD5MD5CRC32FileChecksum getFileChecksum(String src, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 302ec83e420..a90b28abfd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -76,6 +76,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final 
String DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY = "dfs.client.socketcache.expiryMsec"; public static final long DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT = 2 * 60 * 1000; + public static final String DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL = "dfs.client.write.exclude.nodes.cache.expiry.interval.millis"; + public static final long DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL_DEFAULT = 10 * 60 * 1000; // 10 minutes, in ms public static final String DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address"; public static final String DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100"; public static final String DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY = "dfs.namenode.backup.http-address"; @@ -120,7 +122,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_CHECKPOINT_PERIOD_KEY = "dfs.namenode.checkpoint.period"; public static final long DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT = 3600; public static final String DFS_NAMENODE_CHECKPOINT_TXNS_KEY = "dfs.namenode.checkpoint.txns"; - public static final long DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT = 40000; + public static final long DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT = 1000000; + public static final String DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY = "dfs.namenode.checkpoint.max-retries"; + public static final int DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT = 3; public static final String DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY = "dfs.namenode.heartbeat.recheck-interval"; public static final int DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_DEFAULT = 5*60*1000; public static final String DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_KEY = "dfs.namenode.tolerate.heartbeat.multiplier"; @@ -367,7 +371,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { // Image transfer timeout public static final String DFS_IMAGE_TRANSFER_TIMEOUT_KEY = "dfs.image.transfer.timeout"; - public static final int DFS_IMAGE_TRANSFER_TIMEOUT_DEFAULT = 60 * 1000; + public static final int DFS_IMAGE_TRANSFER_TIMEOUT_DEFAULT = 10 * 60 * 1000; //Keys with no defaults public static final String DFS_DATANODE_PLUGINS_KEY = "dfs.datanode.plugins"; @@ -400,6 +404,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_DEFAULT = 1; public static final String DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY = "dfs.web.authentication.kerberos.principal"; public static final String DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY = "dfs.web.authentication.kerberos.keytab"; + public static final String DFS_NAMENODE_MAX_OP_SIZE_KEY = "dfs.namenode.max.op.size"; + public static final int DFS_NAMENODE_MAX_OP_SIZE_DEFAULT = 50 * 1024 * 1024; public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user"; public static final String DFS_DOMAIN_SOCKET_PATH_KEY = "dfs.domain.socket.path"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 7a6557c3952..685487c8497 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -55,6 +55,7 @@ import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.unix.DomainSocket; +import 
org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.token.Token; import com.google.common.annotations.VisibleForTesting; @@ -476,6 +477,10 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable " for " + blk); } return chosenNode; + } catch (AccessControlException ex) { + DFSClient.LOG.warn("Short circuit access failed " + ex); + dfsClient.disableLegacyBlockReaderLocal(); + continue; } catch (IOException ex) { if (ex instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) { DFSClient.LOG.info("Will fetch a new encryption key and retry, " @@ -822,6 +827,10 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable e.getPos() + " from " + chosenNode); // we want to remember what we have tried addIntoCorruptedBlockMap(block.getBlock(), chosenNode, corruptedBlockMap); + } catch (AccessControlException ex) { + DFSClient.LOG.warn("Short circuit access failed " + ex); + dfsClient.disableLegacyBlockReaderLocal(); + continue; } catch (IOException e) { if (e instanceof InvalidEncryptionKeyException && refetchEncryptionKey > 0) { DFSClient.LOG.info("Will fetch a new encryption key and retry, " @@ -925,8 +934,8 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable DFSClient.isLocalAddress(dnAddr) && (!shortCircuitForbidden())) { try { - return BlockReaderFactory.getLegacyBlockReaderLocal(dfsClient.conf, - clientName, block, blockToken, chosenNode, + return BlockReaderFactory.getLegacyBlockReaderLocal(dfsClient.ugi, + dfsClient.conf, clientName, block, blockToken, chosenNode, dfsClient.hdfsTimeout, startOffset,dfsClient.connectToDnViaHostname()); } catch (IOException e) { DFSClient.LOG.warn("error creating legacy BlockReaderLocal. " + @@ -1083,8 +1092,8 @@ public class DFSInputStream extends FSInputStream implements ByteBufferReadable * only report if the total number of replica is 1. We do not * report otherwise since this maybe due to the client is a handicapped client * (who can not read). 
- * @param corruptedBlockMap, map of corrupted blocks - * @param dataNodeCount, number of data nodes who contains the block replicas + * @param corruptedBlockMap map of corrupted blocks + * @param dataNodeCount number of data nodes who contains the block replicas */ private void reportCheckSumFailure( Map> corruptedBlockMap, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index 1edd2fe746f..a255881a497 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -35,6 +35,7 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.LinkedList; import java.util.List; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.InterfaceAudience; @@ -82,6 +83,11 @@ import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; import com.google.common.annotations.VisibleForTesting; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.cache.RemovalListener; +import com.google.common.cache.RemovalNotification; /**************************************************************** @@ -289,7 +295,25 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable { private DataInputStream blockReplyStream; private ResponseProcessor response = null; private volatile DatanodeInfo[] nodes = null; // list of targets for current block - private ArrayList excludedNodes = new ArrayList(); + private LoadingCache excludedNodes = + CacheBuilder.newBuilder() + .expireAfterWrite( + dfsClient.getConf().excludedNodesCacheExpiry, + TimeUnit.MILLISECONDS) + .removalListener(new RemovalListener() { + @Override + public void onRemoval( + RemovalNotification notification) { + DFSClient.LOG.info("Removing node " + + notification.getKey() + " from the excluded nodes list"); + } + }) + .build(new CacheLoader() { + @Override + public DatanodeInfo load(DatanodeInfo key) throws Exception { + return key; + } + }); volatile boolean hasError = false; volatile int errorIndex = -1; private BlockConstructionStage stage; // block construction stage @@ -999,8 +1023,10 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable { success = false; long startTime = Time.now(); - DatanodeInfo[] excluded = excludedNodes.toArray( - new DatanodeInfo[excludedNodes.size()]); + DatanodeInfo[] excluded = + excludedNodes.getAllPresent(excludedNodes.asMap().keySet()) + .keySet() + .toArray(new DatanodeInfo[0]); block = oldBlock; lb = locateFollowingBlock(startTime, excluded.length > 0 ? 
excluded : null); @@ -1019,7 +1045,7 @@ public class DFSOutputStream extends FSOutputSummer implements Syncable { dfsClient.namenode.abandonBlock(block, src, dfsClient.clientName); block = null; DFSClient.LOG.info("Excluding datanode " + nodes[errorIndex]); - excludedNodes.add(nodes[errorIndex]); + excludedNodes.put(nodes[errorIndex], nodes[errorIndex]); } } while (!success && --count >= 0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 15d8864e905..44d1fda7919 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -1259,4 +1259,20 @@ public class DFSUtil { "It should be a positive, non-zero integer value."); return blocksReplWorkMultiplier; } -} + + /** + * Get SPNEGO keytab Key from configuration + * + * @param conf + * Configuration + * @param defaultKey + * @return DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY if the key is not empty + * else return defaultKey + */ + public static String getSpnegoKeytabKey(Configuration conf, String defaultKey) { + String value = + conf.get(DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY); + return (value == null || value.isEmpty()) ? + defaultKey : DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 6772be01017..e772859b8ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -62,11 +62,14 @@ import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.io.Text; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; +import com.google.common.annotations.VisibleForTesting; + /**************************************************************** * Implementation of the abstract FileSystem for the DFS system. @@ -310,13 +313,14 @@ public class DistributedFileSystem extends FileSystem { } /** - * Move blocks from srcs to trg - * and delete srcs afterwards - * RESTRICTION: all blocks should be the same size + * Move blocks from srcs to trg and delete srcs afterwards. + * The file block sizes must be the same. 
+ * * @param trg existing file to append to * @param psrcs list of files (same block size, same replication) * @throws IOException */ + @Override public void concat(Path trg, Path [] psrcs) throws IOException { String [] srcs = new String [psrcs.length]; for(int i=0; i WARN_JOURNAL_MILLIS_THRESHOLD) { + QuorumJournalManager.LOG.warn( + "Took " + (rpcTime / 1000) + "ms to send a batch of " + + numTxns + " edits (" + data.length + " bytes) to " + + "remote journal " + IPCLoggerChannel.this); + } } synchronized (IPCLoggerChannel.this) { highestAckedTxId = firstTxnId + numTxns - 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java index c42f6b917a6..f15e4626f41 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java @@ -24,6 +24,7 @@ import java.util.concurrent.TimeoutException; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.Time; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.common.util.concurrent.FutureCallback; @@ -120,6 +121,15 @@ class QuorumCall { String msg = String.format( "Waited %s ms (timeout=%s ms) for a response for %s", waited, millis, operationName); + if (!successes.isEmpty()) { + msg += ". Succeeded so far: [" + Joiner.on(",").join(successes.keySet()) + "]"; + } + if (!exceptions.isEmpty()) { + msg += ". Exceptions so far: [" + getExceptionMapString() + "]"; + } + if (successes.isEmpty() && exceptions.isEmpty()) { + msg += ". No responses yet."; + } if (waited > millis * WAIT_PROGRESS_WARN_THRESHOLD) { QuorumJournalManager.LOG.warn(msg); } else { @@ -227,4 +237,22 @@ class QuorumCall { } return sb.toString(); } + + /** + * Return a string suitable for displaying to the user, containing + * any exceptions that have been received so far. + */ + private String getExceptionMapString() { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Map.Entry e : exceptions.entrySet()) { + if (!first) { + sb.append(", "); + } + first = false; + sb.append(e.getKey()).append(": ") + .append(e.getValue().getLocalizedMessage()); + } + return sb.toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java index 38a58e89bcc..58965ce02a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java @@ -128,6 +128,10 @@ class Journal implements Closeable { private final JournalMetrics metrics; + /** + * Time threshold for sync calls, beyond which a warning should be logged to the console. 
+ */ + private static final int WARN_SYNC_MILLIS_THRESHOLD = 1000; Journal(File logDir, String journalId, StorageErrorReporter errorReporter) throws IOException { @@ -215,8 +219,8 @@ class Journal implements Closeable { @Override // Closeable public void close() throws IOException { storage.close(); - IOUtils.closeStream(committedTxnId); + IOUtils.closeStream(curSegment); } JNStorage getStorage() { @@ -370,6 +374,10 @@ class Journal implements Closeable { sw.stop(); metrics.addSync(sw.elapsedTime(TimeUnit.MICROSECONDS)); + if (sw.elapsedTime(TimeUnit.MILLISECONDS) > WARN_SYNC_MILLIS_THRESHOLD) { + LOG.warn("Sync of transaction range " + firstTxnId + "-" + lastTxnId + + " took " + sw.elapsedTime(TimeUnit.MILLISECONDS) + "ms"); + } if (isLagging) { // This batch of edits has already been committed on a quorum of other diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java index 91629d930c9..fe08f209f4a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java @@ -35,6 +35,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -82,7 +83,6 @@ public class JournalNode implements Tool, Configurable { return journal; } - @Override public void setConf(Configuration conf) { this.conf = conf; @@ -97,21 +97,9 @@ public class JournalNode implements Tool, Configurable { "Journal dir '" + dir + "' should be an absolute path"); } - if (!dir.exists() && !dir.mkdirs()) { - throw new IOException("Could not create journal dir '" + - dir + "'"); - } else if (!dir.isDirectory()) { - throw new IOException("Journal directory '" + dir + "' is not " + - "a directory"); - } - - if (!dir.canWrite()) { - throw new IOException("Unable to write to journal dir '" + - dir + "'"); - } + DiskChecker.checkDir(dir); } - @Override public Configuration getConf() { return conf; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index a25ba5b52fd..d18ac9f456d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -78,6 +78,7 @@ public class DelegationTokenSecretManager @Override //SecretManager public void checkAvailableForRead() throws StandbyException { + namesystem.checkOperation(OperationCategory.READ); namesystem.readLock(); try { namesystem.checkOperation(OperationCategory.READ); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 2d08bc4b690..8a22c55f389 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; @@ -234,6 +235,7 @@ public class BlockManager { heartbeatManager = datanodeManager.getHeartbeatManager(); invalidateBlocks = new InvalidateBlocks(datanodeManager); + // Compute the map capacity by allocating 2% of total memory blocksMap = new BlocksMap(DEFAULT_MAP_LOAD_FACTOR); blockplacement = BlockPlacementPolicy.getInstance( conf, stats, datanodeManager.getNetworkTopology()); @@ -874,9 +876,10 @@ public class BlockManager { */ public BlocksWithLocations getBlocks(DatanodeID datanode, long size ) throws IOException { + namesystem.checkOperation(OperationCategory.READ); namesystem.readLock(); try { - namesystem.checkSuperuserPrivilege(); + namesystem.checkOperation(OperationCategory.READ); return getBlocksWithLocations(datanode, size); } finally { namesystem.readUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java index ef21dc3a332..dbfcaa70138 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlocksMap.java @@ -60,38 +60,11 @@ class BlocksMap { private GSet blocks; BlocksMap(final float loadFactor) { - this.capacity = computeCapacity(); + // Use 2% of total memory to size the GSet capacity + this.capacity = LightWeightGSet.computeCapacity(2.0, "BlocksMap"); this.blocks = new LightWeightGSet(capacity); } - /** - * Let t = 2% of max memory. - * Let e = round(log_2 t). - * Then, we choose capacity = 2^e/(size of reference), - * unless it is outside the close interval [1, 2^30]. - */ - private static int computeCapacity() { - //VM detection - //See http://java.sun.com/docs/hotspot/HotSpotFAQ.html#64bit_detection - final String vmBit = System.getProperty("sun.arch.data.model"); - - //2% of max memory - final double twoPC = Runtime.getRuntime().maxMemory()/50.0; - - //compute capacity - final int e1 = (int)(Math.log(twoPC)/Math.log(2.0) + 0.5); - final int e2 = e1 - ("32".equals(vmBit)? 2: 3); - final int exponent = e2 < 0? 0: e2 > 30? 
30: e2; - final int c = 1 << exponent; - - if (LightWeightGSet.LOG.isDebugEnabled()) { - LightWeightGSet.LOG.debug("VM type = " + vmBit + "-bit"); - LightWeightGSet.LOG.debug("2% max memory = " + twoPC/(1 << 20) + " MB"); - LightWeightGSet.LOG.debug("capacity = 2^" + exponent - + " = " + c + " entries"); - } - return c; - } void close() { // Empty blocks once GSet#clear is implemented (HDFS-3940) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index e4b9ffb8dd8..525bf602cd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -66,6 +66,7 @@ import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.CachedDNSToSwitchMapping; import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.net.NetworkTopology.InvalidTopologyException; import org.apache.hadoop.net.Node; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.net.ScriptBasedMapping; @@ -431,8 +432,8 @@ public class DatanodeManager { host2DatanodeMap.remove(datanodeMap.put(node.getStorageID(), node)); } + networktopology.add(node); // may throw InvalidTopologyException host2DatanodeMap.add(node); - networktopology.add(node); checkIfClusterIsNowMultiRack(node); if (LOG.isDebugEnabled()) { @@ -647,92 +648,122 @@ public class DatanodeManager { nodeReg.setIpAddr(ip); nodeReg.setPeerHostName(hostname); } - - nodeReg.setExportedKeys(blockManager.getBlockKeys()); - - // Checks if the node is not on the hosts list. If it is not, then - // it will be disallowed from registering. - if (!inHostsList(nodeReg)) { - throw new DisallowedDatanodeException(nodeReg); - } - - NameNode.stateChangeLog.info("BLOCK* registerDatanode: from " - + nodeReg + " storage " + nodeReg.getStorageID()); - - DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID()); - DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr( - nodeReg.getIpAddr(), nodeReg.getXferPort()); - - if (nodeN != null && nodeN != nodeS) { - NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN); - // nodeN previously served a different data storage, - // which is not served by anybody anymore. - removeDatanode(nodeN); - // physically remove node from datanodeMap - wipeDatanode(nodeN); - nodeN = null; - } - - if (nodeS != null) { - if (nodeN == nodeS) { - // The same datanode has been just restarted to serve the same data - // storage. We do not need to remove old data blocks, the delta will - // be calculated on the next block report from the datanode - if(NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("BLOCK* registerDatanode: " - + "node restarted."); - } - } else { - // nodeS is found - /* The registering datanode is a replacement node for the existing - data storage, which from now on will be served by a new node. - If this message repeats, both nodes might have same storageID - by (insanely rare) random chance. 
User needs to restart one of the - nodes with its data cleared (or user can just remove the StorageID - value in "VERSION" file under the data directory of the datanode, - but this is might not work if VERSION file format has changed - */ - NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS - + " is replaced by " + nodeReg + " with the same storageID " - + nodeReg.getStorageID()); - } - // update cluster map - getNetworkTopology().remove(nodeS); - nodeS.updateRegInfo(nodeReg); - nodeS.setDisallowed(false); // Node is in the include list - - // resolve network location - resolveNetworkLocation(nodeS); - getNetworkTopology().add(nodeS); - - // also treat the registration message as a heartbeat - heartbeatManager.register(nodeS); - checkDecommissioning(nodeS); - return; - } - - // this is a new datanode serving a new data storage - if ("".equals(nodeReg.getStorageID())) { - // this data storage has never been registered - // it is either empty or was created by pre-storageID version of DFS - nodeReg.setStorageID(newStorageID()); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug( - "BLOCK* NameSystem.registerDatanode: " - + "new storageID " + nodeReg.getStorageID() + " assigned."); - } - } - // register new datanode - DatanodeDescriptor nodeDescr - = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK); - resolveNetworkLocation(nodeDescr); - addDatanode(nodeDescr); - checkDecommissioning(nodeDescr); - // also treat the registration message as a heartbeat - // no need to update its timestamp - // because its is done when the descriptor is created - heartbeatManager.addDatanode(nodeDescr); + try { + nodeReg.setExportedKeys(blockManager.getBlockKeys()); + + // Checks if the node is not on the hosts list. If it is not, then + // it will be disallowed from registering. + if (!inHostsList(nodeReg)) { + throw new DisallowedDatanodeException(nodeReg); + } + + NameNode.stateChangeLog.info("BLOCK* registerDatanode: from " + + nodeReg + " storage " + nodeReg.getStorageID()); + + DatanodeDescriptor nodeS = datanodeMap.get(nodeReg.getStorageID()); + DatanodeDescriptor nodeN = host2DatanodeMap.getDatanodeByXferAddr( + nodeReg.getIpAddr(), nodeReg.getXferPort()); + + if (nodeN != null && nodeN != nodeS) { + NameNode.LOG.info("BLOCK* registerDatanode: " + nodeN); + // nodeN previously served a different data storage, + // which is not served by anybody anymore. + removeDatanode(nodeN); + // physically remove node from datanodeMap + wipeDatanode(nodeN); + nodeN = null; + } + + if (nodeS != null) { + if (nodeN == nodeS) { + // The same datanode has been just restarted to serve the same data + // storage. We do not need to remove old data blocks, the delta will + // be calculated on the next block report from the datanode + if(NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("BLOCK* registerDatanode: " + + "node restarted."); + } + } else { + // nodeS is found + /* The registering datanode is a replacement node for the existing + data storage, which from now on will be served by a new node. + If this message repeats, both nodes might have same storageID + by (insanely rare) random chance. 
User needs to restart one of the + nodes with its data cleared (or user can just remove the StorageID + value in "VERSION" file under the data directory of the datanode, + but this is might not work if VERSION file format has changed + */ + NameNode.stateChangeLog.info("BLOCK* registerDatanode: " + nodeS + + " is replaced by " + nodeReg + " with the same storageID " + + nodeReg.getStorageID()); + } + + boolean success = false; + try { + // update cluster map + getNetworkTopology().remove(nodeS); + nodeS.updateRegInfo(nodeReg); + nodeS.setDisallowed(false); // Node is in the include list + + // resolve network location + resolveNetworkLocation(nodeS); + getNetworkTopology().add(nodeS); + + // also treat the registration message as a heartbeat + heartbeatManager.register(nodeS); + checkDecommissioning(nodeS); + success = true; + } finally { + if (!success) { + removeDatanode(nodeS); + wipeDatanode(nodeS); + } + } + return; + } + + // this is a new datanode serving a new data storage + if ("".equals(nodeReg.getStorageID())) { + // this data storage has never been registered + // it is either empty or was created by pre-storageID version of DFS + nodeReg.setStorageID(newStorageID()); + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug( + "BLOCK* NameSystem.registerDatanode: " + + "new storageID " + nodeReg.getStorageID() + " assigned."); + } + } + + DatanodeDescriptor nodeDescr + = new DatanodeDescriptor(nodeReg, NetworkTopology.DEFAULT_RACK); + boolean success = false; + try { + resolveNetworkLocation(nodeDescr); + networktopology.add(nodeDescr); + + // register new datanode + addDatanode(nodeDescr); + checkDecommissioning(nodeDescr); + + // also treat the registration message as a heartbeat + // no need to update its timestamp + // because its is done when the descriptor is created + heartbeatManager.addDatanode(nodeDescr); + success = true; + } finally { + if (!success) { + removeDatanode(nodeDescr); + wipeDatanode(nodeDescr); + } + } + } catch (InvalidTopologyException e) { + // If the network location is invalid, clear the cached mappings + // so that we have a chance to re-add this DataNode with the + // correct network location later. + dnsToSwitchMapping.reloadCachedMappings(); + throw e; + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java index acb4ce79588..a72e9be49c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java @@ -33,6 +33,7 @@ import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; @@ -663,7 +664,9 @@ public abstract class Storage extends StorageInfo { file.write(jvmName.getBytes(Charsets.UTF_8)); LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName); } catch(OverlappingFileLockException oe) { - LOG.error("It appears that another namenode " + file.readLine() + // Cannot read from the locked file on Windows. + String lockingJvmName = Path.WINDOWS ? 
"" : (" " + file.readLine()); + LOG.error("It appears that another namenode" + lockingJvmName + " has already locked the storage directory"); file.close(); return null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 6738241c046..69fcdd971be 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -538,7 +538,7 @@ class BPOfferService { // using global fsdataset dn.getFSDataset().invalidate(bcmd.getBlockPoolId(), toDelete); } catch(IOException e) { - dn.checkDiskError(); + // Exceptions caught here are not expected to be disk-related. throw e; } dn.metrics.incrBlocksRemoved(toDelete.length); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index cc32224b70f..3cf1679b6ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -602,13 +602,13 @@ class BlockReceiver implements Closeable { offsetInBlock > lastCacheDropOffset + CACHE_DROP_LAG_BYTES) { long twoWindowsAgo = lastCacheDropOffset - CACHE_DROP_LAG_BYTES; if (twoWindowsAgo > 0 && dropCacheBehindWrites) { - NativeIO.posixFadviseIfPossible(outFd, 0, lastCacheDropOffset, - NativeIO.POSIX_FADV_DONTNEED); + NativeIO.POSIX.posixFadviseIfPossible(outFd, 0, lastCacheDropOffset, + NativeIO.POSIX.POSIX_FADV_DONTNEED); } if (syncBehindWrites) { - NativeIO.syncFileRangeIfPossible(outFd, lastCacheDropOffset, CACHE_DROP_LAG_BYTES, - NativeIO.SYNC_FILE_RANGE_WRITE); + NativeIO.POSIX.syncFileRangeIfPossible(outFd, lastCacheDropOffset, CACHE_DROP_LAG_BYTES, + NativeIO.POSIX.SYNC_FILE_RANGE_WRITE); } lastCacheDropOffset += CACHE_DROP_LAG_BYTES; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java index fdade84f0ef..0e1e35c7336 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java @@ -338,9 +338,9 @@ class BlockSender implements java.io.Closeable { if (blockInFd != null && shouldDropCacheBehindRead && isLongRead()) { // drop the last few MB of the file from cache try { - NativeIO.posixFadviseIfPossible( + NativeIO.POSIX.posixFadviseIfPossible( blockInFd, lastCacheDropOffset, offset - lastCacheDropOffset, - NativeIO.POSIX_FADV_DONTNEED); + NativeIO.POSIX.POSIX_FADV_DONTNEED); } catch (Exception e) { LOG.warn("Unable to drop cache on file close", e); } @@ -637,7 +637,8 @@ class BlockSender implements java.io.Closeable { if (isLongRead() && blockInFd != null) { // Advise that this file descriptor will be accessed sequentially. 
- NativeIO.posixFadviseIfPossible(blockInFd, 0, 0, NativeIO.POSIX_FADV_SEQUENTIAL); + NativeIO.POSIX.posixFadviseIfPossible( + blockInFd, 0, 0, NativeIO.POSIX.POSIX_FADV_SEQUENTIAL); } // Trigger readahead of beginning of file if configured. @@ -725,9 +726,9 @@ class BlockSender implements java.io.Closeable { offset >= nextCacheDropOffset) { long dropLength = offset - lastCacheDropOffset; if (dropLength >= 1024) { - NativeIO.posixFadviseIfPossible(blockInFd, + NativeIO.POSIX.posixFadviseIfPossible(blockInFd, lastCacheDropOffset, dropLength, - NativeIO.POSIX_FADV_DONTNEED); + NativeIO.POSIX.POSIX_FADV_DONTNEED); } lastCacheDropOffset += CACHE_DROP_INTERVAL_BYTES; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 999ef8daa87..7427e834dba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -60,8 +60,11 @@ import java.io.OutputStream; import java.io.PrintStream; import java.net.InetSocketAddress; import java.net.Socket; +import java.net.SocketException; +import java.net.SocketTimeoutException; import java.net.URI; import java.net.UnknownHostException; +import java.nio.channels.ClosedByInterruptException; import java.nio.channels.SocketChannel; import java.security.PrivilegedExceptionAction; import java.util.AbstractList; @@ -1281,7 +1284,13 @@ public class DataNode extends Configured protected void checkDiskError(Exception e ) throws IOException { LOG.warn("checkDiskError: exception: ", e); - + if (e instanceof SocketException || e instanceof SocketTimeoutException + || e instanceof ClosedByInterruptException + || e.getMessage().startsWith("Broken pipe")) { + LOG.info("Not checking disk as checkDiskError was called on a network" + + " related exception"); + return; + } if (e.getMessage() != null && e.getMessage().startsWith("No space left on device")) { throw new DiskOutOfSpaceException("No space left on device"); @@ -1593,8 +1602,12 @@ public class DataNode extends Configured } catch (IOException ie) { LOG.warn(bpReg + ":Failed to transfer " + b + " to " + targets[0] + " got ", ie); - // check if there are any disk problem - checkDiskError(); + // check if there are any disk problem + try{ + checkDiskError(ie); + } catch(IOException e) { + LOG.warn("DataNode.checkDiskError failed in run() with: ", e); + } } finally { xmitsInProgress.getAndDecrement(); @@ -1729,6 +1742,21 @@ public class DataNode extends Configured } } + // Small wrapper around the DiskChecker class that provides means to mock + // DiskChecker static methods and unittest DataNode#getDataDirsFromURIs. 
+ static class DataNodeDiskChecker { + private FsPermission expectedPermission; + + public DataNodeDiskChecker(FsPermission expectedPermission) { + this.expectedPermission = expectedPermission; + } + + public void checkDir(LocalFileSystem localFS, Path path) + throws DiskErrorException, IOException { + DiskChecker.checkDir(localFS, path, expectedPermission); + } + } + /** * Make an instance of DataNode after ensuring that at least one of the * given data directories (and their parent directories, if necessary) @@ -1747,7 +1775,10 @@ public class DataNode extends Configured FsPermission permission = new FsPermission( conf.get(DFS_DATANODE_DATA_DIR_PERMISSION_KEY, DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT)); - ArrayList dirs = getDataDirsFromURIs(dataDirs, localFS, permission); + DataNodeDiskChecker dataNodeDiskChecker = + new DataNodeDiskChecker(permission); + ArrayList dirs = + getDataDirsFromURIs(dataDirs, localFS, dataNodeDiskChecker); DefaultMetricsSystem.initialize("DataNode"); assert dirs.size() > 0 : "number of data directories should be > 0"; @@ -1756,7 +1787,8 @@ public class DataNode extends Configured // DataNode ctor expects AbstractList instead of List or Collection... static ArrayList getDataDirsFromURIs(Collection dataDirs, - LocalFileSystem localFS, FsPermission permission) throws IOException { + LocalFileSystem localFS, DataNodeDiskChecker dataNodeDiskChecker) + throws IOException { ArrayList dirs = new ArrayList(); StringBuilder invalidDirs = new StringBuilder(); for (URI dirURI : dataDirs) { @@ -1768,7 +1800,7 @@ public class DataNode extends Configured // drop any (illegal) authority in the URI for backwards compatibility File dir = new File(dirURI.getPath()); try { - DiskChecker.checkDir(localFS, new Path(dir.toURI()), permission); + dataNodeDiskChecker.checkDir(localFS, new Path(dir.toURI())); dirs.add(dir); } catch (IOException ioe) { LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " " diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 521fd01cabe..dfbd52c2070 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -41,6 +41,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.Block; @@ -92,6 +93,15 @@ import org.apache.hadoop.util.Time; @InterfaceAudience.Private class FsDatasetImpl implements FsDatasetSpi { static final Log LOG = LogFactory.getLog(FsDatasetImpl.class); + private final static boolean isNativeIOAvailable; + static { + isNativeIOAvailable = NativeIO.isAvailable(); + if (Path.WINDOWS && !isNativeIOAvailable) { + LOG.warn("Data node cannot fully support concurrent reading" + + " and writing without native code extensions on Windows."); + } + } + @Override // FsDatasetSpi public List getVolumes() { @@ -149,6 +159,11 @@ class FsDatasetImpl implements FsDatasetSpi { if (meta == null || !meta.exists()) { return null; } + if (isNativeIOAvailable) { + return new 
LengthInputStream( + NativeIO.getShareDeleteFileInputStream(meta), + meta.length()); + } return new LengthInputStream(new FileInputStream(meta), meta.length()); } @@ -324,18 +339,22 @@ class FsDatasetImpl implements FsDatasetSpi { public InputStream getBlockInputStream(ExtendedBlock b, long seekOffset) throws IOException { File blockFile = getBlockFileNoExistsCheck(b); - RandomAccessFile blockInFile; - try { - blockInFile = new RandomAccessFile(blockFile, "r"); - } catch (FileNotFoundException fnfe) { - throw new IOException("Block " + b + " is not valid. " + - "Expected block file at " + blockFile + " does not exist."); - } + if (isNativeIOAvailable) { + return NativeIO.getShareDeleteFileInputStream(blockFile, seekOffset); + } else { + RandomAccessFile blockInFile; + try { + blockInFile = new RandomAccessFile(blockFile, "r"); + } catch (FileNotFoundException fnfe) { + throw new IOException("Block " + b + " is not valid. " + + "Expected block file at " + blockFile + " does not exist."); + } - if (seekOffset > 0) { - blockInFile.seek(seekOffset); + if (seekOffset > 0) { + blockInFile.seek(seekOffset); + } + return new FileInputStream(blockInFile.getFD()); } - return new FileInputStream(blockInFile.getFD()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java index 8b3cf04d741..0bc62d8f288 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java @@ -39,6 +39,8 @@ public class CheckpointConf { /** checkpoint once every this many transactions, regardless of time */ private final long checkpointTxnCount; + /** maximum number of retries when merge errors occur */ + private final int maxRetriesOnMergeError; public CheckpointConf(Configuration conf) { checkpointCheckPeriod = conf.getLong( @@ -49,6 +51,8 @@ public class CheckpointConf { DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT); checkpointTxnCount = conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); + maxRetriesOnMergeError = conf.getInt(DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY, + DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT); warnForDeprecatedConfigs(conf); } @@ -75,4 +79,8 @@ public class CheckpointConf { public long getTxnCount() { return checkpointTxnCount; } + + public int getMaxRetriesOnMergeError() { + return maxRetriesOnMergeError; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java index d898250db29..8488bbea0c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointFaultInjector.java @@ -33,6 +33,7 @@ class CheckpointFaultInjector { public void beforeGetImageSetsHeaders() throws IOException {} public void afterSecondaryCallsRollEditLog() throws IOException {} + public void duringMerge() throws IOException {} public void afterSecondaryUploadsNewImage() throws IOException {} public void aboutToSendFile(File localfile) throws IOException {} @@ -43,4 +44,5 @@ class CheckpointFaultInjector { return false; }
+ public void afterMD5Rename() throws IOException {} } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java index b3c45ffdb9e..bfe479930ce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java @@ -142,4 +142,9 @@ class EditLogBackupInputStream extends EditLogInputStream { public boolean isInProgress() { return true; } + + @Override + public void setMaxOpSize(int maxOpSize) { + reader.setMaxOpSize(maxOpSize); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 76b5ff5616a..2b949541237 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -32,6 +32,7 @@ import java.security.PrivilegedExceptionAction; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage.HttpGetFailedException; @@ -53,6 +54,7 @@ public class EditLogFileInputStream extends EditLogInputStream { private final long firstTxId; private final long lastTxId; private final boolean isInProgress; + private int maxOpSize; static private enum State { UNINIT, OPEN, @@ -118,6 +120,7 @@ public class EditLogFileInputStream extends EditLogInputStream { this.firstTxId = firstTxId; this.lastTxId = lastTxId; this.isInProgress = isInProgress; + this.maxOpSize = DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_DEFAULT; } private void init() throws LogHeaderCorruptException, IOException { @@ -134,6 +137,7 @@ public class EditLogFileInputStream extends EditLogInputStream { throw new LogHeaderCorruptException("No header found in log"); } reader = new FSEditLogOp.Reader(dataIn, tracker, logVersion); + reader.setMaxOpSize(maxOpSize); state = State.OPEN; } finally { if (reader == null) { @@ -412,5 +416,12 @@ public class EditLogFileInputStream extends EditLogInputStream { return url.toString(); } } - + + @Override + public void setMaxOpSize(int maxOpSize) { + this.maxOpSize = maxOpSize; + if (reader != null) { + reader.setMaxOpSize(maxOpSize); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java index 3816dc10d44..6fdf9389972 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java @@ -165,4 +165,9 @@ public abstract class EditLogInputStream implements Closeable { * Return true if this stream is in progress, false if it is finalized. 
*/ public abstract boolean isInProgress(); + + /** + * Set the maximum opcode size in bytes. + */ + public abstract void setMaxOpSize(int maxOpSize); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 9becf41174d..b11059a4bb4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -1343,6 +1343,11 @@ public class FSDirectory implements Closeable { // fill up the inodes in the path from this inode to root for (int i = 0; i < depth; i++) { + if (inode == null) { + NameNode.stateChangeLog.warn("Could not get full path." + + " Corresponding file might have deleted already."); + return null; + } inodes[depth-i-1] = inode; inode = inode.parent; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 7dac687fc63..fb74adf6a11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -51,6 +51,7 @@ import org.apache.hadoop.io.WritableFactory; import org.apache.hadoop.hdfs.util.XMLUtils; import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException; import org.apache.hadoop.hdfs.util.XMLUtils.Stanza; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DeprecatedUTF8; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -75,11 +76,6 @@ import java.io.EOFException; public abstract class FSEditLogOp { public final FSEditLogOpCodes opCode; long txid; - /** - * Opcode size is limited to 1.5 megabytes - */ - public static final int MAX_OP_SIZE = (3 * 1024 * 1024) / 2; - @SuppressWarnings("deprecation") final public static class OpInstanceCache { @@ -2246,6 +2242,7 @@ public abstract class FSEditLogOp { private final int logVersion; private final Checksum checksum; private final OpInstanceCache cache; + private int maxOpSize; /** * Construct the reader @@ -2253,7 +2250,8 @@ public abstract class FSEditLogOp { * @param logVersion The version of the data coming from the stream. */ @SuppressWarnings("deprecation") - public Reader(DataInputStream in, StreamLimiter limiter, int logVersion) { + public Reader(DataInputStream in, StreamLimiter limiter, + int logVersion) { this.logVersion = logVersion; if (LayoutVersion.supports(Feature.EDITS_CHESKUM, logVersion)) { this.checksum = new PureJavaCrc32(); @@ -2269,6 +2267,11 @@ public abstract class FSEditLogOp { } this.limiter = limiter; this.cache = new OpInstanceCache(); + this.maxOpSize = DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_DEFAULT; + } + + public void setMaxOpSize(int maxOpSize) { + this.maxOpSize = maxOpSize; } /** @@ -2363,8 +2366,8 @@ public abstract class FSEditLogOp { * problematic byte. This usually means the beginning of the opcode. 
*/ private FSEditLogOp decodeOp() throws IOException { - limiter.setLimit(MAX_OP_SIZE); - in.mark(MAX_OP_SIZE); + limiter.setLimit(maxOpSize); + in.mark(maxOpSize); if (checksum != null) { checksum.reset(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index f67b7ce852f..c7b77e5cd63 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -49,6 +49,7 @@ import static org.apache.hadoop.util.Time.now; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand; @@ -584,11 +585,11 @@ public class FSImage implements Closeable { boolean loadFSImage(FSNamesystem target, MetaRecoveryContext recovery) throws IOException { FSImageStorageInspector inspector = storage.readAndInspectDirs(); + FSImageFile imageFile = null; isUpgradeFinalized = inspector.isUpgradeFinalized(); - FSImageStorageInspector.FSImageFile imageFile - = inspector.getLatestImage(); + List imageFiles = inspector.getLatestImages(); boolean needToSave = inspector.needToSave(); Iterable editStreams = null; @@ -601,14 +602,20 @@ public class FSImage implements Closeable { // we better be able to load all the edits. If we're the standby NN, it's // OK to not be able to read all of edits right now. long toAtLeastTxId = editLog.isOpenForWrite() ? inspector.getMaxSeenTxId() : 0; - editStreams = editLog.selectInputStreams(imageFile.getCheckpointTxId() + 1, + editStreams = editLog.selectInputStreams( + imageFiles.get(0).getCheckpointTxId() + 1, toAtLeastTxId, recovery, false); } else { editStreams = FSImagePreTransactionalStorageInspector .getEditLogStreams(storage); } + int maxOpSize = conf.getInt(DFSConfigKeys. + DFS_NAMENODE_MAX_OP_SIZE_KEY, + DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_DEFAULT); + for (EditLogInputStream elis : editStreams) { + elis.setMaxOpSize(maxOpSize); + } - LOG.debug("Planning to load image :\n" + imageFile); for (EditLogInputStream l : editStreams) { LOG.debug("Planning to load edit log stream: " + l); } @@ -616,34 +623,21 @@ public class FSImage implements Closeable { LOG.info("No edit log streams selected."); } - try { - StorageDirectory sdForProperties = imageFile.sd; - storage.readProperties(sdForProperties); - - if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, - getLayoutVersion())) { - // For txid-based layout, we should have a .md5 file - // next to the image file - loadFSImage(imageFile.getFile(), target, recovery); - } else if (LayoutVersion.supports(Feature.FSIMAGE_CHECKSUM, - getLayoutVersion())) { - // In 0.22, we have the checksum stored in the VERSION file. 
- String md5 = storage.getDeprecatedProperty( - NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY); - if (md5 == null) { - throw new InconsistentFSStateException(sdForProperties.getRoot(), - "Message digest property " + - NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY + - " not set for storage directory " + sdForProperties.getRoot()); - } - loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery); - } else { - // We don't have any record of the md5sum - loadFSImage(imageFile.getFile(), null, target, recovery); + for (int i = 0; i < imageFiles.size(); i++) { + try { + imageFile = imageFiles.get(i); + loadFSImageFile(target, recovery, imageFile); + break; + } catch (IOException ioe) { + LOG.error("Failed to load image from " + imageFile, ioe); + target.clear(); + imageFile = null; } - } catch (IOException ioe) { + } + // Failed to load any images, error out + if (imageFile == null) { FSEditLog.closeAllStreams(editStreams); - throw new IOException("Failed to load image from " + imageFile, ioe); + throw new IOException("Failed to load an FSImage file!"); } long txnsAdvanced = loadEdits(editStreams, target, recovery); needToSave |= needsResaveBasedOnStaleCheckpoint(imageFile.getFile(), @@ -652,6 +646,35 @@ public class FSImage implements Closeable { return needToSave; } + void loadFSImageFile(FSNamesystem target, MetaRecoveryContext recovery, + FSImageFile imageFile) throws IOException { + LOG.debug("Planning to load image :\n" + imageFile); + StorageDirectory sdForProperties = imageFile.sd; + storage.readProperties(sdForProperties); + + if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, + getLayoutVersion())) { + // For txid-based layout, we should have a .md5 file + // next to the image file + loadFSImage(imageFile.getFile(), target, recovery); + } else if (LayoutVersion.supports(Feature.FSIMAGE_CHECKSUM, + getLayoutVersion())) { + // In 0.22, we have the checksum stored in the VERSION file. 
+ String md5 = storage.getDeprecatedProperty( + NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY); + if (md5 == null) { + throw new InconsistentFSStateException(sdForProperties.getRoot(), + "Message digest property " + + NNStorage.DEPRECATED_MESSAGE_DIGEST_PROPERTY + + " not set for storage directory " + sdForProperties.getRoot()); + } + loadFSImage(imageFile.getFile(), new MD5Hash(md5), target, recovery); + } else { + // We don't have any record of the md5sum + loadFSImage(imageFile.getFile(), null, target, recovery); + } + } + public void initEditLog() { Preconditions.checkState(getNamespaceID() != 0, "Must know namespace ID before initting edit log"); @@ -1080,7 +1103,7 @@ public class FSImage implements Closeable { */ public synchronized void saveDigestAndRenameCheckpointImage( long txid, MD5Hash digest) throws IOException { - renameCheckpoint(txid); + // Write and rename MD5 file List badSds = Lists.newArrayList(); for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) { @@ -1093,6 +1116,10 @@ public class FSImage implements Closeable { } storage.reportErrorsOnDirectories(badSds); + CheckpointFaultInjector.getInstance().afterMD5Rename(); + + // Rename image from tmp file + renameCheckpoint(txid); // So long as this is the newest image available, // advertise it as such to other checkpointers // from now on diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index 7971de3df71..02d7d4400c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -232,8 +232,8 @@ class FSImageFormat { loadSecretManagerState(in); // make sure to read to the end of file - int eof = in.read(); - assert eof == -1 : "Should have reached the end of image file " + curFile; + boolean eof = (in.read() == -1); + assert eof : "Should have reached the end of image file " + curFile; } finally { in.close(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java index 4b0fd276414..1a637cc32bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImagePreTransactionalStorageInspector.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; +import java.util.LinkedList; import java.util.List; import java.util.Set; @@ -146,7 +147,7 @@ class FSImagePreTransactionalStorageInspector extends FSImageStorageInspector { } @Override - FSImageFile getLatestImage() throws IOException { + List getLatestImages() throws IOException { // We should have at least one image and one edits dirs if (latestNameSD == null) throw new IOException("Image file is not found in " + imageDirs); @@ -176,9 +177,12 @@ class FSImagePreTransactionalStorageInspector extends FSImageStorageInspector { needToSaveAfterRecovery = doRecovery(); - return new FSImageFile(latestNameSD, + FSImageFile file = new FSImageFile(latestNameSD, 
NNStorage.getStorageFile(latestNameSD, NameNodeFile.IMAGE), HdfsConstants.INVALID_TXID); + LinkedList ret = new LinkedList(); + ret.add(file); + return ret; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageStorageInspector.java index a3a516f0ed9..cb1351bb117 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageStorageInspector.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.File; import java.io.IOException; +import java.util.List; + import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -45,7 +47,7 @@ abstract class FSImageStorageInspector { * Get the image files which should be loaded into the filesystem. * @throws IOException if not enough files are available (eg no image found in any directory) */ - abstract FSImageFile getLatestImage() throws IOException; + abstract List getLatestImages() throws IOException; /** * Get the minimum tx id which should be loaded with this set of images. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java index 484499a6c71..5ab9b37d9b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageTransactionalStorageInspector.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -108,24 +109,31 @@ class FSImageTransactionalStorageInspector extends FSImageStorageInspector { } /** - * @return the image that has the most recent associated transaction ID. - * If there are multiple storage directories which contain equal images - * the storage directory that was inspected first will be preferred. + * @return the image files that have the most recent associated + * transaction IDs. If there are multiple storage directories which + * contain equal images, we'll return them all. * * @throws FileNotFoundException if not images are found. 
*/ @Override - FSImageFile getLatestImage() throws IOException { - if (foundImages.isEmpty()) { - throw new FileNotFoundException("No valid image files found"); - } - - FSImageFile ret = null; + List getLatestImages() throws IOException { + LinkedList ret = new LinkedList(); for (FSImageFile img : foundImages) { - if (ret == null || img.txId > ret.txId) { - ret = img; + if (ret.isEmpty()) { + ret.add(img); + } else { + FSImageFile cur = ret.getFirst(); + if (cur.txId == img.txId) { + ret.add(img); + } else if (cur.txId < img.txId) { + ret.clear(); + ret.add(img); + } } } + if (ret.isEmpty()) { + throw new FileNotFoundException("No valid image files found"); + } return ret; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index b626612ec2d..aa077fe5a29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -34,6 +34,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; @@ -255,10 +256,23 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return !isDefaultAuditLogger || auditLog.isInfoEnabled(); } - private void logAuditEvent(UserGroupInformation ugi, - InetAddress addr, String cmd, String src, String dst, - HdfsFileStatus stat) { - logAuditEvent(true, ugi, addr, cmd, src, dst, stat); + private HdfsFileStatus getAuditFileInfo(String path, boolean resolveSymlink) + throws IOException { + return (isAuditEnabled() && isExternalInvocation()) + ? 
dir.getFileInfo(path, resolveSymlink) : null; + } + + private void logAuditEvent(boolean succeeded, String cmd, String src) + throws IOException { + logAuditEvent(succeeded, cmd, src, null, null); + } + + private void logAuditEvent(boolean succeeded, String cmd, String src, + String dst, HdfsFileStatus stat) throws IOException { + if (isAuditEnabled() && isExternalInvocation()) { + logAuditEvent(succeeded, getRemoteUser(), getRemoteIp(), + cmd, src, dst, stat); + } } private void logAuditEvent(boolean succeeded, @@ -300,6 +314,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private final boolean isPermissionEnabled; private final boolean persistBlocks; private final UserGroupInformation fsOwner; + private final String fsOwnerShortUserName; private final String supergroup; private final boolean standbyShouldCheckpoint; @@ -529,6 +544,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.datanodeStatistics = blockManager.getDatanodeManager().getDatanodeStatistics(); this.fsOwner = UserGroupInformation.getCurrentUser(); + this.fsOwnerShortUserName = fsOwner.getShortUserName(); this.supergroup = conf.get(DFS_PERMISSIONS_SUPERUSERGROUP_KEY, DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT); this.isPermissionEnabled = conf.getBoolean(DFS_PERMISSIONS_ENABLED_KEY, @@ -579,7 +595,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.maxFsObjects = conf.getLong(DFS_NAMENODE_MAX_OBJECTS_KEY, DFS_NAMENODE_MAX_OBJECTS_DEFAULT); - this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0); + this.accessTimePrecision = conf.getLong(DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, + DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT); this.supportAppends = conf.getBoolean(DFS_SUPPORT_APPEND_KEY, DFS_SUPPORT_APPEND_DEFAULT); LOG.info("Append Enabled: " + supportAppends); @@ -1111,9 +1128,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Dump all metadata into specified file */ void metaSave(String filename) throws IOException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); writeLock(); try { - checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); File file = new File(System.getProperty("hadoop.log.dir"), filename); PrintWriter out = new PrintWriter(new BufferedWriter( new OutputStreamWriter(new FileOutputStream(file, true), Charsets.UTF_8))); @@ -1177,11 +1196,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { setPermissionInt(src, permission); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setPermission", src, null, null); - } + logAuditEvent(false, "setPermission", src); throw e; } } @@ -1190,6 +1205,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, FileNotFoundException, SafeModeException, UnresolvedLinkException, IOException { HdfsFileStatus resultingStat = null; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1197,20 +1214,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (isInSafeMode()) { throw new SafeModeException("Cannot set permission for " + src, safeMode); } - checkOwner(src); + checkOwner(pc, src); dir.setPermission(src, permission); - if (isAuditEnabled() && isExternalInvocation()) { - resultingStat = dir.getFileInfo(src, false); - } + 
resultingStat = getAuditFileInfo(src, false); } finally { writeUnlock(); } getEditLog().logSync(); - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setPermission", src, null, resultingStat); - } + logAuditEvent(true, "setPermission", src, null, resultingStat); } /** @@ -1223,11 +1234,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { setOwnerInt(src, username, group); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setOwner", src, null, null); - } + logAuditEvent(false, "setOwner", src); throw e; } } @@ -1236,6 +1243,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, FileNotFoundException, SafeModeException, UnresolvedLinkException, IOException { HdfsFileStatus resultingStat = null; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1243,29 +1252,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (isInSafeMode()) { throw new SafeModeException("Cannot set owner for " + src, safeMode); } - FSPermissionChecker pc = checkOwner(src); - if (!pc.isSuper) { - if (username != null && !pc.user.equals(username)) { - throw new AccessControlException("Non-super user cannot change owner."); + checkOwner(pc, src); + if (!pc.isSuperUser()) { + if (username != null && !pc.getUser().equals(username)) { + throw new AccessControlException("Non-super user cannot change owner"); } if (group != null && !pc.containsGroup(group)) { - throw new AccessControlException("User does not belong to " + group - + " ."); + throw new AccessControlException("User does not belong to " + group); } } dir.setOwner(src, username, group); - if (isAuditEnabled() && isExternalInvocation()) { - resultingStat = dir.getFileInfo(src, false); - } + resultingStat = getAuditFileInfo(src, false); } finally { writeUnlock(); } getEditLog().logSync(); - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setOwner", src, null, resultingStat); - } + logAuditEvent(true, "setOwner", src, null, resultingStat); } /** @@ -1300,24 +1302,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlocks getBlockLocations(String src, long offset, long length, boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode) throws FileNotFoundException, UnresolvedLinkException, IOException { + FSPermissionChecker pc = getPermissionChecker(); try { - return getBlockLocationsInt(src, offset, length, doAccessTime, + return getBlockLocationsInt(pc, src, offset, length, doAccessTime, needBlockToken, checkSafeMode); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "open", src, null, null); - } + logAuditEvent(false, "open", src); throw e; } } - private LocatedBlocks getBlockLocationsInt(String src, long offset, long length, - boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode) + private LocatedBlocks getBlockLocationsInt(FSPermissionChecker pc, + String src, long offset, long length, boolean doAccessTime, + boolean needBlockToken, boolean checkSafeMode) throws FileNotFoundException, UnresolvedLinkException, IOException { if 
(isPermissionEnabled) { - checkPathAccess(src, FsAction.READ); + checkPathAccess(pc, src, FsAction.READ); } if (offset < 0) { @@ -1330,11 +1330,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } final LocatedBlocks ret = getBlockLocationsUpdateTimes(src, offset, length, doAccessTime, needBlockToken); - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "open", src, null, null); - } + logAuditEvent(true, "open", src); if (checkSafeMode && isInSafeMode()) { for (LocatedBlock b : ret.getLocatedBlocks()) { // if safemode & no block locations yet then throw safemodeException @@ -1359,13 +1355,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws FileNotFoundException, UnresolvedLinkException, IOException { for (int attempt = 0; attempt < 2; attempt++) { - if (attempt == 0) { // first attempt is with readlock + boolean isReadOp = (attempt == 0); + if (isReadOp) { // first attempt is with readlock + checkOperation(OperationCategory.READ); readLock(); } else { // second attempt is with write lock + checkOperation(OperationCategory.WRITE); writeLock(); // writelock is needed to set accesstime } try { - checkOperation(OperationCategory.READ); + if (isReadOp) { + checkOperation(OperationCategory.READ); + } else { + checkOperation(OperationCategory.WRITE); + } // if the namenode is in safemode, then do not update access time if (isInSafeMode()) { @@ -1378,7 +1381,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (now <= inode.getAccessTime() + getAccessTimePrecision()) { // if we have to set access time but we only have the readlock, then // restart this entire operation with the writeLock. - if (attempt == 0) { + if (isReadOp) { continue; } } @@ -1388,7 +1391,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, inode.computeFileSize(false), inode.isUnderConstruction(), offset, length, needBlockToken); } finally { - if (attempt == 0) { + if (isReadOp) { readUnlock(); } else { writeUnlock(); @@ -1411,11 +1414,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { concatInt(target, srcs); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getLoginUser(), - getRemoteIp(), - "concat", Arrays.toString(srcs), target, null); - } + logAuditEvent(false, "concat", Arrays.toString(srcs), target, null); throw e; } } @@ -1447,40 +1446,36 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } HdfsFileStatus resultingStat = null; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot concat " + target, safeMode); } - concatInternal(target, srcs); - if (isAuditEnabled() && isExternalInvocation()) { - resultingStat = dir.getFileInfo(target, false); - } + concatInternal(pc, target, srcs); + resultingStat = getAuditFileInfo(target, false); } finally { writeUnlock(); } getEditLog().logSync(); - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getLoginUser(), - getRemoteIp(), - "concat", Arrays.toString(srcs), target, resultingStat); - } + logAuditEvent(true, "concat", Arrays.toString(srcs), target, resultingStat); } /** See {@link #concat(String, String[])} */ - private void concatInternal(String target, String [] srcs) + private void 
concatInternal(FSPermissionChecker pc, String target, String [] srcs) throws IOException, UnresolvedLinkException { assert hasWriteLock(); // write permission for the target if (isPermissionEnabled) { - checkPathAccess(target, FsAction.WRITE); + checkPathAccess(pc, target, FsAction.WRITE); // and srcs for(String aSrc: srcs) { - checkPathAccess(aSrc, FsAction.READ); // read the file - checkParentAccess(aSrc, FsAction.WRITE); // for delete + checkPathAccess(pc, aSrc, FsAction.READ); // read the file + checkParentAccess(pc, aSrc, FsAction.WRITE); // for delete } } @@ -1582,11 +1577,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { setTimesInt(src, mtime, atime); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setTimes", src, null, null); - } + logAuditEvent(false, "setTimes", src); throw e; } } @@ -1597,29 +1588,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new IOException("Access time for hdfs is not configured. " + " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); } + HdfsFileStatus resultingStat = null; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); // Write access is required to set access and modification times if (isPermissionEnabled) { - checkPathAccess(src, FsAction.WRITE); + checkPathAccess(pc, src, FsAction.WRITE); } INode inode = dir.getINode(src); if (inode != null) { dir.setTimes(src, inode, mtime, atime, true); - if (isAuditEnabled() && isExternalInvocation()) { - final HdfsFileStatus stat = dir.getFileInfo(src, false); - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setTimes", src, null, stat); - } + resultingStat = getAuditFileInfo(src, false); } else { throw new FileNotFoundException("File/Directory " + src + " does not exist."); } } finally { writeUnlock(); } + logAuditEvent(true, "setTimes", src, null, resultingStat); } /** @@ -1631,11 +1621,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { createSymlinkInt(target, link, dirPerms, createParent); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "createSymlink", link, target, null); - } + logAuditEvent(false, "createSymlink", link, target, null); throw e; } } @@ -1644,6 +1630,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { HdfsFileStatus resultingStat = null; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -1651,26 +1639,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (!createParent) { verifyParentDir(link); } - createSymlinkInternal(target, link, dirPerms, createParent); - if (isAuditEnabled() && isExternalInvocation()) { - resultingStat = dir.getFileInfo(link, false); - } + createSymlinkInternal(pc, target, link, dirPerms, createParent); + resultingStat = getAuditFileInfo(link, false); } finally { writeUnlock(); } getEditLog().logSync(); - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "createSymlink", link, 
target, resultingStat); - } + logAuditEvent(true, "createSymlink", link, target, resultingStat); } /** * Create a symbolic link. */ - private void createSymlinkInternal(String target, String link, - PermissionStatus dirPerms, boolean createParent) + private void createSymlinkInternal(FSPermissionChecker pc, String target, + String link, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { assert hasWriteLock(); if (NameNode.stateChangeLog.isDebugEnabled()) { @@ -1688,7 +1670,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, +" either because the filename is invalid or the file exists"); } if (isPermissionEnabled) { - checkAncestorAccess(link, FsAction.WRITE); + checkAncestorAccess(pc, link, FsAction.WRITE); } // validate that we have enough inodes. checkFsObjectLimit(); @@ -1715,11 +1697,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return setReplicationInt(src, replication); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setReplication", src, null, null); - } + logAuditEvent(false, "setReplication", src); throw e; } } @@ -1727,17 +1705,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean setReplicationInt(final String src, final short replication) throws IOException { blockManager.verifyReplication(src, replication, null); - final boolean isFile; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot set replication for " + src, safeMode); } if (isPermissionEnabled) { - checkPathAccess(src, FsAction.WRITE); + checkPathAccess(pc, src, FsAction.WRITE); } final short[] oldReplication = new short[1]; @@ -1751,21 +1729,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } getEditLog().logSync(); - if (isFile && isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "setReplication", src, null, null); + if (isFile) { + logAuditEvent(true, "setReplication", src); } return isFile; } long getPreferredBlockSize(String filename) throws IOException, UnresolvedLinkException { + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); if (isPermissionEnabled) { - checkTraverse(filename); + checkTraverse(pc, filename); } return dir.getPreferredBlockSize(filename); } finally { @@ -1809,11 +1787,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return startFileInt(src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "create", src, null, null); - } + logAuditEvent(false, "create", src); throw e; } } @@ -1826,11 +1800,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, FileNotFoundException, ParentNotDirectoryException, IOException { boolean skipSync = false; final HdfsFileStatus stat; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - - startFileInternal(src, permissions, holder, 
clientMachine, flag, + startFileInternal(pc, src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); stat = dir.getFileInfo(src, false); } catch (StandbyException se) { @@ -1845,11 +1820,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "create", src, null, stat); - } + logAuditEvent(true, "create", src, null, stat); return stat; } @@ -1869,7 +1840,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * * @return the last block locations if the block is partial or null otherwise */ - private LocatedBlock startFileInternal(String src, + private LocatedBlock startFileInternal(FSPermissionChecker pc, String src, PermissionStatus permissions, String holder, String clientMachine, EnumSet flag, boolean createParent, short replication, long blockSize) throws SafeModeException, FileAlreadyExistsException, @@ -1902,9 +1873,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean append = flag.contains(CreateFlag.APPEND); if (isPermissionEnabled) { if (append || (overwrite && pathExists)) { - checkPathAccess(src, FsAction.WRITE); + checkPathAccess(pc, src, FsAction.WRITE); } else { - checkAncestorAccess(src, FsAction.WRITE); + checkAncestorAccess(pc, src, FsAction.WRITE); } } @@ -2027,6 +1998,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean recoverLease(String src, String holder, String clientMachine) throws IOException { boolean skipSync = false; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2044,7 +2017,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return true; } if (isPermissionEnabled) { - checkPathAccess(src, FsAction.WRITE); + checkPathAccess(pc, src, FsAction.WRITE); } recoverLeaseInternal(inode, src, holder, clientMachine, true); @@ -2147,11 +2120,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return appendFileInt(src, holder, clientMachine); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "append", src, null, null); - } + logAuditEvent(false, "append", src); throw e; } } @@ -2167,11 +2136,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); } LocatedBlock lb = null; + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - lb = startFileInternal(src, null, holder, clientMachine, + lb = startFileInternal(pc, src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), false, blockManager.maxReplication, 0); } catch (StandbyException se) { @@ -2193,11 +2164,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, +" block size " + lb.getBlock().getNumBytes()); } } - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "append", src, null, null); - } + logAuditEvent(true, "append", src); return lb; } @@ -2237,8 +2204,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } // Part I. Analyze the state of the file with respect to the input data. 
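Note on the locking changes in the hunks above and below: they add a checkOperation() call immediately before each readLock()/writeLock() acquisition, so the operation category is verified both before and after the lock is taken; the rationale is spelled out in the HAContext#checkOperation javadoc further down in this patch. The following is a minimal, self-contained sketch of that ordering only. It is not taken from the patch; the class, field, and method names are illustrative stand-ins for FSNamesystem.

import java.util.concurrent.locks.ReentrantReadWriteLock;

/** Illustrative sketch of the "check, lock, re-check" pattern; not patch code. */
public class CheckThenLockSketch {
  enum OperationCategory { READ, WRITE }

  static class StandbyException extends Exception {
    StandbyException(String msg) { super(msg); }
  }

  private final ReentrantReadWriteLock fsLock = new ReentrantReadWriteLock();
  private volatile boolean standby = false;

  /** Stand-in for FSNamesystem#checkOperation(). */
  private void checkOperation(OperationCategory op) throws StandbyException {
    if (standby) {
      throw new StandbyException("Operation category " + op
          + " is not supported in state STANDBY");
    }
  }

  String readOperation() throws StandbyException {
    // Fail fast before blocking on the lock: a client that reached a standby
    // node should get StandbyException (and fail over) without waiting for a
    // lock that may be held for a long time, e.g. during a checkpoint.
    checkOperation(OperationCategory.READ);
    fsLock.readLock().lock();
    try {
      // Re-check once the lock is held, since the node may have transitioned
      // to standby while this thread was waiting on the lock.
      checkOperation(OperationCategory.READ);
      return "result";
    } finally {
      fsLock.readLock().unlock();
    }
  }
}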
+ checkOperation(OperationCategory.READ); readLock(); try { + checkOperation(OperationCategory.READ); LocatedBlock[] onRetryBlock = new LocatedBlock[1]; final INode[] inodes = analyzeFileState( src, fileId, clientName, previous, onRetryBlock).getINodes(); @@ -2265,8 +2234,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // Allocate a new block, add it to the INode and the BlocksMap. Block newBlock = null; long offset; + checkOperation(OperationCategory.WRITE); writeLock(); try { + checkOperation(OperationCategory.WRITE); // Run the full analysis again, since things could have changed // while chooseTarget() was executing. LocatedBlock[] onRetryBlock = new LocatedBlock[1]; @@ -2420,9 +2391,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, final DatanodeDescriptor clientnode; final long preferredblocksize; final List chosen; + checkOperation(OperationCategory.READ); readLock(); try { - checkOperation(OperationCategory.WRITE); + checkOperation(OperationCategory.READ); //check safe mode if (isInSafeMode()) { throw new SafeModeException("Cannot add datanode; src=" + src @@ -2462,6 +2434,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean abandonBlock(ExtendedBlock b, String src, String holder) throws LeaseExpiredException, FileNotFoundException, UnresolvedLinkException, IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2539,6 +2512,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws SafeModeException, UnresolvedLinkException, IOException { checkBlock(last); boolean success = false; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2691,11 +2665,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return renameToInt(src, dst); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "rename", src, dst, null); - } + logAuditEvent(false, "rename", src, dst, null); throw e; } } @@ -2708,29 +2678,29 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src + " to " + dst); } + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - status = renameToInternal(src, dst); - if (status && isAuditEnabled() && isExternalInvocation()) { - resultingStat = dir.getFileInfo(dst, false); + status = renameToInternal(pc, src, dst); + if (status) { + resultingStat = getAuditFileInfo(dst, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (status && isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "rename", src, dst, resultingStat); + if (status) { + logAuditEvent(true, "rename", src, dst, resultingStat); } return status; } /** @deprecated See {@link #renameTo(String, String)} */ @Deprecated - private boolean renameToInternal(String src, String dst) + private boolean renameToInternal(FSPermissionChecker pc, String src, String dst) throws IOException, UnresolvedLinkException { assert hasWriteLock(); if (isInSafeMode()) { @@ -2746,8 +2716,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // of rewriting the dst String actualdst = dir.isDir(dst)? 
dst + Path.SEPARATOR + new Path(src).getName(): dst; - checkParentAccess(src, FsAction.WRITE); - checkAncestorAccess(actualdst, FsAction.WRITE); + checkParentAccess(pc, src, FsAction.WRITE); + checkAncestorAccess(pc, actualdst, FsAction.WRITE); } if (dir.renameTo(src, dst)) { @@ -2765,29 +2735,27 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - " + src + " to " + dst); } + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - - renameToInternal(src, dst, options); - if (isAuditEnabled() && isExternalInvocation()) { - resultingStat = dir.getFileInfo(dst, false); - } + renameToInternal(pc, src, dst, options); + resultingStat = getAuditFileInfo(dst, false); } finally { writeUnlock(); } getEditLog().logSync(); - if (isAuditEnabled() && isExternalInvocation()) { + if (resultingStat != null) { StringBuilder cmd = new StringBuilder("rename options="); for (Rename option : options) { cmd.append(option.value()).append(" "); } - logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), - cmd.toString(), src, dst, resultingStat); + logAuditEvent(true, cmd.toString(), src, dst, resultingStat); } } - private void renameToInternal(String src, String dst, + private void renameToInternal(FSPermissionChecker pc, String src, String dst, Options.Rename... options) throws IOException { assert hasWriteLock(); if (isInSafeMode()) { @@ -2797,8 +2765,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new InvalidPathException("Invalid name: " + dst); } if (isPermissionEnabled) { - checkParentAccess(src, FsAction.WRITE); - checkAncestorAccess(dst, FsAction.WRITE); + checkParentAccess(pc, src, FsAction.WRITE); + checkAncestorAccess(pc, dst, FsAction.WRITE); } dir.renameTo(src, dst, options); @@ -2816,11 +2784,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return deleteInt(src, recursive); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "delete", src, null, null); - } + logAuditEvent(false, "delete", src); throw e; } } @@ -2832,14 +2796,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src); } boolean status = deleteInternal(src, recursive, true); - if (status && isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "delete", src, null, null); + if (status) { + logAuditEvent(true, "delete", src); } return status; } + private FSPermissionChecker getPermissionChecker() + throws AccessControlException { + return new FSPermissionChecker(fsOwnerShortUserName, supergroup); + } /** * Remove a file/directory from the namespace. *

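The getPermissionChecker() helper added above is part of a wider change in these hunks: each operation builds a single FSPermissionChecker up front and threads it through the check*() methods (checkOwner, checkPathAccess, checkAncestorAccess, checkTraverse and checkPermission all gain a pc parameter further down), and checkPermission() skips the directory walk entirely for superusers. The sketch below illustrates that calling pattern only; the Checker class and the delete() body are simplified stand-ins, not the real FSNamesystem or FSPermissionChecker code.

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/** Illustrative sketch of the per-operation permission checker; not patch code. */
public class PerOperationCheckerSketch {
  static class AccessControlException extends Exception {
    AccessControlException(String msg) { super(msg); }
  }

  /** Immutable per-operation checker: safe to read without synchronization. */
  static class Checker {
    final String user;
    final Set<String> groups;
    final boolean superUser;

    Checker(String user, Set<String> groups, String fsOwner, String supergroup) {
      this.user = user;
      this.groups = Collections.unmodifiableSet(new HashSet<String>(groups));
      this.superUser = user.equals(fsOwner) || this.groups.contains(supergroup);
    }

    boolean isSuperUser() { return superUser; }

    void checkPathAccess(String path) throws AccessControlException {
      // Real inode/permission evaluation elided; the sketch denies
      // non-superusers only so the control flow is visible.
      if (!superUser) {
        throw new AccessControlException("Permission denied: user=" + user
            + ", path=" + path);
      }
    }
  }

  private final String fsOwnerShortUserName = "hdfs";
  private final String supergroup = "supergroup";

  /** Analogous to FSNamesystem#getPermissionChecker(): one checker per op. */
  private Checker getPermissionChecker(String user, Set<String> groups) {
    return new Checker(user, groups, fsOwnerShortUserName, supergroup);
  }

  boolean delete(String src, String user, Set<String> groups)
      throws AccessControlException {
    Checker pc = getPermissionChecker(user, groups);  // built before locking
    if (!pc.isSuperUser()) {                          // superusers skip the walk
      pc.checkPathAccess(src);
    }
    // ... take the write lock and remove src from the namespace ...
    return true;
  }
}

Because the checker's user and group data are immutable after construction (the patch wraps the group set with Collections.unmodifiableSet), the same instance can be consulted both outside and under the namesystem lock without additional synchronization.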
@@ -2856,7 +2822,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, SafeModeException, UnresolvedLinkException, IOException { BlocksMapUpdateInfo collectedBlocks = new BlocksMapUpdateInfo(); - + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -2867,7 +2834,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new IOException(src + " is non empty"); } if (enforcePermission && isPermissionEnabled) { - checkPermission(src, false, null, FsAction.WRITE, null, FsAction.ALL); + checkPermission(pc, src, false, null, FsAction.WRITE, null, FsAction.ALL); } // Unlink the target directory from directory tree if (!dir.delete(src, collectedBlocks)) { @@ -2984,9 +2951,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, UnresolvedLinkException, StandbyException, IOException { HdfsFileStatus stat = null; - + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); - try { checkOperation(OperationCategory.READ); @@ -2994,24 +2961,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new InvalidPathException("Invalid file name: " + src); } if (isPermissionEnabled) { - checkTraverse(src); + checkTraverse(pc, src); } stat = dir.getFileInfo(src, resolveLink); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "getfileinfo", src, null, null); - } + logAuditEvent(false, "getfileinfo", src); throw e; } finally { readUnlock(); } - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "getfileinfo", src, null, null); - } + logAuditEvent(true, "getfileinfo", src); return stat; } @@ -3023,35 +2982,33 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return mkdirsInt(src, permissions, createParent); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "mkdirs", src, null, null); - } + logAuditEvent(false, "mkdirs", src); throw e; } } private boolean mkdirsInt(String src, PermissionStatus permissions, boolean createParent) throws IOException, UnresolvedLinkException { + HdfsFileStatus resultingStat = null; boolean status = false; if(NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src); } + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - - status = mkdirsInternal(src, permissions, createParent); + status = mkdirsInternal(pc, src, permissions, createParent); + if (status) { + resultingStat = dir.getFileInfo(src, false); + } } finally { writeUnlock(); } getEditLog().logSync(); - if (status && isAuditEnabled() && isExternalInvocation()) { - final HdfsFileStatus stat = dir.getFileInfo(src, false); - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "mkdirs", src, null, stat); + if (status) { + logAuditEvent(true, "mkdirs", src, null, resultingStat); } return status; } @@ -3059,7 +3016,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Create all the necessary directories */ - private boolean 
mkdirsInternal(String src, + private boolean mkdirsInternal(FSPermissionChecker pc, String src, PermissionStatus permissions, boolean createParent) throws IOException, UnresolvedLinkException { assert hasWriteLock(); @@ -3067,7 +3024,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new SafeModeException("Cannot create directory " + src, safeMode); } if (isPermissionEnabled) { - checkTraverse(src); + checkTraverse(pc, src); } if (dir.isDir(src)) { // all the users of mkdirs() are used to expect 'true' even if @@ -3078,7 +3035,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new InvalidPathException(src); } if (isPermissionEnabled) { - checkAncestorAccess(src, FsAction.WRITE); + checkAncestorAccess(pc, src, FsAction.WRITE); } if (!createParent) { verifyParentDir(src); @@ -3097,12 +3054,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, ContentSummary getContentSummary(String src) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, StandbyException { + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); - if (isPermissionEnabled) { - checkPermission(src, false, null, null, null, FsAction.READ_EXECUTE); + checkPermission(pc, src, false, null, null, null, FsAction.READ_EXECUTE); } return dir.getContentSummary(src); } finally { @@ -3117,15 +3075,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void setQuota(String path, long nsQuota, long dsQuota) throws IOException, UnresolvedLinkException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot set quota on " + path, safeMode); } - if (isPermissionEnabled) { - checkSuperuserPrivilege(); - } dir.setQuota(path, nsQuota, dsQuota); } finally { writeUnlock(); @@ -3143,6 +3100,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void fsync(String src, String clientName, long lastBlockLength) throws IOException, UnresolvedLinkException { NameNode.stateChangeLog.info("BLOCK* fsync: " + src + " for " + clientName); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3347,6 +3305,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, String[] newtargetstorages) throws IOException, UnresolvedLinkException { String src = ""; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3450,6 +3409,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Renew the lease(s) held by the given client */ void renewLease(String holder) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3481,11 +3441,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return getListingInt(src, startAfter, needLocation); } catch (AccessControlException e) { - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(false, UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "listStatus", src, null, null); - } + logAuditEvent(false, "listStatus", src); throw e; } } @@ -3494,22 +3450,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean needLocation) throws AccessControlException, UnresolvedLinkException, IOException { DirectoryListing dl; + 
FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); if (isPermissionEnabled) { if (dir.isDir(src)) { - checkPathAccess(src, FsAction.READ_EXECUTE); + checkPathAccess(pc, src, FsAction.READ_EXECUTE); } else { - checkTraverse(src); + checkTraverse(pc, src); } } - if (isAuditEnabled() && isExternalInvocation()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), - getRemoteIp(), - "listStatus", src, null, null); - } + logAuditEvent(true, "listStatus", src); dl = dir.getListing(src, startAfter, needLocation); } finally { readUnlock(); @@ -3721,42 +3675,49 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return stats; } - /** - * Total raw bytes including non-dfs used space. - */ @Override // FSNamesystemMBean + @Metric({"CapacityTotal", + "Total raw capacity of data nodes in bytes"}) public long getCapacityTotal() { return datanodeStatistics.getCapacityTotal(); } - @Metric + @Metric({"CapacityTotalGB", + "Total raw capacity of data nodes in GB"}) public float getCapacityTotalGB() { return DFSUtil.roundBytesToGB(getCapacityTotal()); } - /** - * Total used space by data nodes - */ @Override // FSNamesystemMBean + @Metric({"CapacityUsed", + "Total used capacity across all data nodes in bytes"}) public long getCapacityUsed() { return datanodeStatistics.getCapacityUsed(); } - @Metric + @Metric({"CapacityUsedGB", + "Total used capacity across all data nodes in GB"}) public float getCapacityUsedGB() { return DFSUtil.roundBytesToGB(getCapacityUsed()); } - @Override + @Override // FSNamesystemMBean + @Metric({"CapacityRemaining", "Remaining capacity in bytes"}) public long getCapacityRemaining() { return datanodeStatistics.getCapacityRemaining(); } - @Metric + @Metric({"CapacityRemainingGB", "Remaining capacity in GB"}) public float getCapacityRemainingGB() { return DFSUtil.roundBytesToGB(getCapacityRemaining()); } + @Metric({"CapacityUsedNonDFS", + "Total space used by data nodes for non DFS purposes in bytes"}) + public long getCapacityUsedNonDFS() { + return datanodeStatistics.getCapacityUsedNonDFS(); + } + /** * Total number of connections. */ @@ -3777,10 +3738,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } DatanodeInfo[] datanodeReport(final DatanodeReportType type - ) throws AccessControlException { + ) throws AccessControlException, StandbyException { checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); readLock(); try { + checkOperation(OperationCategory.UNCHECKED); final DatanodeManager dm = getBlockManager().getDatanodeManager(); final List results = dm.getDatanodeListForReport(type); @@ -3803,9 +3766,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @throws IOException if */ void saveNamespace() throws AccessControlException, IOException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); readLock(); try { - checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); if (!isInSafeMode()) { throw new IOException("Safe mode should be turned ON " + "in order to create namespace image."); @@ -3823,10 +3788,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * * @throws AccessControlException if superuser privilege is violated. 
*/ - boolean restoreFailedStorage(String arg) throws AccessControlException { + boolean restoreFailedStorage(String arg) throws AccessControlException, + StandbyException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); writeLock(); try { - checkSuperuserPrivilege(); + checkOperation(OperationCategory.UNCHECKED); // if it is disabled - enable it and vice versa. if(arg.equals("check")) @@ -3846,10 +3814,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } void finalizeUpgrade() throws IOException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - checkSuperuserPrivilege(); getFSImage().finalizeUpgrade(); } finally { writeUnlock(); @@ -4585,10 +4554,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } CheckpointSignature rollEditLog() throws IOException { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.JOURNAL); writeLock(); try { checkOperation(OperationCategory.JOURNAL); - checkSuperuserPrivilege(); if (isInSafeMode()) { throw new SafeModeException("Log not rolled", safeMode); } @@ -4603,6 +4573,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NamenodeRegistration bnReg, // backup node NamenodeRegistration nnReg) // active name-node throws IOException { + checkOperation(OperationCategory.CHECKPOINT); writeLock(); try { checkOperation(OperationCategory.CHECKPOINT); @@ -4621,6 +4592,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void endCheckpoint(NamenodeRegistration registration, CheckpointSignature sig) throws IOException { + checkOperation(OperationCategory.CHECKPOINT); readLock(); try { checkOperation(OperationCategory.CHECKPOINT); @@ -4639,61 +4611,64 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return new PermissionStatus(fsOwner.getShortUserName(), supergroup, permission); } - private FSPermissionChecker checkOwner(String path - ) throws AccessControlException, UnresolvedLinkException { - return checkPermission(path, true, null, null, null, null); + private void checkOwner(FSPermissionChecker pc, String path) + throws AccessControlException, UnresolvedLinkException { + checkPermission(pc, path, true, null, null, null, null); } - private FSPermissionChecker checkPathAccess(String path, FsAction access - ) throws AccessControlException, UnresolvedLinkException { - return checkPermission(path, false, null, null, access, null); + private void checkPathAccess(FSPermissionChecker pc, + String path, FsAction access) throws AccessControlException, + UnresolvedLinkException { + checkPermission(pc, path, false, null, null, access, null); } - private FSPermissionChecker checkParentAccess(String path, FsAction access - ) throws AccessControlException, UnresolvedLinkException { - return checkPermission(path, false, null, access, null, null); + private void checkParentAccess(FSPermissionChecker pc, + String path, FsAction access) throws AccessControlException, + UnresolvedLinkException { + checkPermission(pc, path, false, null, access, null, null); } - private FSPermissionChecker checkAncestorAccess(String path, FsAction access - ) throws AccessControlException, UnresolvedLinkException { - return checkPermission(path, false, access, null, null, null); + private void checkAncestorAccess(FSPermissionChecker pc, + String path, FsAction access) throws AccessControlException, + UnresolvedLinkException { + checkPermission(pc, path, false, access, null, null, null); } - 
private FSPermissionChecker checkTraverse(String path - ) throws AccessControlException, UnresolvedLinkException { - return checkPermission(path, false, null, null, null, null); + private void checkTraverse(FSPermissionChecker pc, String path) + throws AccessControlException, UnresolvedLinkException { + checkPermission(pc, path, false, null, null, null, null); } @Override - public void checkSuperuserPrivilege() throws AccessControlException { + public void checkSuperuserPrivilege() + throws AccessControlException { if (isPermissionEnabled) { - FSPermissionChecker.checkSuperuserPrivilege(fsOwner, supergroup); + FSPermissionChecker pc = getPermissionChecker(); + pc.checkSuperuserPrivilege(); } } /** - * Check whether current user have permissions to access the path. - * For more details of the parameters, see - * {@link FSPermissionChecker#checkPermission(String, INodeDirectory, boolean, FsAction, FsAction, FsAction, FsAction)}. + * Check whether current user have permissions to access the path. For more + * details of the parameters, see + * {@link FSPermissionChecker#checkPermission()}. */ - private FSPermissionChecker checkPermission(String path, boolean doCheckOwner, - FsAction ancestorAccess, FsAction parentAccess, FsAction access, - FsAction subAccess) throws AccessControlException, UnresolvedLinkException { - FSPermissionChecker pc = new FSPermissionChecker( - fsOwner.getShortUserName(), supergroup); - if (!pc.isSuper) { + private void checkPermission(FSPermissionChecker pc, + String path, boolean doCheckOwner, FsAction ancestorAccess, + FsAction parentAccess, FsAction access, FsAction subAccess) + throws AccessControlException, UnresolvedLinkException { + if (!pc.isSuperUser()) { dir.waitForReady(); readLock(); try { - pc.checkPermission(path, dir.rootDir, doCheckOwner, - ancestorAccess, parentAccess, access, subAccess); + pc.checkPermission(path, dir.rootDir, doCheckOwner, ancestorAccess, + parentAccess, access, subAccess); } finally { readUnlock(); - } + } } - return pc; } - + /** * Check to see if we have exceeded the limit on the number * of inodes. @@ -4906,6 +4881,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Client is reporting some bad block locations. 
*/ void reportBadBlocks(LocatedBlock[] blocks) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4940,6 +4916,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlock updateBlockForPipeline(ExtendedBlock block, String clientName) throws IOException { LocatedBlock locatedBlock; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4971,6 +4948,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void updatePipeline(String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -5098,8 +5076,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void releaseBackupNode(NamenodeRegistration registration) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { + checkOperation(OperationCategory.WRITE); if(getFSImage().getStorage().getNamespaceID() != registration.getNamespaceID()) throw new IOException("Incompatible namespaceIDs: " @@ -5137,16 +5117,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ Collection listCorruptFileBlocks(String path, String[] cookieTab) throws IOException { - + checkSuperuserPrivilege(); + checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); - if (!isPopulatingReplQueues()) { throw new IOException("Cannot run listCorruptFileBlocks because " + "replication queues have not been initialized."); } - checkSuperuserPrivilege(); // print a limited # of corrupt files per call int count = 0; ArrayList corruptFiles = new ArrayList(); @@ -5232,6 +5211,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, Token getDelegationToken(Text renewer) throws IOException { Token token; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -5248,7 +5228,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return null; } - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + UserGroupInformation ugi = getRemoteUser(); String user = ugi.getUserName(); Text owner = new Text(user); Text realUser = null; @@ -5278,6 +5258,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long renewDelegationToken(Token token) throws InvalidToken, IOException { long expiryTime; + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -5289,7 +5270,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new IOException( "Delegation Token can be renewed only with kerberos or web authentication"); } - String renewer = UserGroupInformation.getCurrentUser().getShortUserName(); + String renewer = getRemoteUser().getShortUserName(); expiryTime = dtSecretManager.renewToken(token, renewer); DelegationTokenIdentifier id = new DelegationTokenIdentifier(); ByteArrayInputStream buf = new ByteArrayInputStream(token.getIdentifier()); @@ -5310,6 +5291,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void cancelDelegationToken(Token token) throws IOException { + checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -5317,7 +5299,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (isInSafeMode()) { throw new 
SafeModeException("Cannot cancel delegation token", safeMode); } - String canceller = UserGroupInformation.getCurrentUser().getUserName(); + String canceller = getRemoteUser().getUserName(); DelegationTokenIdentifier id = dtSecretManager .cancelToken(token, canceller); getEditLog().logCancelDelegationToken(id); @@ -5386,7 +5368,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ private AuthenticationMethod getConnectionAuthenticationMethod() throws IOException { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); + UserGroupInformation ugi = getRemoteUser(); AuthenticationMethod authMethod = ugi.getAuthenticationMethod(); if (authMethod == AuthenticationMethod.PROXY) { authMethod = ugi.getRealUser().getAuthenticationMethod(); @@ -5410,12 +5392,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return NamenodeWebHdfsMethods.getRemoteIp(); } + // optimize ugi lookup for RPC operations to avoid a trip through + // UGI.getCurrentUser which is synch'ed + private static UserGroupInformation getRemoteUser() throws IOException { + UserGroupInformation ugi = null; + if (Server.isRpcInvocation()) { + ugi = Server.getRemoteUser(); + } + return (ugi != null) ? ugi : UserGroupInformation.getCurrentUser(); + } + /** * Log fsck event in the audit log */ void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { if (isAuditEnabled()) { - logAuditEvent(UserGroupInformation.getCurrentUser(), + logAuditEvent(true, getRemoteUser(), remoteAddress, "fsck", src, null, null); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java index 91ebc968a04..d88bfd87960 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSPermissionChecker.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.Set; import java.util.Stack; @@ -31,14 +32,20 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; -/** Perform permission checking in {@link FSNamesystem}. */ +/** + * Class that helps in checking file system permission. + * The state of this class need not be synchronized as it has data structures that + * are read-only. + * + * Some of the helper methods are gaurded by {@link FSNamesystem#readLock()}. + */ class FSPermissionChecker { static final Log LOG = LogFactory.getLog(UserGroupInformation.class); - private final UserGroupInformation ugi; - public final String user; - private final Set groups = new HashSet(); - public final boolean isSuper; + private final String user; + /** A set with group namess. 
Not synchronized since it is unmodifiable */ + private final Set groups; + private final boolean isSuper; FSPermissionChecker(String fsOwner, String supergroup ) throws AccessControlException{ @@ -47,10 +54,9 @@ class FSPermissionChecker { } catch (IOException e) { throw new AccessControlException(e); } - - groups.addAll(Arrays.asList(ugi.getGroupNames())); + HashSet s = new HashSet(Arrays.asList(ugi.getGroupNames())); + groups = Collections.unmodifiableSet(s); user = ugi.getShortUserName(); - isSuper = user.equals(fsOwner) || groups.contains(supergroup); } @@ -60,20 +66,23 @@ class FSPermissionChecker { */ public boolean containsGroup(String group) {return groups.contains(group);} + public String getUser() { + return user; + } + + public boolean isSuperUser() { + return isSuper; + } + /** * Verify if the caller has the required permission. This will result into * an exception if the caller is not allowed to access the resource. - * @param owner owner of the system - * @param supergroup supergroup of the system */ - public static void checkSuperuserPrivilege(UserGroupInformation owner, - String supergroup) - throws AccessControlException { - FSPermissionChecker checker = - new FSPermissionChecker(owner.getShortUserName(), supergroup); - if (!checker.isSuper) { + public void checkSuperuserPrivilege() + throws AccessControlException { + if (!isSuper) { throw new AccessControlException("Access denied for user " - + checker.user + ". Superuser privilege is required"); + + user + ". Superuser privilege is required"); } } @@ -103,9 +112,11 @@ class FSPermissionChecker { * @param subAccess If path is a directory, * it is the access required of the path and all the sub-directories. * If path is not a directory, there is no effect. - * @return a PermissionChecker object which caches data for later use. * @throws AccessControlException * @throws UnresolvedLinkException + * + * Guarded by {@link FSNamesystem#readLock()} + * Caller of this method must hold that lock. */ void checkPermission(String path, INodeDirectory root, boolean doCheckOwner, FsAction ancestorAccess, FsAction parentAccess, FsAction access, @@ -148,6 +159,7 @@ class FSPermissionChecker { } } + /** Guarded by {@link FSNamesystem#readLock()} */ private void checkOwner(INode inode) throws AccessControlException { if (inode != null && user.equals(inode.getUserName())) { return; @@ -155,6 +167,7 @@ class FSPermissionChecker { throw new AccessControlException("Permission denied"); } + /** Guarded by {@link FSNamesystem#readLock()} */ private void checkTraverse(INode[] inodes, int last ) throws AccessControlException { for(int j = 0; j <= last; j++) { @@ -162,6 +175,7 @@ class FSPermissionChecker { } } + /** Guarded by {@link FSNamesystem#readLock()} */ private void checkSubAccess(INode inode, FsAction access ) throws AccessControlException { if (inode == null || !inode.isDirectory()) { @@ -181,11 +195,13 @@ class FSPermissionChecker { } } + /** Guarded by {@link FSNamesystem#readLock()} */ private void check(INode[] inodes, int i, FsAction access ) throws AccessControlException { check(i >= 0? 
inodes[i]: null, access); } + /** Guarded by {@link FSNamesystem#readLock()} */ private void check(INode inode, FsAction access ) throws AccessControlException { if (inode == null) { @@ -206,7 +222,9 @@ class FSPermissionChecker { + ", access=" + access + ", inode=" + inode); } - private void checkStickyBit(INode parent, INode inode) throws AccessControlException { + /** Guarded by {@link FSNamesystem#readLock()} */ + private void checkStickyBit(INode parent, INode inode) + throws AccessControlException { if(!parent.getFsPermission().getStickyBit()) { return; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java index 91ff0891c96..b407a62da97 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INode.java @@ -282,7 +282,11 @@ abstract class INode implements Comparable { String getLocalParentDir() { INode inode = isRoot() ? this : getParent(); - return (inode != null) ? inode.getFullPathName() : ""; + String parentDir = ""; + if (inode != null) { + parentDir = inode.getFullPathName(); + } + return (parentDir != null) ? parentDir : ""; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java index 3488f074103..32e9fd15f05 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeHttpServer.java @@ -25,10 +25,10 @@ import java.util.Map; import javax.servlet.ServletContext; -import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.hdfs.web.AuthFilter; @@ -77,7 +77,8 @@ public class NameNodeHttpServer { if (UserGroupInformation.isSecurityEnabled()) { initSpnego(conf, DFSConfigKeys.DFS_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY, - DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY); + DFSUtil.getSpnegoKeytabKey(conf, + DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY)); } if (WebHdfsFileSystem.isEnabled(conf, LOG)) { //add SPNEGO authentication filter for webhdfs @@ -112,11 +113,8 @@ public class NameNodeHttpServer { DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_PRINCIPAL_KEY + "' is not set."); } - String httpKeytab = conf.get( - DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY); - if (httpKeytab == null) { - httpKeytab = conf.get(DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY); - } + String httpKeytab = conf.get(DFSUtil.getSpnegoKeytabKey(conf, + DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY)); if (httpKeytab != null && !httpKeytab.isEmpty()) { params.put( DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 
90e79363b17..275b9bdbe61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -337,7 +337,6 @@ class NameNodeRpcServer implements NamenodeProtocols { throw new IllegalArgumentException( "Unexpected not positive size: "+size); } - namesystem.checkOperation(OperationCategory.READ); namesystem.checkSuperuserPrivilege(); return namesystem.getBlockManager().getBlocks(datanode, size); } @@ -707,7 +706,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -732,19 +730,16 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean restoreFailedStorage(String arg) throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); return namesystem.restoreFailedStorage(arg); } @Override // ClientProtocol public void saveNamespace() throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.saveNamespace(); } @Override // ClientProtocol public long rollEdits() throws AccessControlException, IOException { - namesystem.checkOperation(OperationCategory.JOURNAL); CheckpointSignature sig = namesystem.rollEditLog(); return sig.getCurSegmentTxId(); } @@ -789,7 +784,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public void metaSave(String filename) throws IOException { - namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.metaSave(filename); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java index c453db561eb..c82e9155ed6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hdfs.server.namenode; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.util.RwLock; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; /** Namesystem operations. 
*/ @@ -38,4 +40,6 @@ public interface Namesystem extends RwLock, SafeMode { public boolean isGenStampInFuture(long generationStamp); public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal); + + public void checkOperation(OperationCategory read) throws StandbyException; } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java index eb6a8ea1c5f..7d8135669d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RedundantEditLogInputStream.java @@ -267,4 +267,11 @@ class RedundantEditLogInputStream extends EditLogInputStream { super(msg); } } + + @Override + public void setMaxOpSize(int maxOpSize) { + for (EditLogInputStream elis : streams) { + elis.setMaxOpSize(maxOpSize); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 763c7089abd..16c12608909 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -144,6 +144,11 @@ public class SecondaryNameNode implements Runnable { return checkpointImage; } + @VisibleForTesting + int getMergeErrorCount() { + return checkpointImage.getMergeErrorCount(); + } + @VisibleForTesting FSNamesystem getFSNamesystem() { return namesystem; @@ -250,15 +255,11 @@ public class SecondaryNameNode implements Runnable { new AccessControlList(conf.get(DFS_ADMIN, " "))) { { if (UserGroupInformation.isSecurityEnabled()) { - String httpKeytabKey = DFSConfigKeys. - DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY; - if (null == conf.get(httpKeytabKey)) { - httpKeytabKey = DFSConfigKeys.DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY; - } initSpnego( conf, DFSConfigKeys.DFS_SECONDARY_NAMENODE_INTERNAL_SPNEGO_USER_NAME_KEY, - httpKeytabKey); + DFSUtil.getSpnegoKeytabKey(conf, + DFSConfigKeys.DFS_SECONDARY_NAMENODE_KEYTAB_FILE_KEY)); } } }; @@ -339,6 +340,7 @@ public class SecondaryNameNode implements Runnable { // number of transactions in the edit log that haven't yet been checkpointed. // long period = checkpointConf.getCheckPeriod(); + int maxRetries = checkpointConf.getMaxRetriesOnMergeError(); while (shouldRun) { try { @@ -364,6 +366,13 @@ public class SecondaryNameNode implements Runnable { } catch (IOException e) { LOG.error("Exception in doCheckpoint", e); e.printStackTrace(); + // Prevent a huge number of edits from being created due to + // unrecoverable conditions and endless retries. + if (checkpointImage.getMergeErrorCount() > maxRetries) { + LOG.fatal("Merging failed " + + checkpointImage.getMergeErrorCount() + " times."); + terminate(1); + } } catch (Throwable e) { LOG.fatal("Throwable Exception in doCheckpoint", e); e.printStackTrace(); @@ -498,9 +507,21 @@ public class SecondaryNameNode implements Runnable { RemoteEditLogManifest manifest = namenode.getEditLogManifest(sig.mostRecentCheckpointTxId + 1); + // Fetch fsimage and edits. Reload the image if previous merge failed. 
loadImage |= downloadCheckpointFiles( - fsName, checkpointImage, sig, manifest); // Fetch fsimage and edits - doMerge(sig, manifest, loadImage, checkpointImage, namesystem); + fsName, checkpointImage, sig, manifest) | + checkpointImage.hasMergeError(); + try { + doMerge(sig, manifest, loadImage, checkpointImage, namesystem); + } catch (IOException ioe) { + // A merge error occurred. The in-memory file system state may be + // inconsistent, so the image and edits need to be reloaded. + checkpointImage.setMergeError(); + throw ioe; + } + // Clear any error since merge was successful. + checkpointImage.clearMergeError(); + // // Upload the new image into the NameNode. Then tell the Namenode @@ -754,6 +775,7 @@ public class SecondaryNameNode implements Runnable { static class CheckpointStorage extends FSImage { + private int mergeErrorCount; private static class CheckpointLogPurger implements LogsPurgeable { private NNStorage storage; @@ -815,6 +837,7 @@ public class SecondaryNameNode implements Runnable { // we shouldn't have any editLog instance. Setting to null // makes sure we don't accidentally depend on it. editLog = null; + mergeErrorCount = 0; // Replace the archival manager with one that can actually work on the // 2NN's edits storage. @@ -881,7 +904,24 @@ public class SecondaryNameNode implements Runnable { } } } - + + + boolean hasMergeError() { + return (mergeErrorCount > 0); + } + + int getMergeErrorCount() { + return mergeErrorCount; + } + + void setMergeError() { + mergeErrorCount++; + } + + void clearMergeError() { + mergeErrorCount = 0; + } + /** * Ensure that the current/ directory exists in all storage * directories @@ -915,7 +955,9 @@ public class SecondaryNameNode implements Runnable { dstImage.reloadFromImageFile(file, dstNamesystem); dstNamesystem.dir.imageLoadComplete(); } - + // error simulation code for junit test + CheckpointFaultInjector.getInstance().duringMerge(); + Checkpointer.rollForwardByApplyingLogs(manifest, dstImage, dstNamesystem); // The following has the side effect of purging old fsimages/edit logs. 
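For reference, the merge-error bookkeeping added to SecondaryNameNode in these hunks (hasMergeError/setMergeError/clearMergeError on CheckpointStorage, the reload-on-error in doCheckpoint, and the maxRetriesOnMergeError cutoff in the checkpoint loop) fits together as in the sketch below. It is illustrative only: method names mirror the patch where possible, but the bodies are simplified and the real loop calls terminate(1) rather than throwing.

/** Illustrative sketch of the 2NN merge-error retry logic; not patch code. */
public class MergeRetrySketch {
  private int mergeErrorCount = 0;

  boolean hasMergeError() { return mergeErrorCount > 0; }
  int getMergeErrorCount() { return mergeErrorCount; }
  void setMergeError() { mergeErrorCount++; }
  void clearMergeError() { mergeErrorCount = 0; }

  /** Stand-in for one doCheckpoint() pass. */
  void doCheckpoint() throws Exception {
    // Reload the image if the previous merge failed, since the in-memory
    // namespace may be inconsistent.
    boolean reloadImage = hasMergeError();
    try {
      doMerge(reloadImage);
    } catch (Exception e) {
      setMergeError();   // remember the failure so the next pass reloads
      throw e;
    }
    clearMergeError();   // merge succeeded; reset the counter
  }

  /** Stand-in for the retry handling in the checkpoint loop. */
  void checkpointLoop(int maxRetriesOnMergeError) {
    while (true) {
      try {
        doCheckpoint();
        return;
      } catch (Exception e) {
        // Give up rather than piling up edits forever on an unrecoverable
        // error; the real code logs and terminates the process here.
        if (getMergeErrorCount() > maxRetriesOnMergeError) {
          throw new IllegalStateException(
              "Merging failed " + getMergeErrorCount() + " times", e);
        }
      }
    }
  }

  private void doMerge(boolean reloadImage) throws Exception {
    // Real image/edits merge elided in this sketch.
  }
}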
dstImage.saveFSImageInAllDirs(dstNamesystem, dstImage.getLastAppliedTxId()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java index 507c8ec7648..60e703b4e02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java @@ -229,7 +229,6 @@ public class TransferFsImage { SecurityUtil.openSecureHttpConnection(url); if (timeout <= 0) { - // Set the ping interval as timeout Configuration conf = new HdfsConfiguration(); timeout = conf.getInt(DFSConfigKeys.DFS_IMAGE_TRANSFER_TIMEOUT_KEY, DFSConfigKeys.DFS_IMAGE_TRANSFER_TIMEOUT_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java index b052e4ea9e3..823738798d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java @@ -64,9 +64,17 @@ public interface HAContext { void writeUnlock(); /** - * Verify that the given operation category is allowed in the - * current state. This is to allow NN implementations (eg BackupNode) - * to override it with node-specific handling. + * Verify that the given operation category is allowed in the current state. + * This is to allow NN implementations (eg BackupNode) to override it with + * node-specific handling. + * + * If the operation which is being checked will be taking the FSNS lock, it's + * advisable to check the operation category both immediately before and after + * taking the lock. This is because clients rely on the StandbyException + * thrown by this method in order to trigger client failover, and if a client + * first tries to contact the Standby NN, it could block for a long time if + * the Standby is holding the lock for a while, e.g. when performing a + * checkpoint. See HDFS-4591 for more details. */ void checkOperation(OperationCategory op) throws StandbyException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 0cb4b127115..f88f085b3ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -99,7 +99,6 @@ import org.apache.hadoop.hdfs.web.resources.UserParam; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; @@ -212,7 +211,6 @@ public class NamenodeWebHdfsMethods { namenode, ugi, renewer != null? 
renewer: ugi.getShortUserName()); final Token t = c.getAllTokens().iterator().next(); t.setKind(WebHdfsFileSystem.TOKEN_KIND); - SecurityUtil.setTokenService(t, namenode.getHttpAddress()); return t; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java index 95cc3b89120..cf761ccedd4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java @@ -26,6 +26,7 @@ import java.util.Stack; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.util.XMLUtils; import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes; @@ -176,7 +177,7 @@ class OfflineEditsXmlLoader @Override public void endElement (String uri, String name, String qName) { - String str = cbuf.toString().trim(); + String str = XMLUtils.unmangleXmlString(cbuf.toString()).trim(); cbuf = new StringBuffer(); switch (state) { case EXPECT_EDITS_TAG: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java index 32d80c25516..5ab9a8cb355 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/LightWeightGSet.java @@ -24,8 +24,11 @@ import java.util.Iterator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.HadoopIllegalArgumentException; +import com.google.common.annotations.VisibleForTesting; + /** * A low memory footprint {@link GSet} implementation, * which uses an array for storing the elements @@ -72,7 +75,7 @@ public class LightWeightGSet implements GSet { /** Modification version for fail-fast. * @see ConcurrentModificationException */ - private volatile int modification = 0; + private int modification = 0; /** * @param recommended_length Recommended size of the internal array. @@ -285,4 +288,54 @@ public class LightWeightGSet implements GSet { throw new UnsupportedOperationException("Remove is not supported."); } } + + /** + * Let t = percentage of max memory. + * Let e = round(log_2 t). + * Then, we choose capacity = 2^e/(size of reference), + * unless it is outside the close interval [1, 2^30]. 
+ */ + public static int computeCapacity(double percentage, String mapName) { + return computeCapacity(Runtime.getRuntime().maxMemory(), percentage, + mapName); + } + + @VisibleForTesting + static int computeCapacity(long maxMemory, double percentage, + String mapName) { + if (percentage > 100.0 || percentage < 0.0) { + throw new HadoopIllegalArgumentException("Percentage " + percentage + + " must be greater than or equal to 0 " + + " and less than or equal to 100"); + } + if (maxMemory < 0) { + throw new HadoopIllegalArgumentException("Memory " + maxMemory + + " must be greater than or equal to 0"); + } + if (percentage == 0.0 || maxMemory == 0) { + return 0; + } + //VM detection + //See http://java.sun.com/docs/hotspot/HotSpotFAQ.html#64bit_detection + final String vmBit = System.getProperty("sun.arch.data.model"); + + //Percentage of max memory + final double percentDivisor = 100.0/percentage; + final double percentMemory = maxMemory/percentDivisor; + + //compute capacity + final int e1 = (int)(Math.log(percentMemory)/Math.log(2.0) + 0.5); + final int e2 = e1 - ("32".equals(vmBit)? 2: 3); + final int exponent = e2 < 0? 0: e2 > 30? 30: e2; + final int c = 1 << exponent; + + if (LightWeightGSet.LOG.isDebugEnabled()) { + LOG.debug("Computing capacity for map " + mapName); + LOG.debug("VM type = " + vmBit + "-bit"); + LOG.debug(percentage + "% max memory = " + + StringUtils.TraditionalBinaryPrefix.long2String(maxMemory, "B", 1)); + LOG.debug("capacity = 2^" + exponent + " = " + c + " entries"); + } + return c; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java index a023b878558..d036b1e24f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java @@ -46,6 +46,140 @@ public class XMLUtils { } } + /** + * Exception that reflects a string that cannot be unmangled. + */ + public static class UnmanglingError extends RuntimeException { + private static final long serialVersionUID = 1L; + + public UnmanglingError(String str, Exception e) { + super(str, e); + } + + public UnmanglingError(String str) { + super(str); + } + } + + + /** + * Given a code point, determine if it should be mangled before being + * represented in an XML document. + * + * Any code point that isn't valid in XML must be mangled. + * See http://en.wikipedia.org/wiki/Valid_characters_in_XML for a + * quick reference, or the w3 standard for the authoritative reference. + * + * @param cp The code point + * @return True if the code point should be mangled + */ + private static boolean codePointMustBeMangled(int cp) { + if (cp < 0x20) { + return ((cp != 0x9) && (cp != 0xa) && (cp != 0xd)); + } else if ((0xd7ff < cp) && (cp < 0xe000)) { + return true; + } else if ((cp == 0xfffe) || (cp == 0xffff)) { + return true; + } else if (cp == 0x5c) { + // we mangle backslash to simplify decoding... it's + // easier if backslashes always begin mangled sequences. + return true; + } + return false; + } + + private static int NUM_SLASH_POSITIONS = 4; + + private static String mangleCodePoint(int cp) { + return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp); + } + + /** + * Mangle a string so that it can be represented in an XML document. 
+ * + * There are three kinds of code points in XML: + * - Those that can be represented normally, + * - Those that have to be escaped (for example, & must be represented + * as &) + * - Those that cannot be represented at all in XML. + * + * The built-in SAX functions will handle the first two types for us just + * fine. However, sometimes we come across a code point of the third type. + * In this case, we have to mangle the string in order to represent it at + * all. We also mangle backslash to avoid confusing a backslash in the + * string with part our escape sequence. + * + * The encoding used here is as follows: an illegal code point is + * represented as '\ABCD;', where ABCD is the hexadecimal value of + * the code point. + * + * @param str The input string. + * + * @return The mangled string. + */ + public static String mangleXmlString(String str) { + final StringBuilder bld = new StringBuilder(); + final int length = str.length(); + for (int offset = 0; offset < length; ) { + final int cp = str.codePointAt(offset); + final int len = Character.charCount(cp); + if (codePointMustBeMangled(cp)) { + bld.append(mangleCodePoint(cp)); + } else { + for (int i = 0; i < len; i++) { + bld.append(str.charAt(offset + i)); + } + } + offset += len; + } + return bld.toString(); + } + + /** + * Demangle a string from an XML document. + * See {@link #mangleXmlString(String)} for a description of the mangling + * format. + * + * @param str The string to be demangled. + * + * @return The unmangled string + * @throws UnmanglingError if the input is malformed. + */ + public static String unmangleXmlString(String str) + throws UnmanglingError { + int slashPosition = -1; + String escapedCp = ""; + StringBuilder bld = new StringBuilder(); + for (int i = 0; i < str.length(); i++) { + char ch = str.charAt(i); + if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) { + escapedCp += ch; + ++slashPosition; + } else if (slashPosition == NUM_SLASH_POSITIONS) { + if (ch != ';') { + throw new UnmanglingError("unterminated code point escape: " + + "expected semicolon at end."); + } + try { + bld.appendCodePoint(Integer.parseInt(escapedCp, 16)); + } catch (NumberFormatException e) { + throw new UnmanglingError("error parsing unmangling escape code", e); + } + escapedCp = ""; + slashPosition = -1; + } else if (ch == '\\') { + slashPosition = 0; + } else { + bld.append(ch); + } + } + if (slashPosition != -1) { + throw new UnmanglingError("unterminated code point escape: string " + + "broke off in the middle"); + } + return bld.toString(); + } + /** * Add a SAX tag with a string inside. * @@ -56,7 +190,7 @@ public class XMLUtils { public static void addSaxString(ContentHandler contentHandler, String tag, String val) throws SAXException { contentHandler.startElement("", "", tag, new AttributesImpl()); - char c[] = val.toString().toCharArray(); + char c[] = mangleXmlString(val).toCharArray(); contentHandler.characters(c, 0, c.length); contentHandler.endElement("", "", tag); } @@ -67,6 +201,8 @@ public class XMLUtils { */ static public class Stanza { private TreeMap > subtrees; + + /** The unmangled value of this stanza. 
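As an editorial aside (not part of the patch), a statement-level sketch of the round trip the escape format above is meant to guarantee; the expected strings in the comments assume the mangle/unmangle methods behave exactly as written in this hunk, and class/import scaffolding is omitted:

    // NUL (0x0) cannot appear in XML at all, and backslash (0x5c) is always mangled.
    String raw = "bad" + (char) 0x0 + "value\\here";
    String mangled = XMLUtils.mangleXmlString(raw);        // "bad\0000;value\005c;here"
    String restored = XMLUtils.unmangleXmlString(mangled);
    // restored.equals(raw) holds, so illegal code points survive an XML round trip.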
*/ private String value; public Stanza() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index e04fb694bdb..5d1d33f9102 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.server.namenode.INodeId; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; @@ -244,7 +245,8 @@ public class JsonUtil { final long mTime = (Long) m.get("modificationTime"); final long blockSize = (Long) m.get("blockSize"); final short replication = (short) (long) (Long) m.get("replication"); - final long fileId = (Long) m.get("fileId"); + final long fileId = m.containsKey("fileId") ? (Long) m.get("fileId") + : INodeId.GRANDFATHER_INODE_ID; return new HdfsFileStatus(len, type == PathType.DIRECTORY, replication, blockSize, mTime, aTime, permission, owner, group, symlink, DFSUtil.string2Bytes(localName), fileId); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 02c147ab1a6..6f33827fa74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -29,7 +29,7 @@ import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.util.ArrayList; +import java.security.PrivilegedExceptionAction; import java.util.Collection; import java.util.List; import java.util.Map; @@ -62,15 +62,16 @@ import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.UnresolvedPathException; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; -import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; import org.apache.hadoop.hdfs.web.resources.BufferSizeParam; import org.apache.hadoop.hdfs.web.resources.ConcatSourcesParam; import org.apache.hadoop.hdfs.web.resources.CreateParentParam; +import org.apache.hadoop.hdfs.web.resources.DelegationParam; import org.apache.hadoop.hdfs.web.resources.DeleteOpParam; import org.apache.hadoop.hdfs.web.resources.DestinationParam; +import org.apache.hadoop.hdfs.web.resources.DoAsParam; import org.apache.hadoop.hdfs.web.resources.GetOpParam; import org.apache.hadoop.hdfs.web.resources.GroupParam; import org.apache.hadoop.hdfs.web.resources.HttpOpParam; @@ -106,10 +107,11 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.TokenRenewer; import 
org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.StringUtils; import org.mortbay.util.ajax.JSON; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Charsets; +import com.google.common.collect.Lists; /** A FileSystem for HDFS over the web. */ public class WebHdfsFileSystem extends FileSystem @@ -132,7 +134,8 @@ public class WebHdfsFileSystem extends FileSystem private DelegationTokenRenewer dtRenewer = null; - private synchronized void addRenewAction(final WebHdfsFileSystem webhdfs) { + @VisibleForTesting + protected synchronized void addRenewAction(final WebHdfsFileSystem webhdfs) { if (dtRenewer == null) { dtRenewer = DelegationTokenRenewer.getInstance(); } @@ -148,22 +151,15 @@ public class WebHdfsFileSystem extends FileSystem return b; } - private final UserGroupInformation ugi; + private UserGroupInformation ugi; private InetSocketAddress nnAddr; private URI uri; + private boolean hasInitedToken; private Token delegationToken; private final AuthenticatedURL.Token authToken = new AuthenticatedURL.Token(); private RetryPolicy retryPolicy = null; private Path workingDir; - { - try { - ugi = UserGroupInformation.getCurrentUser(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - /** * Return the protocol scheme for the FileSystem. *

@@ -180,6 +176,7 @@ public class WebHdfsFileSystem extends FileSystem ) throws IOException { super.initialize(uri, conf); setConf(conf); + ugi = UserGroupInformation.getCurrentUser(); try { this.uri = new URI(uri.getScheme(), uri.getAuthority(), null, null, null); } catch (URISyntaxException e) { @@ -204,24 +201,26 @@ public class WebHdfsFileSystem extends FileSystem protected void initDelegationToken() throws IOException { // look for webhdfs token, then try hdfs Token token = selectDelegationToken(ugi); - - //since we don't already have a token, go get one - boolean createdToken = false; - if (token == null) { - token = getDelegationToken(null); - createdToken = (token != null); - } - - // security might be disabled if (token != null) { + LOG.debug("Found existing DT for " + token.getService()); setDelegationToken(token); - if (createdToken) { + hasInitedToken = true; + } + } + + protected synchronized Token getDelegationToken() throws IOException { + if (!hasInitedToken) { + //since we don't already have a token, go get one + Token token = getDelegationToken(null); + // security might be disabled + if (token != null) { + setDelegationToken(token); addRenewAction(this); LOG.debug("Created new DT for " + token.getService()); - } else { - LOG.debug("Found existing DT for " + token.getService()); } + hasInitedToken = true; } + return delegationToken; } protected Token selectDelegationToken( @@ -239,6 +238,11 @@ public class WebHdfsFileSystem extends FileSystem public URI getUri() { return this.uri; } + + @Override + protected URI canonicalizeUri(URI uri) { + return NetUtils.getCanonicalUri(uri, getDefaultPort()); + } /** @return the home directory. */ public static String getHomeDirectoryString(final UserGroupInformation ugi) { @@ -365,16 +369,26 @@ public class WebHdfsFileSystem extends FileSystem return url; } - private String addDt2Query(String query) throws IOException { - if (UserGroupInformation.isSecurityEnabled()) { - synchronized (this) { - if (delegationToken != null) { - final String encoded = delegationToken.encodeToUrlString(); - return query + JspHelper.getDelegationTokenUrlParam(encoded); - } // else we are talking to an insecure cluster - } + Param[] getAuthParameters(final HttpOpParam.Op op) throws IOException { + List> authParams = Lists.newArrayList(); + // Skip adding delegation token for token operations because these + // operations require authentication. + Token token = null; + if (UserGroupInformation.isSecurityEnabled() && !op.getRequireAuth()) { + token = getDelegationToken(); } - return query; + if (token != null) { + authParams.add(new DelegationParam(token.encodeToUrlString())); + } else { + UserGroupInformation userUgi = ugi; + UserGroupInformation realUgi = userUgi.getRealUser(); + if (realUgi != null) { // proxy user + authParams.add(new DoAsParam(userUgi.getShortUserName())); + userUgi = realUgi; + } + authParams.add(new UserParam(userUgi.getShortUserName())); + } + return authParams.toArray(new Param[0]); } URL toUrl(final HttpOpParam.Op op, final Path fspath, @@ -383,34 +397,15 @@ public class WebHdfsFileSystem extends FileSystem final String path = PATH_PREFIX + (fspath == null? 
"/": makeQualified(fspath).toUri().getPath()); final String query = op.toQueryString() - + '&' + new UserParam(ugi) + + Param.toSortedString("&", getAuthParameters(op)) + Param.toSortedString("&", parameters); - final URL url; - if (op == PutOpParam.Op.RENEWDELEGATIONTOKEN - || op == GetOpParam.Op.GETDELEGATIONTOKEN) { - // Skip adding delegation token for getting or renewing delegation token, - // because these operations require kerberos authentication. - url = getNamenodeURL(path, query); - } else { - url = getNamenodeURL(path, addDt2Query(query)); - } + final URL url = getNamenodeURL(path, query); if (LOG.isTraceEnabled()) { LOG.trace("url=" + url); } return url; } - private HttpURLConnection getHttpUrlConnection(URL url) - throws IOException, AuthenticationException { - final HttpURLConnection conn; - if (ugi.hasKerberosCredentials()) { - conn = new AuthenticatedURL(AUTH).openConnection(url, authToken); - } else { - conn = (HttpURLConnection)url.openConnection(); - } - return conn; - } - /** * Run a http operation. * Connect to the http server, validate response, and obtain the JSON output. @@ -455,6 +450,48 @@ public class WebHdfsFileSystem extends FileSystem this.conn = conn; } + private HttpURLConnection getHttpUrlConnection(final URL url) + throws IOException, AuthenticationException { + UserGroupInformation connectUgi = ugi.getRealUser(); + if (connectUgi == null) { + connectUgi = ugi; + } + try { + return connectUgi.doAs( + new PrivilegedExceptionAction() { + @Override + public HttpURLConnection run() throws IOException { + return openHttpUrlConnection(url); + } + }); + } catch (IOException ioe) { + Throwable cause = ioe.getCause(); + if (cause != null && cause instanceof AuthenticationException) { + throw (AuthenticationException)cause; + } + throw ioe; + } catch (InterruptedException e) { + throw new IOException(e); + } + } + + private HttpURLConnection openHttpUrlConnection(final URL url) + throws IOException { + final HttpURLConnection conn; + try { + if (op.getRequireAuth()) { + LOG.debug("open AuthenticatedURL connection"); + conn = new AuthenticatedURL(AUTH).openConnection(url, authToken); + } else { + LOG.debug("open URL connection"); + conn = (HttpURLConnection)url.openConnection(); + } + } catch (AuthenticationException e) { + throw new IOException(e); + } + return conn; + } + private void init() throws IOException { checkRetry = !redirected; try { @@ -721,17 +758,10 @@ public class WebHdfsFileSystem extends FileSystem } @Override - public void concat(final Path trg, final Path [] psrcs) throws IOException { + public void concat(final Path trg, final Path [] srcs) throws IOException { statistics.incrementWriteOps(1); final HttpOpParam.Op op = PostOpParam.Op.CONCAT; - List strPaths = new ArrayList(psrcs.length); - for(Path psrc : psrcs) { - strPaths.add(psrc.toUri().getPath()); - } - - String srcs = StringUtils.join(",", strPaths); - ConcatSourcesParam param = new ConcatSourcesParam(srcs); run(op, trg, param); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java index e6afbe3e4ec..b68c5f5b58e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ConcatSourcesParam.java @@ -18,15 +18,28 @@ package org.apache.hadoop.hdfs.web.resources; +import 
org.apache.hadoop.fs.Path; + /** The concat source paths parameter. */ public class ConcatSourcesParam extends StringParam { /** Parameter name. */ public static final String NAME = "sources"; - public static final String DEFAULT = NULL; + public static final String DEFAULT = ""; private static final Domain DOMAIN = new Domain(NAME, null); + private static String paths2String(Path[] paths) { + if (paths == null || paths.length == 0) { + return ""; + } + final StringBuilder b = new StringBuilder(paths[0].toUri().getPath()); + for(int i = 1; i < paths.length; i++) { + b.append(',').append(paths[i].toUri().getPath()); + } + return b.toString(); + } + /** * Constructor. * @param str a string representation of the parameter value. @@ -35,6 +48,10 @@ public class ConcatSourcesParam extends StringParam { super(DOMAIN, str); } + public ConcatSourcesParam(Path[] paths) { + this(paths2String(paths)); + } + @Override public String getName() { return NAME; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java index a82b8a72c8e..710e2e8992e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java @@ -38,6 +38,11 @@ public class DeleteOpParam extends HttpOpParam { return HttpOpParam.Type.DELETE; } + @Override + public boolean getRequireAuth() { + return false; + } + @Override public boolean getDoOutput() { return false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index eaf23431450..916fe553ac4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -31,7 +31,7 @@ public class GetOpParam extends HttpOpParam { GETFILECHECKSUM(true, HttpURLConnection.HTTP_OK), GETHOMEDIRECTORY(false, HttpURLConnection.HTTP_OK), - GETDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK), + GETDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true), /** GET_BLOCK_LOCATIONS is a private unstable op. 
*/ GET_BLOCK_LOCATIONS(false, HttpURLConnection.HTTP_OK), @@ -40,16 +40,28 @@ public class GetOpParam extends HttpOpParam { final boolean redirect; final int expectedHttpResponseCode; + final boolean requireAuth; Op(final boolean redirect, final int expectedHttpResponseCode) { + this(redirect, expectedHttpResponseCode, false); + } + + Op(final boolean redirect, final int expectedHttpResponseCode, + final boolean requireAuth) { this.redirect = redirect; this.expectedHttpResponseCode = expectedHttpResponseCode; + this.requireAuth = requireAuth; } @Override public HttpOpParam.Type getType() { return HttpOpParam.Type.GET; } + + @Override + public boolean getRequireAuth() { + return requireAuth; + } @Override public boolean getDoOutput() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java index 1d029ec65cd..2237fb64813 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java @@ -43,6 +43,9 @@ public abstract class HttpOpParam & HttpOpParam.Op> /** @return the Http operation type. */ public Type getType(); + /** @return true if the operation cannot use a token */ + public boolean getRequireAuth(); + /** @return true if the operation will do output. */ public boolean getDoOutput(); @@ -92,6 +95,11 @@ public abstract class HttpOpParam & HttpOpParam.Op> return op.getType(); } + @Override + public boolean getRequireAuth() { + return op.getRequireAuth(); + } + @Override public boolean getDoOutput() { return op.getDoOutput(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java index 4bb5673ab10..54034f0e818 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java @@ -41,6 +41,11 @@ public class PostOpParam extends HttpOpParam { public Type getType() { return Type.POST; } + + @Override + public boolean getRequireAuth() { + return false; + } @Override public boolean getDoOutput() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java index 77bad214225..6ee84c4ccf4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java @@ -34,23 +34,35 @@ public class PutOpParam extends HttpOpParam { SETPERMISSION(false, HttpURLConnection.HTTP_OK), SETTIMES(false, HttpURLConnection.HTTP_OK), - RENEWDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK), - CANCELDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK), + RENEWDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true), + CANCELDELEGATIONTOKEN(false, HttpURLConnection.HTTP_OK, true), NULL(false, HttpURLConnection.HTTP_NOT_IMPLEMENTED); final boolean doOutputAndRedirect; final int expectedHttpResponseCode; + final boolean requireAuth; Op(final boolean doOutputAndRedirect, final int expectedHttpResponseCode) { + 
this(doOutputAndRedirect, expectedHttpResponseCode, false); + } + + Op(final boolean doOutputAndRedirect, final int expectedHttpResponseCode, + final boolean requireAuth) { this.doOutputAndRedirect = doOutputAndRedirect; this.expectedHttpResponseCode = expectedHttpResponseCode; + this.requireAuth = requireAuth; } @Override public HttpOpParam.Type getType() { return HttpOpParam.Type.PUT; } + + @Override + public boolean getRequireAuth() { + return requireAuth; + } @Override public boolean getDoOutput() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html index c0cafc64082..759c093aa59 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/overview.html @@ -60,9 +60,7 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:

-    Win32 is supported as a development platform. Distributed operation
-    has not been well tested on Win32, so this is not a production
-    platform.
+    Windows is also a supported platform.

@@ -84,15 +82,6 @@ that process vast amounts of data. Here's what makes Hadoop especially useful:
-    Additional requirements for Windows
-    1. Cygwin - Required for shell support in
-       addition to the required software above.

     Installing Required Software

     If your platform does not have the required software listed above, you
@@ -104,13 +93,6 @@ $ sudo apt-get install ssh
     $ sudo apt-get install rsync

-    On Windows, if you did not install the required software when you
-    installed cygwin, start the cygwin installer and select the packages:
-    • openssh - the "Net" category
-    • rsync - the "Net" category

     Getting Started

     First, you need to get a copy of the Hadoop code.

    diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/tests/test-libhdfs.sh b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/tests/test-libhdfs.sh index 51bb15f45dc..3407e9cf8e2 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/tests/test-libhdfs.sh +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/tests/test-libhdfs.sh @@ -82,7 +82,7 @@ unset IFS findlibjvm () { javabasedir=$JAVA_HOME case $OS_NAME in - cygwin* | mingw* | pw23* ) + mingw* | pw23* ) lib_jvm_dir=`find $javabasedir -follow \( \ \( -name client -type d -prune \) -o \ \( -name "jvm.dll" -exec dirname {} \; \) \) 2> /dev/null | tr "\n" " "` diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 5380c060028..79e8476ac91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -47,7 +47,7 @@ RPC address that handles all clients requests. In the case of HA/Federation where multiple namenodes exist, the name service id is added to the name e.g. dfs.namenode.rpc-address.ns1 dfs.namenode.rpc-address.EXAMPLENAMESERVICE - The value of this property will take the form of hdfs://nn-host1:rpc-port. + The value of this property will take the form of nn-host1:rpc-port. @@ -59,7 +59,7 @@ connecting to this address if it is configured. In the case of HA/Federation where multiple namenodes exist, the name service id is added to the name e.g. dfs.namenode.servicerpc-address.ns1 dfs.namenode.rpc-address.EXAMPLENAMESERVICE - The value of this property will take the form of hdfs://nn-host1:rpc-port. + The value of this property will take the form of nn-host1:rpc-port. If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default. @@ -594,6 +594,17 @@ Packet size for clients to write + + dfs.client.write.exclude.nodes.cache.expiry.interval.millis + 600000 + The maximum period to keep a DN in the excluded nodes list + at a client. After this period, in milliseconds, the previously excluded node(s) will + be removed automatically from the cache and will be considered good for block allocations + again. Useful to lower or raise in situations where you keep a file open for very long + periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance + restarts. Defaults to 10 minutes. + + dfs.namenode.checkpoint.dir file://${hadoop.tmp.dir}/dfs/namesecondary @@ -624,7 +635,7 @@ dfs.namenode.checkpoint.txns - 40000 + 1000000 The Secondary NameNode or CheckpointNode will create a checkpoint of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless of whether 'dfs.namenode.checkpoint.period' has expired. @@ -640,6 +651,15 @@ + + dfs.namenode.checkpoint.max-retries + 3 + The SecondaryNameNode retries failed checkpointing. If the + failure occurs while loading fsimage or replaying edits, the number of + retries is limited by this variable. + + + dfs.namenode.num.checkpoints.retained 2 @@ -724,12 +744,29 @@ + + dfs.image.transfer.timeout + 600000 + + Timeout for image transfer in milliseconds. This timeout and the related + dfs.image.transfer.bandwidthPerSec parameter should be configured such + that normal image transfer can complete within the timeout. + This timeout prevents client hangs when the sender fails during + image transfer, which is particularly important during checkpointing. 
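As an editorial aside (not part of the patch), a minimal client-side sketch of overriding the new exclude-node expiry described above; the one-minute value is purely illustrative and import/class scaffolding is omitted:

    // Sketch: forgive excluded DataNodes after one minute instead of the default ten.
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL, 60000L);
    FileSystem fs = FileSystem.get(conf);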
+ Note that this timeout applies to the entirety of image transfer, and + is not a socket timeout. + + + dfs.image.transfer.bandwidthPerSec 0 - Specifies the maximum amount of bandwidth that can be utilized for image - transfer in term of the number of bytes per second. + Maximum bandwidth used for image transfer in bytes per second. + This can help keep normal namenode operations responsive during + checkpointing. The maximum bandwidth and timeout in + dfs.image.transfer.timeout should be set such that normal image + transfers can complete successfully. A default value of 0 indicates that throttling is disabled. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm index 82f1046eb47..b9d1c637a32 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/HdfsUserGuide.apt.vm @@ -193,7 +193,7 @@ HDFS Users Guide * <<>>, set to 1 hour by default, specifies the maximum delay between two consecutive checkpoints, and - * <<>>, set to 40000 default, defines the + * <<>>, set to 1 million by default, defines the number of uncheckpointed transactions on the NameNode which will force an urgent checkpoint, even if the checkpoint period has not been reached. @@ -232,7 +232,7 @@ HDFS Users Guide * <<>>, set to 1 hour by default, specifies the maximum delay between two consecutive checkpoints - * <<>>, set to 40000 default, defines the + * <<>>, set to 1 million by default, defines the number of uncheckpointed transactions on the NameNode which will force an urgent checkpoint, even if the checkpoint period has not been reached. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm index 90f8dabce71..7735f8dafed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/WebHDFS.apt.vm @@ -109,7 +109,7 @@ WebHDFS REST API * {{{Append to a File}<<>>}} (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.append) - * {{{Concat File(s)}<<>>}} + * {{{Concatenate Files}<<>>}} (see {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.concat) * HTTP DELETE @@ -307,7 +307,7 @@ Content-Length: 0 * Submit a HTTP POST request. +--------------------------------- -curl -i -X POST "http://:/webhdfs/v1/?op=CONCAT&sources=" +curl -i -X POST "http://:/webhdfs/v1/?op=CONCAT&sources=" +--------------------------------- The client receives a response with zero content length: @@ -319,10 +319,6 @@ Content-Length: 0 [] - This REST API call is available as of Hadoop version 2.0.3. - Please note that is a comma seperated list of absolute paths. - (Example: sources=/test/file1,/test/file2,/test/file3) - See also: {{{Sources}<<>>}}, {{{../../api/org/apache/hadoop/fs/FileSystem.html}FileSystem}}.concat @@ -1761,7 +1757,7 @@ var tokenProperties = *----------------+-------------------------------------------------------------------+ || Name | <<>> | *----------------+-------------------------------------------------------------------+ -|| Description | The comma seperated absolute paths used for concatenation. | +|| Description | A list of source paths. 
| *----------------+-------------------------------------------------------------------+ || Type | String | *----------------+-------------------------------------------------------------------+ @@ -1769,12 +1765,9 @@ var tokenProperties = *----------------+-------------------------------------------------------------------+ || Valid Values | A list of comma seperated absolute FileSystem paths without scheme and authority. | *----------------+-------------------------------------------------------------------+ -|| Syntax | See the note in {{Delegation}}. | +|| Syntax | Any string. | *----------------+-------------------------------------------------------------------+ - <> that sources are absolute FileSystem paths. - - See also: {{{Concat File(s)}<<>>}} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/LogVerificationAppender.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/LogVerificationAppender.java new file mode 100644 index 00000000000..d6698b88c4b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/LogVerificationAppender.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.spi.LoggingEvent; +import org.apache.log4j.spi.ThrowableInformation; + +/** + * Used to verify that certain exceptions or messages are present in log output. 
+ */ +public class LogVerificationAppender extends AppenderSkeleton { + private final List log = new ArrayList(); + + @Override + public boolean requiresLayout() { + return false; + } + + @Override + protected void append(final LoggingEvent loggingEvent) { + log.add(loggingEvent); + } + + @Override + public void close() { + } + + public List getLog() { + return new ArrayList(log); + } + + public int countExceptionsWithMessage(final String text) { + int count = 0; + for (LoggingEvent e: getLog()) { + ThrowableInformation t = e.getThrowableInformation(); + if (t != null) { + String m = t.getThrowable().getMessage(); + if (m.contains(text)) { + count++; + } + } + } + return count; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java index dccc82f1a42..3574323d545 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientExcludedNodes.java @@ -21,36 +21,134 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.io.OutputStream; +import java.util.List; + +import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; +import org.apache.hadoop.util.ThreadUtil; + +import org.junit.After; +import org.junit.Before; import org.junit.Test; /** - * These tests make sure that DFSClient retries fetching data from DFS - * properly in case of errors. + * These tests make sure that DFSClient excludes writing data to + * a DN properly in case of errors. */ public class TestDFSClientExcludedNodes { - @Test + private MiniDFSCluster cluster; + private Configuration conf; + + @Before + public void setUp() { + cluster = null; + conf = new HdfsConfiguration(); + } + + @After + public void tearDown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test(timeout=60000) public void testExcludedNodes() throws IOException { - Configuration conf = new HdfsConfiguration(); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); FileSystem fs = cluster.getFileSystem(); Path filePath = new Path("/testExcludedNodes"); // kill a datanode cluster.stopDataNode(AppendTestUtil.nextInt(3)); - OutputStream out = fs.create(filePath, true, 4096); + OutputStream out = fs.create( + filePath, + true, + 4096, + (short) 3, + fs.getDefaultBlockSize(filePath) + ); out.write(20); try { out.close(); } catch (Exception e) { - fail("DataNode failure should not result in a block abort: \n" + e.getMessage()); + fail("Single DN failure should not result in a block abort: \n" + + e.getMessage()); + } + } + + @Test(timeout=60000) + public void testExcludedNodesForgiveness() throws IOException { + // Forgive nodes in under 2.5s for this test case. + conf.setLong( + DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL, + 2500); + // We'll be using a 512 bytes block size just for tests + // so making sure the checksum bytes too match it. 
+ conf.setInt("io.bytes.per.checksum", 512); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + List props = cluster.dataNodes; + FileSystem fs = cluster.getFileSystem(); + Path filePath = new Path("/testForgivingExcludedNodes"); + + // 256 bytes data chunk for writes + byte[] bytes = new byte[256]; + for (int index=0; index nsList = (List) f.get(null); + + NameService ns = nsList.get(0); + Log log = LogFactory.getLog("NameServiceSpy"); + + ns = Mockito.mock(NameService.class, + new GenericTestUtils.DelegateAnswer(log, ns)); + nsList.set(0, ns); + return ns; + } catch (Throwable t) { + LOG.info("Unable to spy on DNS. Skipping test.", t); + // In case the JDK we're testing on doesn't work like Sun's, just + // skip the test. + Assume.assumeNoException(t); + throw new RuntimeException(t); + } + } + + /** + * Test that the client doesn't ever try to DNS-resolve the logical URI. + * Regression test for HADOOP-9150. + */ + @Test + public void testDoesntDnsResolveLogicalURI() throws Exception { + NameService spyNS = spyOnNameService(); + + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + String logicalHost = fs.getUri().getHost(); + Path qualifiedRoot = fs.makeQualified(new Path("/")); + + // Make a few calls against the filesystem. + fs.getCanonicalServiceName(); + fs.listStatus(qualifiedRoot); + + // Ensure that the logical hostname was never resolved. + Mockito.verify(spyNS, Mockito.never()).lookupAllHostAddr(Mockito.eq(logicalHost)); + } + + /** + * Same test as above, but for FileContext. + */ + @Test + public void testFileContextDoesntDnsResolveLogicalURI() throws Exception { + NameService spyNS = spyOnNameService(); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + String logicalHost = fs.getUri().getHost(); + Configuration haClientConf = fs.getConf(); + + FileContext fc = FileContext.getFileContext(haClientConf); + Path root = new Path("/"); + fc.listStatus(root); + fc.listStatus(fc.makeQualified(root)); + fc.getDefaultFileSystem().getCanonicalServiceName(); + + // Ensure that the logical hostname was never resolved. 
+ Mockito.verify(spyNS, Mockito.never()).lookupAllHostAddr(Mockito.eq(logicalHost)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java index 4b26e77d809..cf62cd5257c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSShell.java @@ -35,6 +35,7 @@ import java.util.Arrays; import java.util.List; import java.util.Random; import java.util.Scanner; +import java.util.concurrent.atomic.AtomicInteger; import java.util.zip.DeflaterOutputStream; import java.util.zip.GZIPOutputStream; @@ -68,7 +69,8 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERV */ public class TestDFSShell { private static final Log LOG = LogFactory.getLog(TestDFSShell.class); - + private static AtomicInteger counter = new AtomicInteger(); + static final String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data","/tmp")) .toString().replace(' ', '+'); @@ -103,7 +105,7 @@ public class TestDFSShell { System.out.println(Thread.currentThread().getStackTrace()[2] + " " + s); } - @Test + @Test (timeout = 30000) public void testZeroSizeFile() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -146,7 +148,7 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testRecrusiveRm() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -172,7 +174,7 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testDu() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -222,7 +224,8 @@ public class TestDFSShell { } } - @Test + + @Test (timeout = 30000) public void testPut() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -321,7 +324,7 @@ public class TestDFSShell { /** check command error outputs and exit statuses. 
*/ - @Test + @Test (timeout = 30000) public void testErrOutPut() throws Exception { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = null; @@ -471,7 +474,7 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testURIPaths() throws Exception { Configuration srcConf = new HdfsConfiguration(); Configuration dstConf = new HdfsConfiguration(); @@ -511,7 +514,7 @@ public class TestDFSShell { createLocalFile(furi); argv = new String[3]; argv[0] = "-put"; - argv[1] = furi.toString(); + argv[1] = furi.toURI().toString(); argv[2] = dstFs.getUri().toString() + "/furi"; ret = ToolRunner.run(shell, argv); assertEquals(" put is working ", 0, ret); @@ -564,7 +567,7 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testText() throws Exception { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = null; @@ -680,7 +683,7 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testCopyToLocal() throws IOException { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -778,7 +781,7 @@ public class TestDFSShell { return path; } - @Test + @Test (timeout = 30000) public void testCount() throws Exception { Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -866,52 +869,59 @@ public class TestDFSShell { shell.setConf(conf); try { - //first make dir - Path dir = new Path(chmodDir); - fs.delete(dir, true); - fs.mkdirs(dir); + //first make dir + Path dir = new Path(chmodDir); + fs.delete(dir, true); + fs.mkdirs(dir); - confirmPermissionChange(/* Setting */ "u+rwx,g=rw,o-rwx", + confirmPermissionChange(/* Setting */ "u+rwx,g=rw,o-rwx", /* Should give */ "rwxrw----", fs, shell, dir); - - //create an empty file - Path file = new Path(chmodDir, "file"); - TestDFSShell.writeFile(fs, file); - //test octal mode - confirmPermissionChange( "644", "rw-r--r--", fs, shell, file); + //create an empty file + Path file = new Path(chmodDir, "file"); + TestDFSShell.writeFile(fs, file); - //test recursive - runCmd(shell, "-chmod", "-R", "a+rwX", chmodDir); - assertEquals("rwxrwxrwx", - fs.getFileStatus(dir).getPermission().toString()); - assertEquals("rw-rw-rw-", - fs.getFileStatus(file).getPermission().toString()); + //test octal mode + confirmPermissionChange("644", "rw-r--r--", fs, shell, file); - // test sticky bit on directories - Path dir2 = new Path(dir, "stickybit" ); - fs.mkdirs(dir2 ); - LOG.info("Testing sticky bit on: " + dir2); - LOG.info("Sticky bit directory initial mode: " + - fs.getFileStatus(dir2).getPermission()); - - confirmPermissionChange("u=rwx,g=rx,o=rx", "rwxr-xr-x", fs, shell, dir2); - - confirmPermissionChange("+t", "rwxr-xr-t", fs, shell, dir2); + //test recursive + runCmd(shell, "-chmod", "-R", "a+rwX", chmodDir); + assertEquals("rwxrwxrwx", + fs.getFileStatus(dir).getPermission().toString()); + assertEquals("rw-rw-rw-", + fs.getFileStatus(file).getPermission().toString()); - confirmPermissionChange("-t", "rwxr-xr-x", fs, shell, dir2); + // Skip "sticky bit" tests on Windows. 
+ // + if (!Path.WINDOWS) { + // test sticky bit on directories + Path dir2 = new Path(dir, "stickybit"); + fs.mkdirs(dir2); + LOG.info("Testing sticky bit on: " + dir2); + LOG.info("Sticky bit directory initial mode: " + + fs.getFileStatus(dir2).getPermission()); - confirmPermissionChange("=t", "--------T", fs, shell, dir2); + confirmPermissionChange("u=rwx,g=rx,o=rx", "rwxr-xr-x", fs, shell, dir2); - confirmPermissionChange("0000", "---------", fs, shell, dir2); + confirmPermissionChange("+t", "rwxr-xr-t", fs, shell, dir2); - confirmPermissionChange("1666", "rw-rw-rwT", fs, shell, dir2); + confirmPermissionChange("-t", "rwxr-xr-x", fs, shell, dir2); + + confirmPermissionChange("=t", "--------T", fs, shell, dir2); + + confirmPermissionChange("0000", "---------", fs, shell, dir2); + + confirmPermissionChange("1666", "rw-rw-rwT", fs, shell, dir2); + + confirmPermissionChange("777", "rwxrwxrwt", fs, shell, dir2); + + fs.delete(dir2, true); + } else { + LOG.info("Skipped sticky bit tests on Windows"); + } + + fs.delete(dir, true); - confirmPermissionChange("777", "rwxrwxrwt", fs, shell, dir2); - - fs.delete(dir2, true); - fs.delete(dir, true); - } finally { try { fs.close(); @@ -945,7 +955,7 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testFilePermissions() throws IOException { Configuration conf = new HdfsConfiguration(); @@ -1011,7 +1021,7 @@ public class TestDFSShell { /** * Tests various options of DFSShell. */ - @Test + @Test (timeout = 120000) public void testDFSShell() throws IOException { Configuration conf = new HdfsConfiguration(); /* This tests some properties of ChecksumFileSystem as well. @@ -1391,7 +1401,7 @@ public class TestDFSShell { String run(int exitcode, String... options) throws IOException; } - @Test + @Test (timeout = 30000) public void testRemoteException() throws Exception { UserGroupInformation tmpUGI = UserGroupInformation.createUserForTesting("tmpname", new String[] {"mygroup"}); @@ -1435,73 +1445,96 @@ public class TestDFSShell { } } - @Test + @Test (timeout = 30000) public void testGet() throws IOException { DFSTestUtil.setLogLevel2All(FSInputChecker.LOG); + + final String fname = "testGet.txt"; + Path root = new Path("/test/get"); + final Path remotef = new Path(root, fname); final Configuration conf = new HdfsConfiguration(); - // Race can happen here: block scanner is reading the file when test tries - // to corrupt the test file, which will fail the test on Windows platform. - // Disable block scanner to avoid this race. - conf.setInt(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, -1); - - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); - DistributedFileSystem dfs = (DistributedFileSystem)cluster.getFileSystem(); + + TestGetRunner runner = new TestGetRunner() { + private int count = 0; + private FsShell shell = new FsShell(conf); + + public String run(int exitcode, String... options) throws IOException { + String dst = TEST_ROOT_DIR + "/" + fname+ ++count; + String[] args = new String[options.length + 3]; + args[0] = "-get"; + args[args.length - 2] = remotef.toString(); + args[args.length - 1] = dst; + for(int i = 0; i < options.length; i++) { + args[i + 1] = options[i]; + } + show("args=" + Arrays.asList(args)); + + try { + assertEquals(exitcode, shell.run(args)); + } catch (Exception e) { + assertTrue(StringUtils.stringifyException(e), false); + } + return exitcode == 0? 
DFSTestUtil.readFile(new File(dst)): null; + } + }; + + File localf = createLocalFile(new File(TEST_ROOT_DIR, fname)); + MiniDFSCluster cluster = null; + DistributedFileSystem dfs = null; try { - final String fname = "testGet.txt"; - final File localf = createLocalFile(new File(TEST_ROOT_DIR, fname)); - final String localfcontent = DFSTestUtil.readFile(localf); - final Path root = mkdir(dfs, new Path("/test/get")); - final Path remotef = new Path(root, fname); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true) + .build(); + dfs = (DistributedFileSystem)cluster.getFileSystem(); + + mkdir(dfs, root); dfs.copyFromLocalFile(false, false, new Path(localf.getPath()), remotef); - - final FsShell shell = new FsShell(); - shell.setConf(conf); - TestGetRunner runner = new TestGetRunner() { - private int count = 0; - - @Override - public String run(int exitcode, String... options) throws IOException { - String dst = TEST_ROOT_DIR + "/" + fname+ ++count; - String[] args = new String[options.length + 3]; - args[0] = "-get"; - args[args.length - 2] = remotef.toString(); - args[args.length - 1] = dst; - for(int i = 0; i < options.length; i++) { - args[i + 1] = options[i]; - } - show("args=" + Arrays.asList(args)); - - try { - assertEquals(exitcode, shell.run(args)); - } catch (Exception e) { - assertTrue(StringUtils.stringifyException(e), false); - } - return exitcode == 0? DFSTestUtil.readFile(new File(dst)): null; - } - }; + String localfcontent = DFSTestUtil.readFile(localf); assertEquals(localfcontent, runner.run(0)); assertEquals(localfcontent, runner.run(0, "-ignoreCrc")); - //find and modify the block files + // find block files to modify later List files = getBlockFiles(cluster); + + // Shut down cluster and then corrupt the block files by overwriting a + // portion with junk data. We must shut down the cluster so that threads + // in the data node do not hold locks on the block files while we try to + // write into them. Particularly on Windows, the data node's use of the + // FileChannel.transferTo method can cause block files to be memory mapped + // in read-only mode during the transfer to a client, and this causes a + // locking conflict. The call to shutdown the cluster blocks until all + // DataXceiver threads exit, preventing this problem. + dfs.close(); + cluster.shutdown(); + show("files=" + files); corrupt(files); + // Start the cluster again, but do not reformat, so prior files remain. + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(false) + .build(); + dfs = (DistributedFileSystem)cluster.getFileSystem(); + assertEquals(null, runner.run(1)); String corruptedcontent = runner.run(0, "-ignoreCrc"); assertEquals(localfcontent.substring(1), corruptedcontent.substring(1)); assertEquals(localfcontent.charAt(0)+1, corruptedcontent.charAt(0)); - - localf.delete(); } finally { - try {dfs.close();} catch (Exception e) {} - cluster.shutdown(); + if (null != dfs) { + try { + dfs.close(); + } catch (Exception e) { + } + } + if (null != cluster) { + cluster.shutdown(); + } + localf.delete(); } } - @Test + @Test (timeout = 30000) public void testLsr() throws Exception { final Configuration conf = new HdfsConfiguration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); @@ -1559,7 +1592,7 @@ public class TestDFSShell { * and return -1 exit code. 
* @throws Exception */ - @Test + @Test (timeout = 30000) public void testInvalidShell() throws Exception { Configuration conf = new Configuration(); // default FS (non-DFS) DFSAdmin admin = new DFSAdmin(); @@ -1569,29 +1602,31 @@ public class TestDFSShell { } // force Copy Option is -f - @Test + @Test (timeout = 30000) public void testCopyCommandsWithForceOption() throws Exception { + final int SUCCESS = 0; + final int ERROR = 1; + Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1) .format(true).build(); FsShell shell = null; FileSystem fs = null; final File localFile = new File(TEST_ROOT_DIR, "testFileForPut"); - final String localfilepath = localFile.getAbsolutePath(); - final String testdir = TEST_ROOT_DIR + "/ForceTestDir"; + final String localfilepath = new Path(localFile.getAbsolutePath()).toUri().toString(); + final String testdir = "/tmp/TestDFSShell-testCopyCommandsWithForceOption-" + + counter.getAndIncrement(); final Path hdfsTestDir = new Path(testdir); try { fs = cluster.getFileSystem(); fs.mkdirs(hdfsTestDir); localFile.createNewFile(); - writeFile(fs, new Path(TEST_ROOT_DIR, "testFileForPut")); + writeFile(fs, new Path(testdir, "testFileForPut")); shell = new FsShell(); // Tests for put String[] argv = new String[] { "-put", "-f", localfilepath, testdir }; int res = ToolRunner.run(shell, argv); - int SUCCESS = 0; - int ERROR = 1; assertEquals("put -f is not working", SUCCESS, res); argv = new String[] { "-put", localfilepath, testdir }; @@ -1663,8 +1698,13 @@ public class TestDFSShell { try { // Create and delete a file fs = cluster.getFileSystem(); - writeFile(fs, new Path(TEST_ROOT_DIR, "foo")); - final String testFile = TEST_ROOT_DIR + "/foo"; + + // Use a separate tmp dir for each invocation. + final String testdir = "/tmp/TestDFSShell-deleteFileUsingTrash-" + + counter.getAndIncrement(); + + writeFile(fs, new Path(testdir, "foo")); + final String testFile = testdir + "/foo"; final String trashFile = shell.getCurrentTrashDir() + "/" + testFile; String[] argv = new String[] { "-rm", testFile }; int res = ToolRunner.run(shell, argv); @@ -1696,7 +1736,7 @@ public class TestDFSShell { * Test that the server trash configuration is respected when * the client configuration is not set. */ - @Test + @Test (timeout = 30000) public void testServerConfigRespected() throws Exception { deleteFileUsingTrash(true, false); } @@ -1705,7 +1745,7 @@ public class TestDFSShell { * Test that server trash configuration is respected even when the * client configuration is set. */ - @Test + @Test (timeout = 30000) public void testServerConfigRespectedWithClient() throws Exception { deleteFileUsingTrash(true, true); } @@ -1714,7 +1754,7 @@ public class TestDFSShell { * Test that the client trash configuration is respected when * the server configuration is not set. */ - @Test + @Test (timeout = 30000) public void testClientConfigRespected() throws Exception { deleteFileUsingTrash(false, true); } @@ -1722,7 +1762,7 @@ public class TestDFSShell { /** * Test that trash is disabled by default. 
*/ - @Test + @Test (timeout = 30000) public void testNoTrashConfig() throws Exception { deleteFileUsingTrash(false, false); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java index 8d71791fd92..75e9329298a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgradeFromImage.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; import org.junit.Test; /** @@ -293,6 +294,11 @@ public class TestDFSUpgradeFromImage { new File(baseDir, "name2/current/VERSION"), "imageMD5Digest", "22222222222222222222222222222222"); + // Attach our own log appender so we can verify output + final LogVerificationAppender appender = new LogVerificationAppender(); + final Logger logger = Logger.getRootLogger(); + logger.addAppender(appender); + // Upgrade should now fail try { upgradeAndVerify(new MiniDFSCluster.Builder(upgradeConf). @@ -300,9 +306,12 @@ public class TestDFSUpgradeFromImage { fail("Upgrade did not fail with bad MD5"); } catch (IOException ioe) { String msg = StringUtils.stringifyException(ioe); - if (!msg.contains("is corrupt with MD5 checksum")) { + if (!msg.contains("Failed to load an FSImage file")) { throw ioe; } + int md5failures = appender.countExceptionsWithMessage( + " is corrupt with MD5 checksum of "); + assertEquals("Upgrade did not fail with bad MD5", 1, md5failures); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index 8482f81ddb4..91a017a2363 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -641,4 +641,24 @@ public class TestDFSUtil { assertFalse(DFSUtil.isValidName("/foo/:/bar")); assertFalse(DFSUtil.isValidName("/foo:bar")); } + + @Test(timeout=5000) + public void testGetSpnegoKeytabKey() { + HdfsConfiguration conf = new HdfsConfiguration(); + String defaultKey = "default.spengo.key"; + conf.unset(DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY); + assertEquals("Test spnego key in config is null", defaultKey, + DFSUtil.getSpnegoKeytabKey(conf, defaultKey)); + + conf.set(DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY, ""); + assertEquals("Test spnego key is empty", defaultKey, + DFSUtil.getSpnegoKeytabKey(conf, defaultKey)); + + String spengoKey = "spengo.key"; + conf.set(DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY, + spengoKey); + assertEquals("Test spnego key is NOT null", + DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBEROS_KEYTAB_KEY, + DFSUtil.getSpnegoKeytabKey(conf, defaultKey)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java index 4e785110476..3c9ee25b111 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferKeepalive.java @@ -71,6 +71,7 @@ public class TestDataTransferKeepalive { .numDataNodes(1).build(); fs = cluster.getFileSystem(); dfsClient = ((DistributedFileSystem)fs).dfs; + dfsClient.peerCache.clear(); String poolId = cluster.getNamesystem().getBlockPoolId(); dn = cluster.getDataNodes().get(0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java index 97659eeab3e..c1aa9d1f091 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java @@ -151,7 +151,7 @@ public class TestFileConcurrentReader { /** * Test that that writes to an incomplete block are available to a reader */ - @Test + @Test (timeout = 30000) public void testUnfinishedBlockRead() throws IOException { // create a new file in the root, write data, do no close @@ -174,7 +174,7 @@ public class TestFileConcurrentReader { * would result in too small a buffer to do the buffer-copy needed * for partial chunks. */ - @Test + @Test (timeout = 30000) public void testUnfinishedBlockPacketBufferOverrun() throws IOException { // check that / exists Path path = new Path("/"); @@ -200,7 +200,7 @@ public class TestFileConcurrentReader { // use a small block size and a large write so that DN is busy creating // new blocks. This makes it almost 100% sure we can reproduce // case of client getting a DN that hasn't yet created the blocks - @Test + @Test (timeout = 30000) public void testImmediateReadOfNewFile() throws IOException { final int blockSize = 64 * 1024; @@ -277,12 +277,12 @@ public class TestFileConcurrentReader { // for some reason, using tranferTo evokes the race condition more often // so test separately - @Test + @Test (timeout = 30000) public void testUnfinishedBlockCRCErrorTransferTo() throws IOException { runTestUnfinishedBlockCRCError(true, SyncType.SYNC, DEFAULT_WRITE_SIZE); } - @Test + @Test (timeout = 30000) public void testUnfinishedBlockCRCErrorTransferToVerySmallWrite() throws IOException { runTestUnfinishedBlockCRCError(true, SyncType.SYNC, SMALL_WRITE_SIZE); @@ -290,18 +290,17 @@ public class TestFileConcurrentReader { // fails due to issue w/append, disable @Ignore - @Test public void _testUnfinishedBlockCRCErrorTransferToAppend() throws IOException { runTestUnfinishedBlockCRCError(true, SyncType.APPEND, DEFAULT_WRITE_SIZE); } - @Test + @Test (timeout = 30000) public void testUnfinishedBlockCRCErrorNormalTransfer() throws IOException { runTestUnfinishedBlockCRCError(false, SyncType.SYNC, DEFAULT_WRITE_SIZE); } - @Test + @Test (timeout = 30000) public void testUnfinishedBlockCRCErrorNormalTransferVerySmallWrite() throws IOException { runTestUnfinishedBlockCRCError(false, SyncType.SYNC, SMALL_WRITE_SIZE); @@ -309,7 +308,6 @@ public class TestFileConcurrentReader { // fails due to issue w/append, disable @Ignore - @Test public void _testUnfinishedBlockCRCErrorNormalTransferAppend() throws IOException { runTestUnfinishedBlockCRCError(false, SyncType.APPEND, DEFAULT_WRITE_SIZE); @@ -338,33 +336,33 @@ public class TestFileConcurrentReader { final AtomicBoolean writerDone = new AtomicBoolean(false); final AtomicBoolean 
writerStarted = new AtomicBoolean(false); final AtomicBoolean error = new AtomicBoolean(false); - final FSDataOutputStream initialOutputStream = fileSystem.create(file); - final Thread writer = new Thread(new Runnable() { - private FSDataOutputStream outputStream = initialOutputStream; + final Thread writer = new Thread(new Runnable() { @Override public void run() { try { - for (int i = 0; !error.get() && i < numWrites; i++) { - try { + FSDataOutputStream outputStream = fileSystem.create(file); + if (syncType == SyncType.APPEND) { + outputStream.close(); + outputStream = fileSystem.append(file); + } + try { + for (int i = 0; !error.get() && i < numWrites; i++) { final byte[] writeBuf = - DFSTestUtil.generateSequentialBytes(i * writeSize, writeSize); + DFSTestUtil.generateSequentialBytes(i * writeSize, writeSize); outputStream.write(writeBuf); if (syncType == SyncType.SYNC) { outputStream.hflush(); - } else { // append - outputStream.close(); - outputStream = fileSystem.append(file); } writerStarted.set(true); - } catch (IOException e) { - error.set(true); - LOG.error("error writing to file", e); } + } catch (IOException e) { + error.set(true); + LOG.error("error writing to file", e); + } finally { + outputStream.close(); } - writerDone.set(true); - outputStream.close(); } catch (Exception e) { LOG.error("error in writer", e); @@ -415,7 +413,6 @@ public class TestFileConcurrentReader { Thread.currentThread().interrupt(); } - initialOutputStream.close(); } private boolean validateSequentialBytes(byte[] buf, int startPos, int len) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpURLTimeouts.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpURLTimeouts.java index 345c150a74a..d9a22c10111 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpURLTimeouts.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHftpURLTimeouts.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.io.IOException; @@ -39,7 +40,7 @@ import org.junit.Test; public class TestHftpURLTimeouts { @BeforeClass public static void setup() { - URLUtils.SOCKET_TIMEOUT = 1; + URLUtils.SOCKET_TIMEOUT = 5; } @Test @@ -116,6 +117,7 @@ public class TestHftpURLTimeouts { conns.add(fs.openConnection("/", "")); } catch (SocketTimeoutException ste) { String message = ste.getMessage(); + assertNotNull(message); // https will get a read timeout due to SSL negotiation, but // a normal http will not, so need to ignore SSL read timeouts // until a connect timeout occurs diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java index 99f30dd73ec..1400f07e062 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java @@ -65,7 +65,7 @@ public class TestMiniDFSCluster { * * @throws Throwable on a failure */ - @Test + @Test(timeout=100000) public void testClusterWithoutSystemProperties() throws Throwable { System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA); Configuration conf = new HdfsConfiguration(); @@ -74,7 +74,8 @@ public class TestMiniDFSCluster { 
conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, c1Path); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); try { - assertEquals(c1Path+"/data", cluster.getDataDirectory()); + assertEquals(new File(c1Path + "/data"), + new File(cluster.getDataDirectory())); } finally { cluster.shutdown(); } @@ -84,7 +85,7 @@ public class TestMiniDFSCluster { * Bring up two clusters and assert that they are in different directories. * @throws Throwable on a failure */ - @Test + @Test(timeout=100000) public void testDualClusters() throws Throwable { File testDataCluster2 = new File(testDataPath, CLUSTER_2); File testDataCluster3 = new File(testDataPath, CLUSTER_3); @@ -95,7 +96,7 @@ public class TestMiniDFSCluster { MiniDFSCluster cluster3 = null; try { String dataDir2 = cluster2.getDataDirectory(); - assertEquals(c2Path + "/data", dataDir2); + assertEquals(new File(c2Path + "/data"), new File(dataDir2)); //change the data dir conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, testDataCluster3.getAbsolutePath()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index 94931ca4806..f15da5c30e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -18,12 +18,15 @@ package org.apache.hadoop.hdfs; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; import java.io.EOFException; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; +import java.net.URI; import java.nio.ByteBuffer; +import java.security.PrivilegedExceptionAction; import java.util.concurrent.TimeoutException; import org.apache.hadoop.conf.Configuration; @@ -33,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -42,6 +46,7 @@ import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; +import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; @@ -108,9 +113,22 @@ public class TestShortCircuitLocalRead { } } } + + private static String getCurrentUser() throws IOException { + return UserGroupInformation.getCurrentUser().getShortUserName(); + } - static void checkFileContent(FileSystem fs, Path name, byte[] expected, - int readOffset) throws IOException { + /** Check file content, reading as user {@code readingUser} */ + static void checkFileContent(URI uri, Path name, byte[] expected, + int readOffset, String readingUser, Configuration conf, + boolean legacyShortCircuitFails) + throws IOException, InterruptedException { + // Ensure short circuit is enabled + DistributedFileSystem fs = getFileSystem(readingUser, uri, conf); + if (legacyShortCircuitFails) { + assertTrue(fs.getClient().useLegacyBlockReaderLocal()); + } + 
FSDataInputStream stm = fs.open(name); byte[] actual = new byte[expected.length-readOffset]; stm.readFully(readOffset, actual); @@ -135,6 +153,10 @@ public class TestShortCircuitLocalRead { nread += nbytes; } checkData(actual, readOffset, expected, "Read 3"); + + if (legacyShortCircuitFails) { + assertFalse(fs.getClient().useLegacyBlockReaderLocal()); + } stm.close(); } @@ -146,11 +168,17 @@ public class TestShortCircuitLocalRead { return arr; } - /** - * Verifies that reading a file with the direct read(ByteBuffer) api gives the expected set of bytes. - */ - static void checkFileContentDirect(FileSystem fs, Path name, byte[] expected, - int readOffset) throws IOException { + /** Check the file content, reading as user {@code readingUser} */ + static void checkFileContentDirect(URI uri, Path name, byte[] expected, + int readOffset, String readingUser, Configuration conf, + boolean legacyShortCircuitFails) + throws IOException, InterruptedException { + // Ensure short circuit is enabled + DistributedFileSystem fs = getFileSystem(readingUser, uri, conf); + if (legacyShortCircuitFails) { + assertTrue(fs.getClient().useLegacyBlockReaderLocal()); + } + HdfsDataInputStream stm = (HdfsDataInputStream)fs.open(name); ByteBuffer actual = ByteBuffer.allocateDirect(expected.length - readOffset); @@ -180,15 +208,33 @@ public class TestShortCircuitLocalRead { nread += nbytes; } checkData(arrayFromByteBuffer(actual), readOffset, expected, "Read 3"); + if (legacyShortCircuitFails) { + assertFalse(fs.getClient().useLegacyBlockReaderLocal()); + } stm.close(); } + public void doTestShortCircuitReadLegacy(boolean ignoreChecksum, int size, + int readOffset, String shortCircuitUser, String readingUser, + boolean legacyShortCircuitFails) throws IOException, InterruptedException { + doTestShortCircuitReadImpl(ignoreChecksum, size, readOffset, + shortCircuitUser, readingUser, legacyShortCircuitFails); + } + + public void doTestShortCircuitRead(boolean ignoreChecksum, int size, + int readOffset) throws IOException, InterruptedException { + String shortCircuitUser = getCurrentUser(); + doTestShortCircuitReadImpl(ignoreChecksum, size, readOffset, + null, getCurrentUser(), false); + } + /** * Test that file data can be read by reading the block file * directly from the local store. */ - public void doTestShortCircuitRead(boolean ignoreChecksum, int size, - int readOffset) throws IOException { + public void doTestShortCircuitReadImpl(boolean ignoreChecksum, int size, + int readOffset, String shortCircuitUser, String readingUser, + boolean legacyShortCircuitFails) throws IOException, InterruptedException { Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, @@ -196,6 +242,11 @@ public class TestShortCircuitLocalRead { conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, new File(sockDir.getDir(), "TestShortCircuitLocalRead._PORT.sock").getAbsolutePath()); + if (shortCircuitUser != null) { + conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY, + shortCircuitUser); + conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true); + } if (simulatedStorage) { SimulatedFSDataset.setFactory(conf); } @@ -208,53 +259,94 @@ public class TestShortCircuitLocalRead { assertTrue("/ should be a directory", fs.getFileStatus(path) .isDirectory() == true); - byte[] fileData = AppendTestUtil.randomBytes(seed, size); // create a new file in home directory. Do not close it. 
- Path file1 = new Path("filelocal.dat"); + byte[] fileData = AppendTestUtil.randomBytes(seed, size); + Path file1 = fs.makeQualified(new Path("filelocal.dat")); FSDataOutputStream stm = createFile(fs, file1, 1); - - // write to file stm.write(fileData); stm.close(); - checkFileContent(fs, file1, fileData, readOffset); - checkFileContentDirect(fs, file1, fileData, readOffset); + + URI uri = cluster.getURI(); + checkFileContent(uri, file1, fileData, readOffset, readingUser, conf, + legacyShortCircuitFails); + checkFileContentDirect(uri, file1, fileData, readOffset, readingUser, + conf, legacyShortCircuitFails); } finally { fs.close(); cluster.shutdown(); } } - @Test - public void testFileLocalReadNoChecksum() throws IOException { + @Test(timeout=10000) + public void testFileLocalReadNoChecksum() throws Exception { doTestShortCircuitRead(true, 3*blockSize+100, 0); } - @Test - public void testFileLocalReadChecksum() throws IOException { + @Test(timeout=10000) + public void testFileLocalReadChecksum() throws Exception { doTestShortCircuitRead(false, 3*blockSize+100, 0); } - @Test - public void testSmallFileLocalRead() throws IOException { + @Test(timeout=10000) + public void testSmallFileLocalRead() throws Exception { doTestShortCircuitRead(false, 13, 0); doTestShortCircuitRead(false, 13, 5); doTestShortCircuitRead(true, 13, 0); doTestShortCircuitRead(true, 13, 5); } - @Test - public void testReadFromAnOffset() throws IOException { + @Test(timeout=10000) + public void testLocalReadLegacy() throws Exception { + doTestShortCircuitReadLegacy(true, 13, 0, getCurrentUser(), + getCurrentUser(), false); + } + + /** + * Try a short circuit from a reader that is not allowed to + use short circuit. The test ensures reader falls back to non + shortcircuit reads when shortcircuit is disallowed.
+ */ + @Test(timeout=10000) + public void testLocalReadFallback() throws Exception { + doTestShortCircuitReadLegacy(true, 13, 0, getCurrentUser(), "notallowed", true); + } + + @Test(timeout=10000) + public void testReadFromAnOffset() throws Exception { doTestShortCircuitRead(false, 3*blockSize+100, 777); doTestShortCircuitRead(true, 3*blockSize+100, 777); } - @Test - public void testLongFile() throws IOException { + @Test(timeout=10000) + public void testLongFile() throws Exception { doTestShortCircuitRead(false, 10*blockSize+100, 777); doTestShortCircuitRead(true, 10*blockSize+100, 777); } - @Test + private ClientDatanodeProtocol getProxy(UserGroupInformation ugi, + final DatanodeID dnInfo, final Configuration conf) throws IOException, + InterruptedException { + return ugi.doAs(new PrivilegedExceptionAction() { + @Override + public ClientDatanodeProtocol run() throws Exception { + return DFSUtil.createClientDatanodeProtocolProxy(dnInfo, conf, 60000, + false); + } + }); + } + + private static DistributedFileSystem getFileSystem(String user, final URI uri, + final Configuration conf) throws InterruptedException, IOException { + UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user); + return ugi.doAs(new PrivilegedExceptionAction() { + @Override + public DistributedFileSystem run() throws Exception { + return (DistributedFileSystem)FileSystem.get(uri, conf); + } + }); + } + + @Test(timeout=10000) public void testDeprecatedGetBlockLocalPathInfoRpc() throws IOException, InterruptedException { final Configuration conf = new Configuration(); @@ -287,7 +379,7 @@ public class TestShortCircuitLocalRead { } } - @Test + @Test(timeout=10000) public void testSkipWithVerifyChecksum() throws IOException { int size = blockSize; Configuration conf = new Configuration(); @@ -417,7 +509,7 @@ public class TestShortCircuitLocalRead { } /** - * Test to run benchmarks between shortcircuit read vs regular read with + * Test to run benchmarks between short circuit read vs regular read with * specified number of threads simultaneously reading. *
    * Run this using the following command: @@ -435,7 +527,7 @@ public class TestShortCircuitLocalRead { int threadCount = Integer.valueOf(args[2]); // Setup create a file - Configuration conf = new Configuration(); + final Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, shortcircuit); conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY, "/tmp/TestShortCircuitLocalRead._PORT"); @@ -463,9 +555,13 @@ public class TestShortCircuitLocalRead { public void run() { for (int i = 0; i < iteration; i++) { try { - checkFileContent(fs, file1, dataToWrite, 0); + String user = getCurrentUser(); + checkFileContent(fs.getUri(), file1, dataToWrite, 0, user, conf, + true); } catch (IOException e) { e.printStackTrace(); + } catch (InterruptedException e) { + e.printStackTrace(); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java index a5463d03b39..9ce2ae3c7ce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/TestNNWithQJM.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.qjournal; import static org.junit.Assert.*; +import static org.junit.Assume.*; import java.io.File; import java.io.IOException; @@ -43,7 +44,7 @@ import org.junit.Test; public class TestNNWithQJM { Configuration conf = new HdfsConfiguration(); - private MiniJournalCluster mjc; + private MiniJournalCluster mjc = null; private Path TEST_PATH = new Path("/test-dir"); private Path TEST_PATH_2 = new Path("/test-dir"); @@ -61,10 +62,11 @@ public class TestNNWithQJM { public void stopJNs() throws Exception { if (mjc != null) { mjc.shutdown(); + mjc = null; } } - @Test + @Test (timeout = 30000) public void testLogAndRestart() throws IOException { conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image"); @@ -93,9 +95,12 @@ public class TestNNWithQJM { cluster.shutdown(); } } - - @Test + + @Test (timeout = 30000) public void testNewNamenodeTakesOverWriter() throws Exception { + // Skip the test on Windows. See HDFS-4584. 
+ assumeTrue(!Path.WINDOWS); + File nn1Dir = new File( MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image-nn1"); File nn2Dir = new File( @@ -154,7 +159,7 @@ public class TestNNWithQJM { } } - @Test + @Test (timeout = 30000) public void testMismatchedNNIsRejected() throws Exception { conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image"); @@ -188,8 +193,8 @@ public class TestNNWithQJM { "Unable to start log segment 1: too few journals", ioe); } } - - @Test + + @Test (timeout = 30000) public void testWebPageHasQjmInfo() throws Exception { conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java index c9db35faca1..3c4d575db87 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournal.java @@ -36,10 +36,7 @@ import org.apache.hadoop.hdfs.server.common.StorageErrorReporter; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.After; -import org.junit.Assume; -import org.junit.Before; -import org.junit.Test; +import org.junit.*; import org.mockito.Mockito; public class TestJournal { @@ -77,7 +74,7 @@ public class TestJournal { IOUtils.closeStream(journal); } - @Test + @Test (timeout = 10000) public void testEpochHandling() throws Exception { assertEquals(0, journal.getLastPromisedEpoch()); NewEpochResponseProto newEpoch = @@ -110,7 +107,7 @@ public class TestJournal { } } - @Test + @Test (timeout = 10000) public void testMaintainCommittedTxId() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); journal.startLogSegment(makeRI(1), 1); @@ -125,7 +122,7 @@ public class TestJournal { assertEquals(3, journal.getCommittedTxnIdForTests()); } - @Test + @Test (timeout = 10000) public void testRestartJournal() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); journal.startLogSegment(makeRI(1), 1); @@ -149,7 +146,7 @@ public class TestJournal { assertEquals(1, newEpoch.getLastSegmentTxId()); } - @Test + @Test (timeout = 10000) public void testFormatResetsCachedValues() throws Exception { journal.newEpoch(FAKE_NSINFO, 12345L); journal.startLogSegment(new RequestInfo(JID, 12345L, 1L, 0L), 1L); @@ -158,6 +155,8 @@ public class TestJournal { assertEquals(12345L, journal.getLastWriterEpoch()); assertTrue(journal.isFormatted()); + // Close the journal in preparation for reformatting it. + journal.close(); journal.format(FAKE_NSINFO_2); assertEquals(0, journal.getLastPromisedEpoch()); @@ -170,7 +169,7 @@ public class TestJournal { * before any transactions are written, that the next newEpoch() call * returns the prior segment txid as its most recent segment. 
*/ - @Test + @Test (timeout = 10000) public void testNewEpochAtBeginningOfSegment() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); journal.startLogSegment(makeRI(1), 1); @@ -182,7 +181,7 @@ public class TestJournal { assertEquals(1, resp.getLastSegmentTxId()); } - @Test + @Test (timeout = 10000) public void testJournalLocking() throws Exception { Assume.assumeTrue(journal.getStorage().getStorageDir(0).isLockSupported()); StorageDirectory sd = journal.getStorage().getStorageDir(0); @@ -206,13 +205,14 @@ public class TestJournal { // Hence, should be able to create a new Journal in the same dir. Journal journal2 = new Journal(TEST_LOG_DIR, JID, mockErrorReporter); journal2.newEpoch(FAKE_NSINFO, 2); + journal2.close(); } /** * Test finalizing a segment after some batch of edits were missed. * This should fail, since we validate the log before finalization. */ - @Test + @Test (timeout = 10000) public void testFinalizeWhenEditsAreMissed() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); journal.startLogSegment(makeRI(1), 1); @@ -246,7 +246,7 @@ public class TestJournal { * Ensure that finalizing a segment which doesn't exist throws the * appropriate exception. */ - @Test + @Test (timeout = 10000) public void testFinalizeMissingSegment() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); try { @@ -267,7 +267,7 @@ public class TestJournal { * Eventually, the connection comes back, and the NN tries to start a new * segment at a higher txid. This should abort the old one and succeed. */ - @Test + @Test (timeout = 10000) public void testAbortOldSegmentIfFinalizeIsMissed() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); @@ -296,7 +296,7 @@ public class TestJournal { * Test behavior of startLogSegment() when a segment with the * same transaction ID already exists. 
*/ - @Test + @Test (timeout = 10000) public void testStartLogSegmentWhenAlreadyExists() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); @@ -345,7 +345,7 @@ public class TestJournal { return new RequestInfo(JID, 1, serial, 0); } - @Test + @Test (timeout = 10000) public void testNamespaceVerification() throws Exception { journal.newEpoch(FAKE_NSINFO, 1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java index f57f7e66b40..e6e140443bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNode.java @@ -46,6 +46,7 @@ import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MetricsAsserts; +import org.apache.hadoop.util.Shell; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -65,6 +66,8 @@ public class TestJournalNode { private Configuration conf = new Configuration(); private IPCLoggerChannel ch; private String journalId; + private File TEST_BUILD_DATA = + new File(System.getProperty("test.build.data", "build/test/data")); static { // Avoid an error when we double-initialize JvmMetrics @@ -96,7 +99,7 @@ public class TestJournalNode { jn.stop(0); } - @Test + @Test(timeout=100000) public void testJournal() throws Exception { MetricsRecordBuilder metrics = MetricsAsserts.getMetrics( journal.getMetricsForTests().getName()); @@ -129,7 +132,7 @@ public class TestJournalNode { } - @Test + @Test(timeout=100000) public void testReturnsSegmentInfoAtEpochTransition() throws Exception { ch.newEpoch(1).get(); ch.setEpoch(1); @@ -157,7 +160,7 @@ public class TestJournalNode { assertEquals(1, response.getLastSegmentTxId()); } - @Test + @Test(timeout=100000) public void testHttpServer() throws Exception { InetSocketAddress addr = jn.getBoundHttpAddress(); assertTrue(addr.getPort() > 0); @@ -210,7 +213,7 @@ public class TestJournalNode { * Test that the JournalNode performs correctly as a Paxos * Acceptor process. */ - @Test + @Test(timeout=100000) public void testAcceptRecoveryBehavior() throws Exception { // We need to run newEpoch() first, or else we have no way to distinguish // different proposals for the same decision. 
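The TestDFSUpgradeFromImage hunk earlier in this series, and the TestReplicationPolicy and TestStartup hunks further below, verify log output by attaching a shared LogVerificationAppender to the root log4j logger and then counting matching events. The class itself is not included in this excerpt, so the following is only a sketch of what it plausibly looks like, pieced together from the AppenderSkeleton-based TestAppender that the TestReplicationPolicy hunk deletes and from the countExceptionsWithMessage(...) call in the upgrade test; the implementation details here are assumptions, not the committed class.

import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.spi.LoggingEvent;
import org.apache.log4j.spi.ThrowableInformation;

/** Collects log4j events so tests can assert on what was logged. */
public class LogVerificationAppender extends AppenderSkeleton {
  private final List<LoggingEvent> log = new ArrayList<LoggingEvent>();

  @Override
  public boolean requiresLayout() {
    return false;
  }

  @Override
  protected void append(final LoggingEvent loggingEvent) {
    log.add(loggingEvent);
  }

  @Override
  public void close() {
  }

  /** Return a copy of the captured events. */
  public List<LoggingEvent> getLog() {
    return new ArrayList<LoggingEvent>(log);
  }

  /** Count events whose attached exception message contains the given text. */
  public int countExceptionsWithMessage(final String text) {
    int count = 0;
    for (LoggingEvent e : getLog()) {
      ThrowableInformation t = e.getThrowableInformation();
      if (t != null) {
        String msg = t.getThrowable().getMessage();
        if (msg != null && msg.contains(text)) {
          count++;
        }
      }
    }
    return count;
  }
}

In a test it would be attached with Logger.getRootLogger().addAppender(appender) before the action under test, with assertions made afterwards through getLog() or countExceptionsWithMessage(...), which is the pattern the surrounding hunks follow.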
@@ -270,20 +273,27 @@ public class TestJournalNode { } } - @Test + @Test(timeout=100000) public void testFailToStartWithBadConfig() throws Exception { Configuration conf = new Configuration(); conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, "non-absolute-path"); assertJNFailsToStart(conf, "should be an absolute path"); // Existing file which is not a directory - conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, "/dev/null"); - assertJNFailsToStart(conf, "is not a directory"); + File existingFile = new File(TEST_BUILD_DATA, "testjournalnodefile"); + assertTrue(existingFile.createNewFile()); + try { + conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, + existingFile.getAbsolutePath()); + assertJNFailsToStart(conf, "Not a directory"); + } finally { + existingFile.delete(); + } // Directory which cannot be created - conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, "/proc/does-not-exist"); - assertJNFailsToStart(conf, "Could not create"); - + conf.set(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, + Shell.WINDOWS ? "\\\\cannotBeCreated" : "/proc/does-not-exist"); + assertJNFailsToStart(conf, "Can not create directory"); } private static void assertJNFailsToStart(Configuration conf, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java index 80c8eb50083..86994bed3ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java @@ -104,7 +104,7 @@ public class TestNodeCount { while (iter.hasNext()) { DatanodeDescriptor dn = iter.next(); Collection blocks = bm.excessReplicateMap.get(dn.getStorageID()); - if (blocks == null || !blocks.contains(block) ) { + if (blocks == null || !blocks.contains(block.getLocalBlock()) ) { nonExcessDN = dn; break; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java index 38a5f0df3bb..0322eefad78 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.LogVerificationAppender; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -45,7 +46,6 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.apache.hadoop.util.Time; -import org.apache.log4j.AppenderSkeleton; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.spi.LoggingEvent; @@ -419,7 +419,7 @@ public class TestReplicationPolicy { (HdfsConstants.MIN_BLOCKS_FOR_WRITE-1)*BLOCK_SIZE, 0L, 0, 0); } - final TestAppender appender = new TestAppender(); + final LogVerificationAppender appender = new 
LogVerificationAppender(); final Logger logger = Logger.getRootLogger(); logger.addAppender(appender); @@ -446,28 +446,6 @@ public class TestReplicationPolicy { HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0); } } - - class TestAppender extends AppenderSkeleton { - private final List log = new ArrayList(); - - @Override - public boolean requiresLayout() { - return false; - } - - @Override - protected void append(final LoggingEvent loggingEvent) { - log.add(loggingEvent); - } - - @Override - public void close() { - } - - public List getLog() { - return new ArrayList(log); - } - } private boolean containsWithinRange(DatanodeDescriptor target, DatanodeDescriptor[] nodes, int startIndex, int endIndex) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataDirs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataDirs.java index 9226beaf297..fc7a3ff01d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataDirs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataDirs.java @@ -27,33 +27,26 @@ import java.util.List; import org.junit.Test; import static org.junit.Assert.*; import static org.mockito.Mockito.*; -import static org.apache.hadoop.test.MockitoMaker.*; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.server.datanode.DataNode.DataNodeDiskChecker; public class TestDataDirs { - @Test public void testGetDataDirsFromURIs() throws Throwable { - File localDir = make(stub(File.class).returning(true).from.exists()); - when(localDir.mkdir()).thenReturn(true); - FsPermission normalPerm = new FsPermission("700"); - FsPermission badPerm = new FsPermission("000"); - FileStatus stat = make(stub(FileStatus.class) - .returning(normalPerm, normalPerm, badPerm).from.getPermission()); - when(stat.isDirectory()).thenReturn(true); - LocalFileSystem fs = make(stub(LocalFileSystem.class) - .returning(stat).from.getFileStatus(any(Path.class))); - when(fs.pathToFile(any(Path.class))).thenReturn(localDir); + @Test (timeout = 10000) + public void testGetDataDirsFromURIs() throws Throwable { + + DataNodeDiskChecker diskChecker = mock(DataNodeDiskChecker.class); + doThrow(new IOException()).doThrow(new IOException()).doNothing() + .when(diskChecker).checkDir(any(LocalFileSystem.class), any(Path.class)); + LocalFileSystem fs = mock(LocalFileSystem.class); Collection uris = Arrays.asList(new URI("file:/p1/"), new URI("file:/p2/"), new URI("file:/p3/")); - List dirs = DataNode.getDataDirsFromURIs(uris, fs, normalPerm); - - verify(fs, times(2)).setPermission(any(Path.class), eq(normalPerm)); - verify(fs, times(6)).getFileStatus(any(Path.class)); - assertEquals("number of valid data dirs", dirs.size(), 1); + List dirs = DataNode.getDataDirsFromURIs(uris, fs, diskChecker); + assertEquals("number of valid data dirs", 1, dirs.size()); + String validDir = dirs.iterator().next().getPath(); + assertEquals("p3 should be valid", new File("/p3").getPath(), validDir); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index f4dc62bf45e..2ffaaafca3a 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -272,15 +272,15 @@ public abstract class FSImageTestUtil { for (File dir : dirs) { FSImageTransactionalStorageInspector inspector = inspectStorageDirectory(dir, NameNodeDirType.IMAGE); - FSImageFile latestImage = inspector.getLatestImage(); - assertNotNull("No image in " + dir, latestImage); - long thisTxId = latestImage.getCheckpointTxId(); + List latestImages = inspector.getLatestImages(); + assert(!latestImages.isEmpty()); + long thisTxId = latestImages.get(0).getCheckpointTxId(); if (imageTxId != -1 && thisTxId != imageTxId) { fail("Storage directory " + dir + " does not have the same " + "last image index " + imageTxId + " as another"); } imageTxId = thisTxId; - imageFiles.add(inspector.getLatestImage().getFile()); + imageFiles.add(inspector.getLatestImages().get(0).getFile()); } assertFileContentsSame(imageFiles.toArray(new File[0])); @@ -424,7 +424,7 @@ public abstract class FSImageTestUtil { new FSImageTransactionalStorageInspector(); inspector.inspectDirectory(sd); - return inspector.getLatestImage().getFile(); + return inspector.getLatestImages().get(0).getFile(); } /** @@ -439,8 +439,8 @@ public abstract class FSImageTestUtil { new FSImageTransactionalStorageInspector(); inspector.inspectDirectory(sd); - FSImageFile latestImage = inspector.getLatestImage(); - return (latestImage == null) ? null : latestImage.getFile(); + List latestImages = inspector.getLatestImages(); + return (latestImages.isEmpty()) ? null : latestImages.get(0).getFile(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java index b29f5e041f3..51ca00921c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java @@ -143,7 +143,7 @@ public class OfflineEditsViewerHelper { (DistributedFileSystem)cluster.getFileSystem(); FileContext fc = FileContext.getFileContext(cluster.getURI(0), config); // OP_ADD 0, OP_SET_GENSTAMP 10 - Path pathFileCreate = new Path("/file_create"); + Path pathFileCreate = new Path("/file_create_u\1F431"); FSDataOutputStream s = dfs.create(pathFileCreate); // OP_CLOSE 9 s.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java index 0fe6255fc9a..bbf1dac25dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogs.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.log4j.Level; +import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.log4j.PatternLayout; import org.apache.log4j.RollingFileAppender; @@ -233,9 +234,15 @@ public class TestAuditLogs { /** Sets up log4j logger 
for auditlogs */ private void setupAuditLogs() throws IOException { + // Shutdown the LogManager to release all logger open file handles. + // Unfortunately, Apache commons logging library does not provide + // means to release underlying loggers. For additional info look up + // commons library FAQ. + LogManager.shutdown(); + File file = new File(auditLogFile); if (file.exists()) { - file.delete(); + assertTrue(file.delete()); } Logger logger = ((Log4JLogger) FSNamesystem.auditLog).getLogger(); logger.setLevel(Level.INFO); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index aa831d2bc09..de0cf8a5ef5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -74,6 +74,8 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.GenericTestUtils.LogCapturer; +import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.ExitUtil.ExitException; import org.apache.hadoop.util.StringUtils; import org.apache.log4j.Level; import org.junit.After; @@ -226,6 +228,111 @@ public class TestCheckpoint { toString().indexOf("storageDirToCheck") != -1); } + /* + * Simulate exception during edit replay. + */ + @Test(timeout=30000) + public void testReloadOnEditReplayFailure () throws IOException { + Configuration conf = new HdfsConfiguration(); + FSDataOutputStream fos = null; + SecondaryNameNode secondary = null; + MiniDFSCluster cluster = null; + FileSystem fs = null; + + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes) + .build(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + secondary = startSecondaryNameNode(conf); + fos = fs.create(new Path("tmpfile0")); + fos.write(new byte[] { 0, 1, 2, 3 }); + secondary.doCheckpoint(); + fos.write(new byte[] { 0, 1, 2, 3 }); + fos.hsync(); + + // Cause merge to fail in next checkpoint. + Mockito.doThrow(new IOException( + "Injecting failure during merge")) + .when(faultInjector).duringMerge(); + + try { + secondary.doCheckpoint(); + fail("Fault injection failed."); + } catch (IOException ioe) { + // This is expected. + } + Mockito.reset(faultInjector); + + // The error must be recorded, so next checkpoint will reload image. + fos.write(new byte[] { 0, 1, 2, 3 }); + fos.hsync(); + + assertTrue("Another checkpoint should have reloaded image", + secondary.doCheckpoint()); + } finally { + if (secondary != null) { + secondary.shutdown(); + } + if (fs != null) { + fs.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + Mockito.reset(faultInjector); + } + } + + /* + * Simulate 2NN exit due to too many merge failures. 
+ */ + @Test(timeout=10000) + public void testTooManyEditReplayFailures() throws IOException { + Configuration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY, "1"); + conf.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, "1"); + + FSDataOutputStream fos = null; + SecondaryNameNode secondary = null; + MiniDFSCluster cluster = null; + FileSystem fs = null; + + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes) + .checkExitOnShutdown(false).build(); + cluster.waitActive(); + fs = cluster.getFileSystem(); + fos = fs.create(new Path("tmpfile0")); + fos.write(new byte[] { 0, 1, 2, 3 }); + + // Cause merge to fail in next checkpoint. + Mockito.doThrow(new IOException( + "Injecting failure during merge")) + .when(faultInjector).duringMerge(); + + secondary = startSecondaryNameNode(conf); + secondary.doWork(); + // Fail if we get here. + fail("2NN did not exit."); + } catch (ExitException ee) { + // ignore + ExitUtil.resetFirstExitException(); + assertEquals("Max retries", 1, secondary.getMergeErrorCount() - 1); + } finally { + if (secondary != null) { + secondary.shutdown(); + } + if (fs != null) { + fs.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + Mockito.reset(faultInjector); + } + } + /* * Simulate namenode crashing after rolling edit log. */ @@ -1304,6 +1411,60 @@ public class TestCheckpoint { } } + /** + * Test NN restart if a failure happens in between creating the fsimage + * MD5 file and renaming the fsimage. + */ + @Test(timeout=30000) + public void testFailureBeforeRename () throws IOException { + Configuration conf = new HdfsConfiguration(); + FSDataOutputStream fos = null; + SecondaryNameNode secondary = null; + MiniDFSCluster cluster = null; + FileSystem fs = null; + NameNode namenode = null; + + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes) + .build(); + cluster.waitActive(); + namenode = cluster.getNameNode(); + fs = cluster.getFileSystem(); + secondary = startSecondaryNameNode(conf); + fos = fs.create(new Path("tmpfile0")); + fos.write(new byte[] { 0, 1, 2, 3 }); + secondary.doCheckpoint(); + fos.write(new byte[] { 0, 1, 2, 3 }); + fos.hsync(); + + // Cause merge to fail in next checkpoint. + Mockito.doThrow(new IOException( + "Injecting failure after MD5Rename")) + .when(faultInjector).afterMD5Rename(); + + try { + secondary.doCheckpoint(); + fail("Fault injection failed."); + } catch (IOException ioe) { + // This is expected. + } + Mockito.reset(faultInjector); + // Namenode should still restart successfully + cluster.restartNameNode(); + } finally { + if (secondary != null) { + secondary.shutdown(); + } + if (fs != null) { + fs.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + Mockito.reset(faultInjector); + } + } + /** * Test case where two secondary namenodes are checkpointing the same * NameNode. 
This differs from {@link #testMultipleSecondaryNamenodes()} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 3cf3ca6c7bc..26c6fdba652 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -861,6 +861,11 @@ public class TestEditLog { public boolean isInProgress() { return true; } + + @Override + public void setMaxOpSize(int maxOpSize) { + reader.setMaxOpSize(maxOpSize); + } } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java index 01d54b814db..5e3ac4b7a2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageStorageInspector.java @@ -57,7 +57,7 @@ public class TestFSImageStorageInspector { inspector.inspectDirectory(mockDir); assertEquals(2, inspector.foundImages.size()); - FSImageFile latestImage = inspector.getLatestImage(); + FSImageFile latestImage = inspector.getLatestImages().get(0); assertEquals(456, latestImage.txId); assertSame(mockDir, latestImage.sd); assertTrue(inspector.isUpgradeFinalized()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java index 2a0ab2003b8..ea9175b214a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestHostsFiles.java @@ -120,12 +120,13 @@ public class TestHostsFiles { InetSocketAddress nnHttpAddress = cluster.getNameNode().getHttpAddress(); LOG.info("nnaddr = '" + nnHttpAddress + "'"); - URL nnjsp = new URL("http://" + nnHttpAddress.getHostName() + ":" + nnHttpAddress.getPort() + "/dfshealth.jsp"); + String nnHostName = nnHttpAddress.getHostName(); + URL nnjsp = new URL("http://" + nnHostName + ":" + nnHttpAddress.getPort() + "/dfshealth.jsp"); LOG.info("fetching " + nnjsp); String dfshealthPage = StringEscapeUtils.unescapeHtml(DFSTestUtil.urlGet(nnjsp)); LOG.info("got " + dfshealthPage); - assertTrue("dfshealth should contain localhost, got:" + dfshealthPage, - dfshealthPage.contains("localhost")); + assertTrue("dfshealth should contain " + nnHostName + ", got:" + dfshealthPage, + dfshealthPage.contains(nnHostName)); } finally { cluster.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index 9d5efb33498..37b22714c20 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ -184,34 +184,41 @@ public class TestINodeFile { long fileLen = 1024; replication = 
3; Configuration conf = new Configuration(); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes( - replication).build(); - cluster.waitActive(); - FSNamesystem fsn = cluster.getNamesystem(); - FSDirectory fsdir = fsn.getFSDirectory(); - DistributedFileSystem dfs = cluster.getFileSystem(); - - // Create a file for test - final Path dir = new Path("/dir"); - final Path file = new Path(dir, "file"); - DFSTestUtil.createFile(dfs, file, fileLen, replication, 0L); - - // Check the full path name of the INode associating with the file - INode fnode = fsdir.getINode(file.toString()); - assertEquals(file.toString(), fnode.getFullPathName()); - - // Call FSDirectory#unprotectedSetQuota which calls - // INodeDirectory#replaceChild - dfs.setQuota(dir, Long.MAX_VALUE - 1, replication * fileLen * 10); - final Path newDir = new Path("/newdir"); - final Path newFile = new Path(newDir, "file"); - // Also rename dir - dfs.rename(dir, newDir, Options.Rename.OVERWRITE); - // /dir/file now should be renamed to /newdir/file - fnode = fsdir.getINode(newFile.toString()); - // getFullPathName can return correct result only if the parent field of - // child node is set correctly - assertEquals(newFile.toString(), fnode.getFullPathName()); + MiniDFSCluster cluster = null; + try { + cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(replication).build(); + cluster.waitActive(); + FSNamesystem fsn = cluster.getNamesystem(); + FSDirectory fsdir = fsn.getFSDirectory(); + DistributedFileSystem dfs = cluster.getFileSystem(); + + // Create a file for test + final Path dir = new Path("/dir"); + final Path file = new Path(dir, "file"); + DFSTestUtil.createFile(dfs, file, fileLen, replication, 0L); + + // Check the full path name of the INode associating with the file + INode fnode = fsdir.getINode(file.toString()); + assertEquals(file.toString(), fnode.getFullPathName()); + + // Call FSDirectory#unprotectedSetQuota which calls + // INodeDirectory#replaceChild + dfs.setQuota(dir, Long.MAX_VALUE - 1, replication * fileLen * 10); + final Path newDir = new Path("/newdir"); + final Path newFile = new Path(newDir, "file"); + // Also rename dir + dfs.rename(dir, newDir, Options.Rename.OVERWRITE); + // /dir/file now should be renamed to /newdir/file + fnode = fsdir.getINode(newFile.toString()); + // getFullPathName can return correct result only if the parent field of + // child node is set correctly + assertEquals(newFile.toString(), fnode.getFullPathName()); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } } @Test @@ -385,41 +392,47 @@ public class TestINodeFile { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1) - .build(); - cluster.waitActive(); - - FSNamesystem fsn = cluster.getNamesystem(); - long lastId = fsn.getLastInodeId(); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + cluster.waitActive(); - assertTrue(lastId == 1001); + FSNamesystem fsn = cluster.getNamesystem(); + long lastId = fsn.getLastInodeId(); - // Create one directory and the last inode id should increase to 1002 - FileSystem fs = cluster.getFileSystem(); - Path path = new Path("/test1"); - assertTrue(fs.mkdirs(path)); - assertTrue(fsn.getLastInodeId() == 1002); + assertTrue(lastId == 1001); - // Use namenode rpc to create a file - NamenodeProtocols nnrpc = cluster.getNameNodeRpc(); - 
HdfsFileStatus fileStatus = nnrpc.create("/test1/file", new FsPermission( - (short) 0755), "client", - new EnumSetWritable(EnumSet.of(CreateFlag.CREATE)), true, - (short) 1, 128 * 1024 * 1024L); - assertTrue(fsn.getLastInodeId() == 1003); - assertTrue(fileStatus.getFileId() == 1003); + // Create one directory and the last inode id should increase to 1002 + FileSystem fs = cluster.getFileSystem(); + Path path = new Path("/test1"); + assertTrue(fs.mkdirs(path)); + assertTrue(fsn.getLastInodeId() == 1002); - // Rename doesn't increase inode id - Path renamedPath = new Path("/test2"); - fs.rename(path, renamedPath); - assertTrue(fsn.getLastInodeId() == 1003); + // Use namenode rpc to create a file + NamenodeProtocols nnrpc = cluster.getNameNodeRpc(); + HdfsFileStatus fileStatus = nnrpc.create("/test1/file", new FsPermission( + (short) 0755), "client", + new EnumSetWritable(EnumSet.of(CreateFlag.CREATE)), true, + (short) 1, 128 * 1024 * 1024L); + assertTrue(fsn.getLastInodeId() == 1003); + assertTrue(fileStatus.getFileId() == 1003); - cluster.restartNameNode(); - cluster.waitActive(); - // Make sure empty editlog can be handled - cluster.restartNameNode(); - cluster.waitActive(); - assertTrue(fsn.getLastInodeId() == 1003); + // Rename doesn't increase inode id + Path renamedPath = new Path("/test2"); + fs.rename(path, renamedPath); + assertTrue(fsn.getLastInodeId() == 1003); + + cluster.restartNameNode(); + cluster.waitActive(); + // Make sure empty editlog can be handled + cluster.restartNameNode(); + cluster.waitActive(); + assertTrue(fsn.getLastInodeId() == 1003); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java index 93b588d0c8c..18a45bcb18a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java @@ -83,6 +83,7 @@ public class TestNameNodeRecovery { elfos.close(); elfos = null; elfis = new EditLogFileInputStream(TEST_LOG_NAME); + elfis.setMaxOpSize(elts.getMaxOpSize()); // reading through normally will get you an exception Set validTxIds = elts.getValidTxIds(); @@ -143,7 +144,7 @@ public class TestNameNodeRecovery { /** * A test scenario for the edit log */ - private interface EditLogTestSetup { + private static abstract class EditLogTestSetup { /** * Set up the edit log. */ @@ -162,6 +163,13 @@ public class TestNameNodeRecovery { * edit log. **/ abstract public Set getValidTxIds(); + + /** + * Return the maximum opcode size we will use for input. 
+ */ + public int getMaxOpSize() { + return DFSConfigKeys.DFS_NAMENODE_MAX_OP_SIZE_DEFAULT; + } + } static void padEditLog(EditLogOutputStream elos, int paddingLength) @@ -182,10 +190,10 @@ } static void addDeleteOpcode(EditLogOutputStream elos, - OpInstanceCache cache) throws IOException { + OpInstanceCache cache, long txId, String path) throws IOException { DeleteOp op = DeleteOp.getInstance(cache); - op.setTransactionId(0x0); - op.setPath("/foo"); + op.setTransactionId(txId); + op.setPath(path); op.setTimestamp(0); elos.write(op); } @@ -198,7 +206,7 @@ * able to handle any amount of padding (including no padding) without * throwing an exception. */ - private static class EltsTestEmptyLog implements EditLogTestSetup { + private static class EltsTestEmptyLog extends EditLogTestSetup { private int paddingLength; public EltsTestEmptyLog(int paddingLength) { @@ -242,6 +250,42 @@ 3 * EditLogFileOutputStream.MIN_PREALLOCATION_LENGTH)); } + /** + * Test using a non-default maximum opcode length. + */ + private static class EltsTestNonDefaultMaxOpSize extends EditLogTestSetup { + public EltsTestNonDefaultMaxOpSize() { + } + + @Override + public void addTransactionsToLog(EditLogOutputStream elos, + OpInstanceCache cache) throws IOException { + addDeleteOpcode(elos, cache, 0, "/foo"); + addDeleteOpcode(elos, cache, 1, + "/supercalifragalisticexpialadocius.supercalifragalisticexpialadocius"); + } + + @Override + public long getLastValidTxId() { + return 0; + } + + @Override + public Set getValidTxIds() { + return Sets.newHashSet(0L); + } + + public int getMaxOpSize() { + return 30; + } + } + + /** Test reading an edit log with a non-default maximum opcode size. */ + @Test(timeout=180000) + public void testNonDefaultMaxOpSize() throws IOException { + runEditLogTest(new EltsTestNonDefaultMaxOpSize()); + } + /** * Test the scenario where an edit log contains some padding (0xff) bytes * followed by valid opcode data. @@ -249,7 +293,7 @@ * These edit logs are corrupt, but all the opcodes should be recoverable with recovery mode.
*/ - private static class EltsTestOpcodesAfterPadding implements EditLogTestSetup { + private static class EltsTestOpcodesAfterPadding extends EditLogTestSetup { private int paddingLength; public EltsTestOpcodesAfterPadding(int paddingLength) { @@ -260,7 +304,7 @@ public class TestNameNodeRecovery { public void addTransactionsToLog(EditLogOutputStream elos, OpInstanceCache cache) throws IOException { padEditLog(elos, paddingLength); - addDeleteOpcode(elos, cache); + addDeleteOpcode(elos, cache, 0, "/foo"); } @Override @@ -286,7 +330,7 @@ public class TestNameNodeRecovery { 3 * EditLogFileOutputStream.MIN_PREALLOCATION_LENGTH)); } - private static class EltsTestGarbageInEditLog implements EditLogTestSetup { + private static class EltsTestGarbageInEditLog extends EditLogTestSetup { final private long BAD_TXID = 4; final private long MAX_TXID = 10; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java index d6d849da0ab..abb2337a5dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java @@ -158,7 +158,7 @@ public class TestProcessCorruptBlocks { * (corrupt replica should be removed since number of good * replicas (1) is equal to replication factor (1)) */ - @Test + @Test(timeout=20000) public void testWithReplicationFactorAsOne() throws Exception { Configuration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L); @@ -183,9 +183,14 @@ public class TestProcessCorruptBlocks { namesystem.setReplication(fileName.toString(), (short) 1); // wait for 3 seconds so that all block reports are processed. - try { - Thread.sleep(3000); - } catch (InterruptedException ignored) { + for (int i = 0; i < 10; i++) { + try { + Thread.sleep(1000); + } catch (InterruptedException ignored) { + } + if (countReplicas(namesystem, block).corruptReplicas() == 0) { + break; + } } assertEquals(1, countReplicas(namesystem, block).liveReplicas()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java index a852a690408..ff88711588c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java @@ -41,6 +41,7 @@ import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -219,7 +220,7 @@ public class TestSaveNamespace { * Verify that a saveNamespace command brings faulty directories * in fs.name.dir and fs.edit.dir back online. 
*/ - @Test + @Test (timeout=30000) public void testReinsertnamedirsInSavenamespace() throws Exception { // create a configuration with the key to restore error // directories in fs.name.dir @@ -237,10 +238,13 @@ public class TestSaveNamespace { FSImage spyImage = spy(originalImage); fsn.dir.fsImage = spyImage; + FileSystem fs = FileSystem.getLocal(conf); File rootDir = storage.getStorageDir(0).getRoot(); - rootDir.setExecutable(false); - rootDir.setWritable(false); - rootDir.setReadable(false); + Path rootPath = new Path(rootDir.getPath(), "current"); + final FsPermission permissionNone = new FsPermission((short) 0); + final FsPermission permissionAll = new FsPermission( + FsAction.ALL, FsAction.READ_EXECUTE, FsAction.READ_EXECUTE); + fs.setPermission(rootPath, permissionNone); try { doAnEdit(fsn, 1); @@ -257,9 +261,7 @@ public class TestSaveNamespace { " bad directories.", storage.getRemovedStorageDirs().size() == 1); - rootDir.setExecutable(true); - rootDir.setWritable(true); - rootDir.setReadable(true); + fs.setPermission(rootPath, permissionAll); // The next call to savenamespace should try inserting the // erroneous directory back to fs.name.dir. This command should @@ -290,9 +292,7 @@ public class TestSaveNamespace { LOG.info("Reloaded image is good."); } finally { if (rootDir.exists()) { - rootDir.setExecutable(true); - rootDir.setWritable(true); - rootDir.setReadable(true); + fs.setPermission(rootPath, permissionAll); } if (fsn != null) { @@ -305,27 +305,27 @@ public class TestSaveNamespace { } } - @Test + @Test (timeout=30000) public void testRTEWhileSavingSecondImage() throws Exception { saveNamespaceWithInjectedFault(Fault.SAVE_SECOND_FSIMAGE_RTE); } - @Test + @Test (timeout=30000) public void testIOEWhileSavingSecondImage() throws Exception { saveNamespaceWithInjectedFault(Fault.SAVE_SECOND_FSIMAGE_IOE); } - @Test + @Test (timeout=30000) public void testCrashInAllImageDirs() throws Exception { saveNamespaceWithInjectedFault(Fault.SAVE_ALL_FSIMAGES); } - @Test + @Test (timeout=30000) public void testCrashWhenWritingVersionFiles() throws Exception { saveNamespaceWithInjectedFault(Fault.WRITE_STORAGE_ALL); } - @Test + @Test (timeout=30000) public void testCrashWhenWritingVersionFileInOneDir() throws Exception { saveNamespaceWithInjectedFault(Fault.WRITE_STORAGE_ONE); } @@ -337,7 +337,7 @@ public class TestSaveNamespace { * failed checkpoint since it only affected ".ckpt" files, not * valid image files */ - @Test + @Test (timeout=30000) public void testFailedSaveNamespace() throws Exception { doTestFailedSaveNamespace(false); } @@ -347,7 +347,7 @@ public class TestSaveNamespace { * the operator restores the directories and calls it again. * This should leave the NN in a clean state for next start. */ - @Test + @Test (timeout=30000) public void testFailedSaveNamespaceWithRecovery() throws Exception { doTestFailedSaveNamespace(true); } @@ -421,7 +421,7 @@ public class TestSaveNamespace { } } - @Test + @Test (timeout=30000) public void testSaveWhileEditsRolled() throws Exception { Configuration conf = getConf(); NameNode.initMetrics(conf, NamenodeRole.NAMENODE); @@ -457,7 +457,7 @@ public class TestSaveNamespace { } } - @Test + @Test (timeout=30000) public void testTxIdPersistence() throws Exception { Configuration conf = getConf(); NameNode.initMetrics(conf, NamenodeRole.NAMENODE); @@ -580,7 +580,7 @@ public class TestSaveNamespace { * open lease and destination directory exist. 
* This test is a regression for HDFS-2827 */ - @Test + @Test (timeout=30000) public void testSaveNamespaceWithRenamedLease() throws Exception { MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()) .numDataNodes(1).build(); @@ -603,7 +603,7 @@ public class TestSaveNamespace { } } - @Test + @Test (timeout=30000) public void testSaveNamespaceWithDanglingLease() throws Exception { MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()) .numDataNodes(1).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java index fbb88ffe1f7..de111053574 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java @@ -31,12 +31,10 @@ import java.net.URI; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -46,17 +44,21 @@ import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.LogVerificationAppender; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.util.MD5FileUtils; import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.StringUtils; +import org.apache.log4j.Logger; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -111,11 +113,12 @@ public class TestStartup { } } - /** - * start MiniDFScluster, create a file (to create edits) and do a checkpoint + /** + * Create a number of fsimage checkpoints + * @param count number of checkpoints to create * @throws IOException */ - public void createCheckPoint() throws IOException { + public void createCheckPoint(int count) throws IOException { LOG.info("--starting mini cluster"); // manage dirs parameter set to false MiniDFSCluster cluster = null; @@ -133,15 +136,18 @@ public class TestStartup { sn = new SecondaryNameNode(config); assertNotNull(sn); - // create a file - FileSystem fileSys = cluster.getFileSystem(); - Path file1 = new Path("t1"); - DFSTestUtil.createFile(fileSys, file1, fileSize, fileSize, blockSize, - (short) 1, seed); - - LOG.info("--doing checkpoint"); - sn.doCheckpoint(); // this shouldn't fail - LOG.info("--done checkpoint"); + // Create count new files and checkpoints + for (int i=0; i nameDirs = 
(List)FSNamesystem.getNamespaceDirs(config); + // Corrupt the md5 files in all the namedirs + for (URI uri: nameDirs) { + // Directory layout looks like: + // test/data/dfs/nameN/current/{fsimage,edits,...} + File nameDir = new File(uri.getPath()); + File dfsDir = nameDir.getParentFile(); + assertEquals(dfsDir.getName(), "dfs"); // make sure we got right dir + // Set the md5 file to all zeros + File imageFile = new File(nameDir, + Storage.STORAGE_DIR_CURRENT + "/" + + NNStorage.getImageFileName(0)); + MD5FileUtils.saveMD5File(imageFile, new MD5Hash(new byte[16])); + // Only need to corrupt one if !corruptAll + if (!corruptAll) { + break; + } } } @@ -165,7 +200,7 @@ public class TestStartup { // get name dir and its length, then delete and recreate the directory File dir = new File(nameDirs.get(0).getPath()); // has only one - this.fsimageLength = new File(new File(dir, "current"), + this.fsimageLength = new File(new File(dir, Storage.STORAGE_DIR_CURRENT), NameNodeFile.IMAGE.getName()).length(); if(dir.exists() && !(FileUtil.fullyDelete(dir))) @@ -178,7 +213,7 @@ public class TestStartup { dir = new File( nameEditsDirs.get(0).getPath()); //has only one - this.editsLength = new File(new File(dir, "current"), + this.editsLength = new File(new File(dir, Storage.STORAGE_DIR_CURRENT), NameNodeFile.EDITS.getName()).length(); if(dir.exists() && !(FileUtil.fullyDelete(dir))) @@ -262,7 +297,7 @@ public class TestStartup { config.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY, fileAsURI(new File(hdfsDir, "chkpt")).toString()); - createCheckPoint(); + createCheckPoint(1); corruptNameNodeFiles(); checkNameNodeFiles(); @@ -289,7 +324,7 @@ public class TestStartup { config.set(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY, fileAsURI(new File(hdfsDir, "chkpt")).toString()); - createCheckPoint(); + createCheckPoint(1); corruptNameNodeFiles(); checkNameNodeFiles(); } @@ -447,20 +482,18 @@ public class TestStartup { FileSystem fs = cluster.getFileSystem(); fs.mkdirs(new Path("/test")); - // Directory layout looks like: - // test/data/dfs/nameN/current/{fsimage,edits,...} - File nameDir = new File(cluster.getNameDirs(0).iterator().next().getPath()); - File dfsDir = nameDir.getParentFile(); - assertEquals(dfsDir.getName(), "dfs"); // make sure we got right dir - LOG.info("Shutting down cluster #1"); cluster.shutdown(); cluster = null; - // Corrupt the md5 file to all 0s - File imageFile = new File(nameDir, "current/" + NNStorage.getImageFileName(0)); - MD5FileUtils.saveMD5File(imageFile, new MD5Hash(new byte[16])); - + // Corrupt the md5 files in all the namedirs + corruptFSImageMD5(true); + + // Attach our own log appender so we can verify output + final LogVerificationAppender appender = new LogVerificationAppender(); + final Logger logger = Logger.getRootLogger(); + logger.addAppender(appender); + // Try to start a new cluster LOG.info("\n===========================================\n" + "Starting same cluster after simulated crash"); @@ -471,9 +504,12 @@ public class TestStartup { .build(); fail("Should not have successfully started with corrupt image"); } catch (IOException ioe) { - if (!ioe.getCause().getMessage().contains("is corrupt with MD5")) { - throw ioe; - } + GenericTestUtils.assertExceptionContains( + "Failed to load an FSImage file!", ioe); + int md5failures = appender.countExceptionsWithMessage( + " is corrupt with MD5 checksum of "); + // Two namedirs, so should have seen two failures + assertEquals(2, md5failures); } } finally { if (cluster != null) { @@ -482,6 +518,21 @@ public class 
TestStartup { } } + @Test(timeout=30000) + public void testCorruptImageFallback() throws IOException { + // Create two checkpoints + createCheckPoint(2); + // Delete a single md5sum + corruptFSImageMD5(false); + // Should still be able to start + MiniDFSCluster cluster = new MiniDFSCluster.Builder(config) + .format(false) + .manageDataDfsDirs(false) + .manageNameDfsDirs(false) + .build(); + cluster.waitActive(); +} + /** * This test tests hosts include list contains host names. After namenode * restarts, the still alive datanodes should not have any trouble in getting diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 7eb347e74e0..2f93cca5dd8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.File; import java.io.IOException; @@ -26,6 +27,8 @@ import java.io.OutputStream; import java.net.URI; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -43,7 +46,10 @@ import org.apache.hadoop.hdfs.util.Canceler; import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; +import org.apache.hadoop.util.ThreadUtil; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -59,6 +65,8 @@ public class TestStandbyCheckpoints { protected MiniDFSCluster cluster; protected NameNode nn0, nn1; protected FileSystem fs; + + private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class); @SuppressWarnings("rawtypes") @Before @@ -231,6 +239,49 @@ public class TestStandbyCheckpoints { assertTrue(canceledOne); } + + /** + * Make sure that clients will receive StandbyExceptions even when a + * checkpoint is in progress on the SBN, and therefore the StandbyCheckpointer + * thread will have FSNS lock. Regression test for HDFS-4591. + */ + @Test(timeout=120000) + public void testStandbyExceptionThrownDuringCheckpoint() throws Exception { + + // Set it up so that we know when the SBN checkpoint starts and ends. + FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1); + DelayAnswer answerer = new DelayAnswer(LOG); + Mockito.doAnswer(answerer).when(spyImage1) + .saveNamespace(Mockito.any(FSNamesystem.class), + Mockito.any(Canceler.class)); + + // Perform some edits and wait for a checkpoint to start on the SBN. + doEdits(0, 2000); + nn0.getRpcServer().rollEditLog(); + answerer.waitForCall(); + answerer.proceed(); + assertTrue("SBN is not performing checkpoint but it should be.", + answerer.getFireCount() == 1 && answerer.getResultCount() == 0); + + // Make sure that the lock has actually been taken by the checkpointing + // thread. 
+ ThreadUtil.sleepAtLeastIgnoreInterrupts(1000); + try { + // Perform an RPC to the SBN and make sure it throws a StandbyException. + nn1.getRpcServer().getFileInfo("/"); + fail("Should have thrown StandbyException, but instead succeeded."); + } catch (StandbyException se) { + GenericTestUtils.assertExceptionContains("is not supported", se); + } + + // Make sure that the checkpoint is still going on, implying that the client + // RPC to the SBN happened during the checkpoint. + assertTrue("SBN should have still been checkpointing.", + answerer.getFireCount() == 1 && answerer.getResultCount() == 0); + answerer.waitForResult(); + assertTrue("SBN should have finished checkpointing.", + answerer.getFireCount() == 1 && answerer.getResultCount() == 1); + } private void doEdits(int start, int stop) throws IOException { for (int i = start; i < stop; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 041d754e154..ecd52437b0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -143,6 +143,7 @@ public class TestStandbyIsHot { conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); // We read from the standby to watch block locations HAUtil.setAllowStandbyReads(conf, true); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 56bf4a8ecae..007fda5fb5e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -123,7 +123,25 @@ public class TestNameNodeMetrics { stm.read(buffer,0,4); stm.close(); } - + + /** + * Test that capacity metrics are exported and pass + * basic sanity tests. 
+ */ + @Test (timeout = 1800) + public void testCapacityMetrics() throws Exception { + MetricsRecordBuilder rb = getMetrics(NS_METRICS); + long capacityTotal = MetricsAsserts.getLongGauge("CapacityTotal", rb); + assert(capacityTotal != 0); + long capacityUsed = MetricsAsserts.getLongGauge("CapacityUsed", rb); + long capacityRemaining = + MetricsAsserts.getLongGauge("CapacityRemaining", rb); + long capacityUsedNonDFS = + MetricsAsserts.getLongGauge("CapacityUsedNonDFS", rb); + assert(capacityUsed + capacityRemaining + capacityUsedNonDFS == + capacityTotal); + } + /** Test metrics indicating the number of stale DataNodes */ @Test public void testStaleNodes() throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java index 652979eab48..f6730c1d4c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java @@ -224,7 +224,7 @@ public class TestGetConf { /** * Test empty configuration */ - @Test + @Test(timeout=10000) public void testEmptyConf() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(false); // Verify getting addresses fails @@ -247,7 +247,7 @@ public class TestGetConf { /** * Test invalid argument to the tool */ - @Test + @Test(timeout=10000) public void testInvalidArgument() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); String[] args = {"-invalidArgument"}; @@ -259,7 +259,7 @@ public class TestGetConf { * Tests to make sure the returned addresses are correct in case of default * configuration with no federation */ - @Test + @Test(timeout=10000) public void testNonFederation() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(false); @@ -294,7 +294,7 @@ public class TestGetConf { * Tests to make sure the returned addresses are correct in case of federation * of setup. 
*/ - @Test + @Test(timeout=10000) public void testFederation() throws Exception { final int nsCount = 10; HdfsConfiguration conf = new HdfsConfiguration(false); @@ -333,15 +333,16 @@ public class TestGetConf { verifyAddresses(conf, TestType.NNRPCADDRESSES, true, nnAddresses); } - @Test + @Test(timeout=10000) public void testGetSpecificKey() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); conf.set("mykey", " myval "); String[] args = {"-confKey", "mykey"}; - assertTrue(runTool(conf, args, true).equals("myval\n")); + String toolResult = runTool(conf, args, true); + assertEquals(String.format("myval%n"), toolResult); } - @Test + @Test(timeout=10000) public void testExtraArgsThrowsError() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); conf.set("mykey", "myval"); @@ -354,7 +355,7 @@ public class TestGetConf { * Tests commands other than {@link Command#NAMENODE}, {@link Command#BACKUP}, * {@link Command#SECONDARY} and {@link Command#NNRPCADDRESSES} */ - @Test + @Test(timeout=10000) public void testTool() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(false); for (Command cmd : Command.values()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java index ed2007775fe..971d538b272 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestGSet.java @@ -21,6 +21,7 @@ import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.Random; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.util.Time; import org.junit.Assert; import org.junit.Test; @@ -452,4 +453,81 @@ public class TestGSet { next = e; } } + + /** + * Test for {@link LightWeightGSet#computeCapacity(double, String)} + * with invalid percent less than 0. + */ + @Test(expected=HadoopIllegalArgumentException.class) + public void testComputeCapacityNegativePercent() { + LightWeightGSet.computeCapacity(1024, -1.0, "testMap"); + } + + /** + * Test for {@link LightWeightGSet#computeCapacity(double, String)} + * with invalid percent greater than 100. + */ + @Test(expected=HadoopIllegalArgumentException.class) + public void testComputeCapacityInvalidPercent() { + LightWeightGSet.computeCapacity(1024, 101.0, "testMap"); + } + + /** + * Test for {@link LightWeightGSet#computeCapacity(double, String)} + * with invalid negative max memory + */ + @Test(expected=HadoopIllegalArgumentException.class) + public void testComputeCapacityInvalidMemory() { + LightWeightGSet.computeCapacity(-1, 50.0, "testMap"); + } + + private static boolean isPowerOfTwo(int num) { + return num == 0 || (num > 0 && Integer.bitCount(num) == 1); + } + + /** Return capacity as percentage of total memory */ + private static int getPercent(long total, int capacity) { + // Reference size in bytes + double referenceSize = + System.getProperty("sun.arch.data.model").equals("32") ? 
4.0 : 8.0; + return (int)(((capacity * referenceSize)/total) * 100.0); + } + + /** Check the capacity computed for the given max memory and percentage */ + private static void testCapacity(long maxMemory, double percent) { + int capacity = LightWeightGSet.computeCapacity(maxMemory, percent, "map"); + LightWeightGSet.LOG.info("Validating - total memory " + maxMemory + " percent " + + percent + " returned capacity " + capacity); + // Returned capacity is zero or power of two + Assert.assertTrue(isPowerOfTwo(capacity)); + + // Ensure the capacity returned is the nearest to the asked percentage + int capacityPercent = getPercent(maxMemory, capacity); + if (capacityPercent == percent) { + return; + } else if (capacityPercent > percent) { + Assert.assertTrue(getPercent(maxMemory, capacity * 2) > percent); + } else { + Assert.assertTrue(getPercent(maxMemory, capacity / 2) < percent); + } + } + + /** + * Test for {@link LightWeightGSet#computeCapacity(double, String)} + */ + @Test + public void testComputeCapacity() { + // Tests for boundary conditions where percent or memory are zero + testCapacity(0, 0.0); + testCapacity(100, 0.0); + testCapacity(0, 100.0); + + // Compute capacity for some 100 random max memory and percentage + Random r = new Random(); + for (int i = 0; i < 100; i++) { + long maxMemory = r.nextInt(Integer.MAX_VALUE); + double percent = r.nextInt(101); + testCapacity(maxMemory, percent); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java new file mode 100644 index 00000000000..520107c0707 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hdfs.util; + +import junit.framework.Assert; + +import org.apache.hadoop.hdfs.util.XMLUtils.UnmanglingError; +import org.junit.Test; + +public class TestXMLUtils { + private static void testRoundTrip(String str, String expectedMangled) { + String mangled = XMLUtils.mangleXmlString(str); + Assert.assertEquals(mangled, expectedMangled); + String unmangled = XMLUtils.unmangleXmlString(mangled); + Assert.assertEquals(unmangled, str); + } + + @Test + public void testMangleEmptyString() throws Exception { + testRoundTrip("", ""); + } + + @Test + public void testMangleVanillaString() throws Exception { + testRoundTrip("abcdef", "abcdef"); + } + + @Test + public void testMangleStringWithBackSlash() throws Exception { + testRoundTrip("a\\bcdef", "a\\005c;bcdef"); + testRoundTrip("\\\\", "\\005c;\\005c;"); + } + + @Test + public void testMangleStringWithForbiddenCodePoint() throws Exception { + testRoundTrip("a\u0001bcdef", "a\\0001;bcdef"); + testRoundTrip("a\u0002\ud800bcdef", "a\\0002;\\d800;bcdef"); + } + + @Test + public void testInvalidSequence() throws Exception { + try { + XMLUtils.unmangleXmlString("\\000g;foo"); + Assert.fail("expected an unmangling error"); + } catch (UnmanglingError e) { + // pass through + } + try { + XMLUtils.unmangleXmlString("\\0"); + Assert.fail("expected an unmangling error"); + } catch (UnmanglingError e) { + // pass through + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTokens.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTokens.java new file mode 100644 index 00000000000..9751a666cdb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsTokens.java @@ -0,0 +1,202 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hdfs.web; + +import static org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS; +import static org.junit.Assert.*; +import static org.mockito.Matchers.*; +import static org.mockito.Mockito.*; + +import java.io.IOException; +import java.net.URI; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.web.resources.DeleteOpParam; +import org.apache.hadoop.hdfs.web.resources.GetOpParam; +import org.apache.hadoop.hdfs.web.resources.HttpOpParam; +import org.apache.hadoop.hdfs.web.resources.PostOpParam; +import org.apache.hadoop.hdfs.web.resources.PutOpParam; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestWebHdfsTokens { + static Configuration conf; + static UserGroupInformation ugi; + + @BeforeClass + public static void setup() throws IOException { + conf = new Configuration(); + SecurityUtil.setAuthenticationMethod(KERBEROS, conf); + UserGroupInformation.setConfiguration(conf); + ugi = UserGroupInformation.getCurrentUser(); + } + + @SuppressWarnings("unchecked") + @Test(timeout=1000) + public void testInitWithNoToken() throws IOException { + WebHdfsFileSystem fs = spy(new WebHdfsFileSystem()); + doReturn(null).when(fs).getDelegationToken(anyString()); + doNothing().when(fs).addRenewAction(any(WebHdfsFileSystem.class)); + fs.initialize(URI.create("webhdfs://127.0.0.1:0"), conf); + + // when not in ugi, don't get one + verify(fs).initDelegationToken(); + verify(fs).selectDelegationToken(ugi); + verify(fs, never()).setDelegationToken(any(Token.class)); + verify(fs, never()).getDelegationToken(); + verify(fs, never()).getDelegationToken(anyString()); + } + + @SuppressWarnings("unchecked") + @Test(timeout=1000) + public void testInitWithUGIToken() throws IOException { + WebHdfsFileSystem fs = spy(new WebHdfsFileSystem()); + Token token = mock(Token.class); + doReturn(token).when(fs).selectDelegationToken(ugi); + doReturn(null).when(fs).getDelegationToken(anyString()); + doNothing().when(fs).addRenewAction(any(WebHdfsFileSystem.class)); + fs.initialize(URI.create("webhdfs://127.0.0.1:0"), conf); + + // when in the ugi, store it but don't renew it + verify(fs).initDelegationToken(); + verify(fs).selectDelegationToken(ugi); + verify(fs).setDelegationToken(token); + verify(fs, never()).getDelegationToken(); + verify(fs, never()).getDelegationToken(anyString()); + verify(fs, never()).addRenewAction(fs); + } + + @SuppressWarnings("unchecked") + @Test(timeout=1000) + public void testInternalGetDelegationToken() throws IOException { + WebHdfsFileSystem fs = spy(new WebHdfsFileSystem()); + Token token = mock(Token.class); + doReturn(null).when(fs).selectDelegationToken(ugi); + doReturn(token).when(fs).getDelegationToken(anyString()); + doNothing().when(fs).addRenewAction(any(WebHdfsFileSystem.class)); + fs.initialize(URI.create("webhdfs://127.0.0.1:0"), conf); + + // get token, store it, and renew it + Token token2 = fs.getDelegationToken(); + assertEquals(token2, token); + verify(fs).getDelegationToken(null); + verify(fs).setDelegationToken(token); + verify(fs).addRenewAction(fs); + reset(fs); + + // just return token, don't get/set/renew + token2 = fs.getDelegationToken(); + assertEquals(token2, token); + verify(fs, never()).getDelegationToken(null); + verify(fs, 
never()).setDelegationToken(any(Token.class)); + verify(fs, never()).addRenewAction(fs); + } + + @SuppressWarnings("unchecked") + @Test(timeout=1000) + public void testTokenForNonTokenOp() throws IOException { + WebHdfsFileSystem fs = spy(new WebHdfsFileSystem()); + Token token = mock(Token.class); + doReturn(null).when(fs).selectDelegationToken(ugi); + doReturn(token).when(fs).getDelegationToken(null); + doNothing().when(fs).addRenewAction(any(WebHdfsFileSystem.class)); + fs.initialize(URI.create("webhdfs://127.0.0.1:0"), conf); + + // should get/set/renew token + fs.toUrl(GetOpParam.Op.OPEN, null); + verify(fs).getDelegationToken(); + verify(fs).getDelegationToken(null); + verify(fs).setDelegationToken(token); + verify(fs).addRenewAction(fs); + reset(fs); + + // should return prior token + fs.toUrl(GetOpParam.Op.OPEN, null); + verify(fs).getDelegationToken(); + verify(fs, never()).getDelegationToken(null); + verify(fs, never()).setDelegationToken(token); + verify(fs, never()).addRenewAction(fs); + } + + @Test(timeout=1000) + public void testNoTokenForGetToken() throws IOException { + checkNoTokenForOperation(GetOpParam.Op.GETDELEGATIONTOKEN); + } + + @Test(timeout=1000) + public void testNoTokenForCanclToken() throws IOException { + checkNoTokenForOperation(PutOpParam.Op.RENEWDELEGATIONTOKEN); + } + + @Test(timeout=1000) + public void testNoTokenForCancelToken() throws IOException { + checkNoTokenForOperation(PutOpParam.Op.CANCELDELEGATIONTOKEN); + } + + @SuppressWarnings("unchecked") + private void checkNoTokenForOperation(HttpOpParam.Op op) throws IOException { + WebHdfsFileSystem fs = spy(new WebHdfsFileSystem()); + doReturn(null).when(fs).selectDelegationToken(ugi); + doReturn(null).when(fs).getDelegationToken(null); + doNothing().when(fs).addRenewAction(any(WebHdfsFileSystem.class)); + fs.initialize(URI.create("webhdfs://127.0.0.1:0"), conf); + + // do not get a token! 
+ fs.toUrl(op, null); + verify(fs, never()).getDelegationToken(); + verify(fs, never()).getDelegationToken(null); + verify(fs, never()).setDelegationToken(any(Token.class)); + verify(fs, never()).addRenewAction(fs); + } + + @Test(timeout=1000) + public void testGetOpRequireAuth() { + for (HttpOpParam.Op op : GetOpParam.Op.values()) { + boolean expect = (op == GetOpParam.Op.GETDELEGATIONTOKEN); + assertEquals(expect, op.getRequireAuth()); + } + } + + @Test(timeout=1000) + public void testPutOpRequireAuth() { + for (HttpOpParam.Op op : PutOpParam.Op.values()) { + boolean expect = (op == PutOpParam.Op.RENEWDELEGATIONTOKEN || + op == PutOpParam.Op.CANCELDELEGATIONTOKEN); + assertEquals(expect, op.getRequireAuth()); + } + } + + @Test(timeout=1000) + public void testPostOpRequireAuth() { + for (HttpOpParam.Op op : PostOpParam.Op.values()) { + assertFalse(op.getRequireAuth()); + } + } + + @Test(timeout=1000) + public void testDeleteOpRequireAuth() { + for (HttpOpParam.Op op : DeleteOpParam.Op.values()) { + assertFalse(op.getRequireAuth()); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java index aef467a0ef2..551a37b5c0e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsUrl.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.web; +import static org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -26,78 +27,270 @@ import static org.mockito.Mockito.mock; import java.io.IOException; import java.net.URI; import java.net.URL; +import java.util.Arrays; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; -import org.apache.hadoop.hdfs.web.resources.DelegationParam; -import org.apache.hadoop.hdfs.web.resources.HttpOpParam; -import org.apache.hadoop.hdfs.web.resources.PutOpParam; -import org.apache.hadoop.hdfs.web.resources.TokenArgumentParam; +import org.apache.hadoop.hdfs.web.resources.*; import org.apache.hadoop.io.Text; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.SecurityUtilTestHelper; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.junit.Assert; -import org.junit.Test; +import org.junit.*; public class TestWebHdfsUrl { + // NOTE: port is never used + final URI uri = URI.create(WebHdfsFileSystem.SCHEME + "://" + "127.0.0.1:0"); - @Test - public void testDelegationTokenInUrl() throws IOException { - Configuration conf = new Configuration(); - final String uri = WebHdfsFileSystem.SCHEME + "://" + "127.0.0.1:9071"; - // Turn on security - conf.set(CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION, "kerberos"); - UserGroupInformation.setConfiguration(conf); - UserGroupInformation ugi = 
UserGroupInformation.getCurrentUser(); - DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(new Text( - ugi.getUserName()), null, null); - FSNamesystem namesystem = mock(FSNamesystem.class); - DelegationTokenSecretManager dtSecretManager = new DelegationTokenSecretManager( - 86400000, 86400000, 86400000, 86400000, namesystem); - dtSecretManager.startThreads(); - Token token = new Token( - dtId, dtSecretManager); - token.setService(new Text("127.0.0.1:9071")); - token.setKind(WebHdfsFileSystem.TOKEN_KIND); - ugi.addToken(token); - final WebHdfsFileSystem webhdfs = (WebHdfsFileSystem) FileSystem.get( - URI.create(uri), conf); - String tokenString = token.encodeToUrlString(); - Path fsPath = new Path("/"); - URL renewTokenUrl = webhdfs.toUrl(PutOpParam.Op.RENEWDELEGATIONTOKEN, - fsPath, new TokenArgumentParam(tokenString)); - URL cancelTokenUrl = webhdfs.toUrl(PutOpParam.Op.CANCELDELEGATIONTOKEN, - fsPath, new TokenArgumentParam(tokenString)); - Assert.assertEquals( - generateUrlQueryPrefix(PutOpParam.Op.RENEWDELEGATIONTOKEN, - ugi.getUserName()) - + "&token=" + tokenString, renewTokenUrl.getQuery()); - Token delegationToken = new Token( - token); - delegationToken.setKind(WebHdfsFileSystem.TOKEN_KIND); - Assert.assertEquals( - generateUrlQueryPrefix(PutOpParam.Op.CANCELDELEGATIONTOKEN, - ugi.getUserName()) - + "&token=" - + tokenString - + "&" - + DelegationParam.NAME - + "=" - + delegationToken.encodeToUrlString(), cancelTokenUrl.getQuery()); - } - - private String generateUrlQueryPrefix(HttpOpParam.Op op, String username) { - return "op=" + op.toString() + "&user.name=" + username; + @Before + public void resetUGI() { + UserGroupInformation.setConfiguration(new Configuration()); } - @Test + @Test(timeout=4000) + public void testSimpleAuthParamsInUrl() throws IOException { + Configuration conf = new Configuration(); + + UserGroupInformation ugi = + UserGroupInformation.createRemoteUser("test-user"); + UserGroupInformation.setLoginUser(ugi); + + WebHdfsFileSystem webhdfs = getWebHdfsFileSystem(ugi, conf); + Path fsPath = new Path("/"); + + // send user+token + URL fileStatusUrl = webhdfs.toUrl(GetOpParam.Op.GETFILESTATUS, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETFILESTATUS.toQueryString(), + new UserParam(ugi.getShortUserName()).toString() + }, + fileStatusUrl); + } + + @Test(timeout=4000) + public void testSimpleProxyAuthParamsInUrl() throws IOException { + Configuration conf = new Configuration(); + + UserGroupInformation ugi = + UserGroupInformation.createRemoteUser("test-user"); + ugi = UserGroupInformation.createProxyUser("test-proxy-user", ugi); + UserGroupInformation.setLoginUser(ugi); + + WebHdfsFileSystem webhdfs = getWebHdfsFileSystem(ugi, conf); + Path fsPath = new Path("/"); + + // send real+effective + URL fileStatusUrl = webhdfs.toUrl(GetOpParam.Op.GETFILESTATUS, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETFILESTATUS.toQueryString(), + new UserParam(ugi.getRealUser().getShortUserName()).toString(), + new DoAsParam(ugi.getShortUserName()).toString() + }, + fileStatusUrl); + } + + @Test(timeout=4000) + public void testSecureAuthParamsInUrl() throws IOException { + Configuration conf = new Configuration(); + // fake turning on security so api thinks it should use tokens + SecurityUtil.setAuthenticationMethod(KERBEROS, conf); + UserGroupInformation.setConfiguration(conf); + + UserGroupInformation ugi = + UserGroupInformation.createRemoteUser("test-user"); + ugi.setAuthenticationMethod(KERBEROS); + 
UserGroupInformation.setLoginUser(ugi); + + WebHdfsFileSystem webhdfs = getWebHdfsFileSystem(ugi, conf); + Path fsPath = new Path("/"); + String tokenString = webhdfs.getDelegationToken().encodeToUrlString(); + + // send user + URL getTokenUrl = webhdfs.toUrl(GetOpParam.Op.GETDELEGATIONTOKEN, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getShortUserName()).toString() + }, + getTokenUrl); + + // send user + URL renewTokenUrl = webhdfs.toUrl(PutOpParam.Op.RENEWDELEGATIONTOKEN, + fsPath, new TokenArgumentParam(tokenString)); + checkQueryParams( + new String[]{ + PutOpParam.Op.RENEWDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getShortUserName()).toString(), + new TokenArgumentParam(tokenString).toString(), + }, + renewTokenUrl); + + // send token + URL cancelTokenUrl = webhdfs.toUrl(PutOpParam.Op.CANCELDELEGATIONTOKEN, + fsPath, new TokenArgumentParam(tokenString)); + checkQueryParams( + new String[]{ + PutOpParam.Op.CANCELDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getShortUserName()).toString(), + new TokenArgumentParam(tokenString).toString(), + }, + cancelTokenUrl); + + // send token + URL fileStatusUrl = webhdfs.toUrl(GetOpParam.Op.GETFILESTATUS, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETFILESTATUS.toQueryString(), + new DelegationParam(tokenString).toString() + }, + fileStatusUrl); + + // wipe out internal token to simulate auth always required + webhdfs.setDelegationToken(null); + + // send user + cancelTokenUrl = webhdfs.toUrl(PutOpParam.Op.CANCELDELEGATIONTOKEN, + fsPath, new TokenArgumentParam(tokenString)); + checkQueryParams( + new String[]{ + PutOpParam.Op.CANCELDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getShortUserName()).toString(), + new TokenArgumentParam(tokenString).toString(), + }, + cancelTokenUrl); + + // send user + fileStatusUrl = webhdfs.toUrl(GetOpParam.Op.GETFILESTATUS, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETFILESTATUS.toQueryString(), + new UserParam(ugi.getShortUserName()).toString() + }, + fileStatusUrl); + } + + @Test(timeout=4000) + public void testSecureProxyAuthParamsInUrl() throws IOException { + Configuration conf = new Configuration(); + // fake turning on security so api thinks it should use tokens + SecurityUtil.setAuthenticationMethod(KERBEROS, conf); + UserGroupInformation.setConfiguration(conf); + + UserGroupInformation ugi = + UserGroupInformation.createRemoteUser("test-user"); + ugi.setAuthenticationMethod(KERBEROS); + ugi = UserGroupInformation.createProxyUser("test-proxy-user", ugi); + UserGroupInformation.setLoginUser(ugi); + + WebHdfsFileSystem webhdfs = getWebHdfsFileSystem(ugi, conf); + Path fsPath = new Path("/"); + String tokenString = webhdfs.getDelegationToken().encodeToUrlString(); + + // send real+effective + URL getTokenUrl = webhdfs.toUrl(GetOpParam.Op.GETDELEGATIONTOKEN, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getRealUser().getShortUserName()).toString(), + new DoAsParam(ugi.getShortUserName()).toString() + }, + getTokenUrl); + + // send real+effective + URL renewTokenUrl = webhdfs.toUrl(PutOpParam.Op.RENEWDELEGATIONTOKEN, + fsPath, new TokenArgumentParam(tokenString)); + checkQueryParams( + new String[]{ + PutOpParam.Op.RENEWDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getRealUser().getShortUserName()).toString(), + new DoAsParam(ugi.getShortUserName()).toString(), + new 
TokenArgumentParam(tokenString).toString(), + }, + renewTokenUrl); + + // send token + URL cancelTokenUrl = webhdfs.toUrl(PutOpParam.Op.CANCELDELEGATIONTOKEN, + fsPath, new TokenArgumentParam(tokenString)); + checkQueryParams( + new String[]{ + PutOpParam.Op.CANCELDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getRealUser().getShortUserName()).toString(), + new DoAsParam(ugi.getShortUserName()).toString(), + new TokenArgumentParam(tokenString).toString(), + }, + cancelTokenUrl); + + // send token + URL fileStatusUrl = webhdfs.toUrl(GetOpParam.Op.GETFILESTATUS, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETFILESTATUS.toQueryString(), + new DelegationParam(tokenString).toString() + }, + fileStatusUrl); + + // wipe out internal token to simulate auth always required + webhdfs.setDelegationToken(null); + + // send real+effective + cancelTokenUrl = webhdfs.toUrl(PutOpParam.Op.CANCELDELEGATIONTOKEN, + fsPath, new TokenArgumentParam(tokenString)); + checkQueryParams( + new String[]{ + PutOpParam.Op.CANCELDELEGATIONTOKEN.toQueryString(), + new UserParam(ugi.getRealUser().getShortUserName()).toString(), + new DoAsParam(ugi.getShortUserName()).toString(), + new TokenArgumentParam(tokenString).toString() + }, + cancelTokenUrl); + + // send real+effective + fileStatusUrl = webhdfs.toUrl(GetOpParam.Op.GETFILESTATUS, fsPath); + checkQueryParams( + new String[]{ + GetOpParam.Op.GETFILESTATUS.toQueryString(), + new UserParam(ugi.getRealUser().getShortUserName()).toString(), + new DoAsParam(ugi.getShortUserName()).toString() + }, + fileStatusUrl); + } + + private void checkQueryParams(String[] expected, URL url) { + Arrays.sort(expected); + String[] query = url.getQuery().split("&"); + Arrays.sort(query); + assertEquals(Arrays.toString(expected), Arrays.toString(query)); + } + + private WebHdfsFileSystem getWebHdfsFileSystem(UserGroupInformation ugi, + Configuration conf) throws IOException { + if (UserGroupInformation.isSecurityEnabled()) { + DelegationTokenIdentifier dtId = new DelegationTokenIdentifier(new Text( + ugi.getUserName()), null, null); + FSNamesystem namesystem = mock(FSNamesystem.class); + DelegationTokenSecretManager dtSecretManager = new DelegationTokenSecretManager( + 86400000, 86400000, 86400000, 86400000, namesystem); + dtSecretManager.startThreads(); + Token token = new Token( + dtId, dtSecretManager); + SecurityUtil.setTokenService( + token, NetUtils.createSocketAddr(uri.getAuthority())); + token.setKind(WebHdfsFileSystem.TOKEN_KIND); + ugi.addToken(token); + } + return (WebHdfsFileSystem) FileSystem.get(uri, conf); + } + + @Test(timeout=4000) public void testSelectHdfsDelegationToken() throws Exception { SecurityUtilTestHelper.setTokenServiceUseIp(true); @@ -186,8 +379,5 @@ public class TestWebHdfsUrl { public int getDefaultPort() { return super.getDefaultPort(); } - // don't automatically get a token - @Override - protected void initDelegationToken() throws IOException {} } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java index c228c1f2989..97223ab2fb4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java @@ -17,18 +17,22 @@ */ package org.apache.hadoop.hdfs.web.resources; +import static 
org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.util.Arrays; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.util.StringUtils; import org.junit.Assert; import org.junit.Test; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; - public class TestParam { public static final Log LOG = LogFactory.getLog(TestParam.class); @@ -265,4 +269,20 @@ public class TestParam { UserParam userParam = new UserParam("a$"); assertNotNull(userParam.getValue()); } + + @Test + public void testConcatSourcesParam() { + final String[] strings = {"/", "/foo", "/bar"}; + for(int n = 0; n < strings.length; n++) { + final String[] sub = new String[n]; + final Path[] paths = new Path[n]; + for(int i = 0; i < paths.length; i++) { + paths[i] = new Path(sub[i] = strings[i]); + } + + final String expected = StringUtils.join(",", Arrays.asList(sub)); + final ConcatSourcesParam computed = new ConcatSourcesParam(paths); + Assert.assertEquals(expected, computed.getValue()); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java index 2c4721b6204..83d5a88cb23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/net/TestNetworkTopology.java @@ -26,12 +26,23 @@ import static org.junit.Assert.fail; import java.util.HashMap; import java.util.Map; +import junit.framework.Assert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.junit.Before; import org.junit.Test; public class TestNetworkTopology { + private static final Log LOG = LogFactory.getLog(TestNetworkTopology.class); private final static NetworkTopology cluster = new NetworkTopology(); private DatanodeDescriptor dataNodes[]; @@ -213,4 +224,65 @@ public class TestNetworkTopology { } } } + + @Test(timeout=180000) + public void testInvalidNetworkTopologiesNotCachedInHdfs() throws Exception { + // start a cluster + Configuration conf = new HdfsConfiguration(); + MiniDFSCluster cluster = null; + try { + // bad rack topology + String racks[] = { "/a/b", "/c" }; + String hosts[] = { "foo1.example.com", "foo2.example.com" }; + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2). + racks(racks).hosts(hosts).build(); + cluster.waitActive(); + + NamenodeProtocols nn = cluster.getNameNodeRpc(); + Assert.assertNotNull(nn); + + // Wait for one DataNode to register. + // The other DataNode will not be able to register up because of the rack mismatch. 
+ DatanodeInfo[] info; + while (true) { + info = nn.getDatanodeReport(DatanodeReportType.LIVE); + Assert.assertFalse(info.length == 2); + if (info.length == 1) { + break; + } + Thread.sleep(1000); + } + // Set the network topology of the other node to the match the network + // topology of the node that came up. + int validIdx = info[0].getHostName().equals(hosts[0]) ? 0 : 1; + int invalidIdx = validIdx == 1 ? 0 : 1; + StaticMapping.addNodeToRack(hosts[invalidIdx], racks[validIdx]); + LOG.info("datanode " + validIdx + " came up with network location " + + info[0].getNetworkLocation()); + + // Restart the DN with the invalid topology and wait for it to register. + cluster.restartDataNode(invalidIdx); + Thread.sleep(5000); + while (true) { + info = nn.getDatanodeReport(DatanodeReportType.LIVE); + if (info.length == 2) { + break; + } + if (info.length == 0) { + LOG.info("got no valid DNs"); + } else if (info.length == 1) { + LOG.info("got one valid DN: " + info[0].getHostName() + + " (at " + info[0].getNetworkLocation() + ")"); + } + Thread.sleep(1000); + } + Assert.assertEquals(info[0].getNetworkLocation(), + info[1].getNetworkLocation()); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml index 2fb10837fcd..940c0f6f98e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml @@ -5457,6 +5457,39 @@ + + + checksum: checksum of files(relative path) using globbing + + -fs NAMENODE -mkdir -p dir0 + -fs NAMENODE -put CLITEST_DATA/data15bytes dir0/data15bytes + -fs NAMENODE -put CLITEST_DATA/data30bytes dir0/data30bytes + -fs NAMENODE -put CLITEST_DATA/data60bytes dir0/data60bytes + -fs NAMENODE -put CLITEST_DATA/data120bytes dir0/data120bytes + -fs NAMENODE -checksum dir0/data* + + + -fs NAMENODE -rm -r /user + + + + RegexpComparator + ^dir0/data120bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000a58cdc3c0967fc8cddb7fed5960d06f2 + + + RegexpComparator + ^dir0/data15bytes\tMD5-of-0MD5-of-512CRC32C\t0000020000000000000000007267e9528002723a30939aefc238d665 + + + RegexpComparator + ^dir0/data30bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000fc09371298117c4943cf089b4bd79c96 + + + RegexpComparator + ^dir0/data60bytes\tMD5-of-0MD5-of-512CRC32C\t000002000000000000000000009476431d851dd7b0a8d057a404d7b9 + + + From fd1000bcefa07992ff5c6fae3508f3e33b7955c6 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Wed, 10 Apr 2013 22:45:23 +0000 Subject: [PATCH 25/52] HDFS-4679. Namenode operation checks should be done in a consistent manner. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466721 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/server/namenode/FSNamesystem.java | 260 ++++++++---------- .../org/apache/hadoop/hdfs/TestSafeMode.java | 18 +- 3 files changed, 134 insertions(+), 147 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c0b580cf411..598d3c21fd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -399,6 +399,9 @@ Release 2.0.5-beta - UNRELEASED HDFS-3940. Add Gset#clear method and clear the block map when namenode is shutdown. (suresh) + HDFS-4679. 
Namenode operation checks should be done in a consistent + manner. (suresh) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a9b16494f17..95b9cf02f0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1210,7 +1210,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot set permission for " + src, safeMode); } @@ -1248,7 +1247,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot set owner for " + src, safeMode); } @@ -1302,9 +1300,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlocks getBlockLocations(String src, long offset, long length, boolean doAccessTime, boolean needBlockToken, boolean checkSafeMode) throws FileNotFoundException, UnresolvedLinkException, IOException { - FSPermissionChecker pc = getPermissionChecker(); try { - return getBlockLocationsInt(pc, src, offset, length, doAccessTime, + return getBlockLocationsInt(src, offset, length, doAccessTime, needBlockToken, checkSafeMode); } catch (AccessControlException e) { logAuditEvent(false, "open", src); @@ -1312,14 +1309,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - private LocatedBlocks getBlockLocationsInt(FSPermissionChecker pc, - String src, long offset, long length, boolean doAccessTime, - boolean needBlockToken, boolean checkSafeMode) + private LocatedBlocks getBlockLocationsInt(String src, long offset, + long length, boolean doAccessTime, boolean needBlockToken, + boolean checkSafeMode) throws FileNotFoundException, UnresolvedLinkException, IOException { - if (isPermissionEnabled) { - checkPathAccess(pc, src, FsAction.READ); - } - if (offset < 0) { throw new HadoopIllegalArgumentException( "Negative offset is not supported. File: " + src); @@ -1347,13 +1340,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Get block locations within the specified range, updating the * access times if necessary. 
*/ - private LocatedBlocks getBlockLocationsUpdateTimes(String src, - long offset, - long length, - boolean doAccessTime, - boolean needBlockToken) - throws FileNotFoundException, UnresolvedLinkException, IOException { - + private LocatedBlocks getBlockLocationsUpdateTimes(String src, long offset, + long length, boolean doAccessTime, boolean needBlockToken) + throws FileNotFoundException, + UnresolvedLinkException, IOException { + FSPermissionChecker pc = getPermissionChecker(); for (int attempt = 0; attempt < 2; attempt++) { boolean isReadOp = (attempt == 0); if (isReadOp) { // first attempt is with readlock @@ -1369,6 +1360,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } else { checkOperation(OperationCategory.WRITE); } + if (isPermissionEnabled) { + checkPathAccess(pc, src, FsAction.READ); + } // if the namenode is in safemode, then do not update access time if (isInSafeMode()) { @@ -1411,6 +1405,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void concat(String target, String [] srcs) throws IOException, UnresolvedLinkException { + if(FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) + + " to " + target); + } try { concatInt(target, srcs); } catch (AccessControlException e) { @@ -1421,11 +1419,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void concatInt(String target, String [] srcs) throws IOException, UnresolvedLinkException { - if(FSNamesystem.LOG.isDebugEnabled()) { - FSNamesystem.LOG.debug("concat " + Arrays.toString(srcs) + - " to " + target); - } - // verify args if(target.isEmpty()) { throw new IllegalArgumentException("Target file name is empty"); @@ -1574,6 +1567,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void setTimes(String src, long mtime, long atime) throws IOException, UnresolvedLinkException { + if (!isAccessTimeSupported() && atime != -1) { + throw new IOException("Access time for hdfs is not configured. " + + " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); + } try { setTimesInt(src, mtime, atime); } catch (AccessControlException e) { @@ -1584,16 +1581,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void setTimesInt(String src, long mtime, long atime) throws IOException, UnresolvedLinkException { - if (!isAccessTimeSupported() && atime != -1) { - throw new IOException("Access time for hdfs is not configured. 
" + - " Please set " + DFS_NAMENODE_ACCESSTIME_PRECISION_KEY + " configuration parameter."); - } HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { + throw new SafeModeException("Cannot set times " + src, safeMode); + } // Write access is required to set access and modification times if (isPermissionEnabled) { @@ -1618,6 +1614,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void createSymlink(String target, String link, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { + if (!DFSUtil.isValidName(link)) { + throw new InvalidPathException("Invalid file name: " + link); + } try { createSymlinkInt(target, link, dirPerms, createParent); } catch (AccessControlException e) { @@ -1629,17 +1628,34 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void createSymlinkInt(String target, String link, PermissionStatus dirPerms, boolean createParent) throws IOException, UnresolvedLinkException { + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" + + target + " link=" + link); + } HdfsFileStatus resultingStat = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - + if (isInSafeMode()) { + throw new SafeModeException("Cannot create symlink " + link, safeMode); + } if (!createParent) { verifyParentDir(link); } - createSymlinkInternal(pc, target, link, dirPerms, createParent); + if (!dir.isValidToCreate(link)) { + throw new IOException("failed to create link " + link + +" either because the filename is invalid or the file exists"); + } + if (isPermissionEnabled) { + checkAncestorAccess(pc, link, FsAction.WRITE); + } + // validate that we have enough inodes. + checkFsObjectLimit(); + + // add symbolic link to namespace + dir.addSymlink(link, target, dirPerms, createParent); resultingStat = getAuditFileInfo(link, false); } finally { writeUnlock(); @@ -1648,37 +1664,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, logAuditEvent(true, "createSymlink", link, target, resultingStat); } - /** - * Create a symbolic link. - */ - private void createSymlinkInternal(FSPermissionChecker pc, String target, - String link, PermissionStatus dirPerms, boolean createParent) - throws IOException, UnresolvedLinkException { - assert hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.createSymlink: target=" + - target + " link=" + link); - } - if (isInSafeMode()) { - throw new SafeModeException("Cannot create symlink " + link, safeMode); - } - if (!DFSUtil.isValidName(link)) { - throw new InvalidPathException("Invalid file name: " + link); - } - if (!dir.isValidToCreate(link)) { - throw new IOException("failed to create link " + link - +" either because the filename is invalid or the file exists"); - } - if (isPermissionEnabled) { - checkAncestorAccess(pc, link, FsAction.WRITE); - } - // validate that we have enough inodes. - checkFsObjectLimit(); - - // add symbolic link to namespace - dir.addSymlink(link, target, dirPerms, createParent); - } - /** * Set replication for an existing file. 
* @@ -1798,13 +1783,24 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws AccessControlException, SafeModeException, FileAlreadyExistsException, UnresolvedLinkException, FileNotFoundException, ParentNotDirectoryException, IOException { + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src + + ", holder=" + holder + + ", clientMachine=" + clientMachine + + ", createParent=" + createParent + + ", replication=" + replication + + ", createFlag=" + flag.toString()); + } + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException(src); + } + boolean skipSync = false; final HdfsFileStatus stat; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { - checkOperation(OperationCategory.WRITE); startFileInternal(pc, src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); stat = dir.getFileInfo(src, false); @@ -1847,21 +1843,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, AccessControlException, UnresolvedLinkException, FileNotFoundException, ParentNotDirectoryException, IOException { assert hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: src=" + src - + ", holder=" + holder - + ", clientMachine=" + clientMachine - + ", createParent=" + createParent - + ", replication=" + replication - + ", createFlag=" + flag.toString()); - } + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot create file" + src, safeMode); } - if (!DFSUtil.isValidName(src)) { - throw new InvalidPathException(src); - } - // Verify that the destination does not exist as a directory already. boolean pathExists = dir.exists(src); if (pathExists && dir.isDir(src)) { @@ -1997,21 +1982,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ boolean recoverLease(String src, String holder, String clientMachine) throws IOException { + if (!DFSUtil.isValidName(src)) { + throw new IOException("Invalid file name: " + src); + } + boolean skipSync = false; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException( "Cannot recover the lease of " + src, safeMode); } - if (!DFSUtil.isValidName(src)) { - throw new IOException("Invalid file name: " + src); - } - final INodeFile inode = INodeFile.valueOf(dir.getINode(src), src); if (!inode.isUnderConstruction()) { return true; @@ -2135,13 +2119,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, "Append is not enabled on this NameNode. 
Use the " + DFS_SUPPORT_APPEND_KEY + " configuration option to enable it."); } + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.appendFile: src=" + src + + ", holder=" + holder + + ", clientMachine=" + clientMachine); + } + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException(src); + } + LocatedBlock lb = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); writeLock(); try { - checkOperation(OperationCategory.WRITE); - lb = startFileInternal(pc, src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), false, blockManager.maxReplication, 0); @@ -2434,21 +2425,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean abandonBlock(ExtendedBlock b, String src, String holder) throws LeaseExpiredException, FileNotFoundException, UnresolvedLinkException, IOException { + if(NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b + + "of file " + src); + } checkOperation(OperationCategory.WRITE); writeLock(); try { checkOperation(OperationCategory.WRITE); - // - // Remove the block from the pending creates list - // - if(NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " - +b+"of file "+src); - } if (isInSafeMode()) { throw new SafeModeException("Cannot abandon block " + b + " for fle" + src, safeMode); } + // + // Remove the block from the pending creates list + // INodeFileUnderConstruction file = checkLease(src, holder); dir.removeBlock(src, file, ExtendedBlock.getLocalBlock(b)); if(NameNode.stateChangeLog.isDebugEnabled()) { @@ -2510,19 +2501,23 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ boolean completeFile(String src, String holder, ExtendedBlock last) throws SafeModeException, UnresolvedLinkException, IOException { + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + + src + " for " + holder); + } checkBlock(last); boolean success = false; checkOperation(OperationCategory.WRITE); writeLock(); try { - checkOperation(OperationCategory.WRITE); - - success = completeFileInternal(src, holder, - ExtendedBlock.getLocalBlock(last)); + success = completeFileInternal(src, holder, + ExtendedBlock.getLocalBlock(last)); } finally { writeUnlock(); } getEditLog().logSync(); + NameNode.stateChangeLog.info("DIR* completeFile: " + src + " is closed by " + + holder); return success; } @@ -2530,10 +2525,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, String holder, Block last) throws SafeModeException, UnresolvedLinkException, IOException { assert hasWriteLock(); - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* NameSystem.completeFile: " + - src + " for " + holder); - } + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot complete file " + src, safeMode); } @@ -2569,9 +2561,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } finalizeINodeFileUnderConstruction(src, pendingFile); - - NameNode.stateChangeLog.info("DIR* completeFile: " + src + " is closed by " - + holder); return true; } @@ -2672,18 +2661,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean renameToInt(String src, String dst) throws IOException, UnresolvedLinkException { - boolean status = false; - HdfsFileStatus resultingStat = null; if 
(NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: " + src + " to " + dst); } + if (!DFSUtil.isValidName(dst)) { + throw new IOException("Invalid name: " + dst); + } FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); + boolean status = false; + HdfsFileStatus resultingStat = null; writeLock(); try { - checkOperation(OperationCategory.WRITE); - status = renameToInternal(pc, src, dst); if (status) { resultingStat = getAuditFileInfo(dst, false); @@ -2703,12 +2693,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean renameToInternal(FSPermissionChecker pc, String src, String dst) throws IOException, UnresolvedLinkException { assert hasWriteLock(); + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot rename " + src, safeMode); } - if (!DFSUtil.isValidName(dst)) { - throw new IOException("Invalid name: " + dst); - } if (isPermissionEnabled) { //We should not be doing this. This is move() not renameTo(). //but for now, @@ -2730,16 +2718,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** Rename src to dst */ void renameTo(String src, String dst, Options.Rename... options) throws IOException, UnresolvedLinkException { - HdfsFileStatus resultingStat = null; if (NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.renameTo: with options - " + src + " to " + dst); } + if (!DFSUtil.isValidName(dst)) { + throw new InvalidPathException("Invalid name: " + dst); + } FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); + HdfsFileStatus resultingStat = null; writeLock(); try { - checkOperation(OperationCategory.WRITE); renameToInternal(pc, src, dst, options); resultingStat = getAuditFileInfo(dst, false); } finally { @@ -2758,12 +2748,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private void renameToInternal(FSPermissionChecker pc, String src, String dst, Options.Rename... 
options) throws IOException { assert hasWriteLock(); + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot rename " + src, safeMode); } - if (!DFSUtil.isValidName(dst)) { - throw new InvalidPathException("Invalid name: " + dst); - } if (isPermissionEnabled) { checkParentAccess(pc, src, FsAction.WRITE); checkAncestorAccess(pc, dst, FsAction.WRITE); @@ -2950,16 +2938,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus getFileInfo(String src, boolean resolveLink) throws AccessControlException, UnresolvedLinkException, StandbyException, IOException { + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException("Invalid file name: " + src); + } HdfsFileStatus stat = null; FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); - - if (!DFSUtil.isValidName(src)) { - throw new InvalidPathException("Invalid file name: " + src); - } if (isPermissionEnabled) { checkTraverse(pc, src); } @@ -3016,16 +3003,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean mkdirsInt(String src, PermissionStatus permissions, boolean createParent) throws IOException, UnresolvedLinkException { - HdfsFileStatus resultingStat = null; - boolean status = false; if(NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* NameSystem.mkdirs: " + src); } + if (!DFSUtil.isValidName(src)) { + throw new InvalidPathException(src); + } FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.WRITE); + HdfsFileStatus resultingStat = null; + boolean status = false; writeLock(); try { - checkOperation(OperationCategory.WRITE); status = mkdirsInternal(pc, src, permissions, createParent); if (status) { resultingStat = dir.getFileInfo(src, false); @@ -3047,6 +3036,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, PermissionStatus permissions, boolean createParent) throws IOException, UnresolvedLinkException { assert hasWriteLock(); + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot create directory " + src, safeMode); } @@ -3058,9 +3048,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // a new directory is not created. 
return true; } - if (!DFSUtil.isValidName(src)) { - throw new InvalidPathException(src); - } if (isPermissionEnabled) { checkAncestorAccess(pc, src, FsAction.WRITE); } @@ -3331,8 +3318,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean closeFile, boolean deleteblock, DatanodeID[] newtargets, String[] newtargetstorages) throws IOException, UnresolvedLinkException { - String src = ""; + LOG.info("commitBlockSynchronization(lastblock=" + lastblock + + ", newgenerationstamp=" + newgenerationstamp + + ", newlength=" + newlength + + ", newtargets=" + Arrays.asList(newtargets) + + ", closeFile=" + closeFile + + ", deleteBlock=" + deleteblock + + ")"); checkOperation(OperationCategory.WRITE); + String src = ""; writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -3344,13 +3338,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, "Cannot commitBlockSynchronization while in safe mode", safeMode); } - LOG.info("commitBlockSynchronization(lastblock=" + lastblock - + ", newgenerationstamp=" + newgenerationstamp - + ", newlength=" + newlength - + ", newtargets=" + Arrays.asList(newtargets) - + ", closeFile=" + closeFile - + ", deleteBlock=" + deleteblock - + ")"); final BlockInfo storedBlock = blockManager.getStoredBlock(ExtendedBlock .getLocalBlock(lastblock)); if (storedBlock == null) { @@ -3440,7 +3427,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot renew lease for " + holder, safeMode); } @@ -4916,11 +4902,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void reportBadBlocks(LocatedBlock[] blocks) throws IOException { checkOperation(OperationCategory.WRITE); + NameNode.stateChangeLog.info("*DIR* reportBadBlocks"); writeLock(); try { checkOperation(OperationCategory.WRITE); - - NameNode.stateChangeLog.info("*DIR* reportBadBlocks"); for (int i = 0; i < blocks.length; i++) { ExtendedBlock blk = blocks[i].getBlock(); DatanodeInfo[] nodes = blocks[i].getLocations(); @@ -4983,6 +4968,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { checkOperation(OperationCategory.WRITE); + LOG.info("updatePipeline(block=" + oldBlock + + ", newGenerationStamp=" + newBlock.getGenerationStamp() + + ", newLength=" + newBlock.getNumBytes() + + ", newNodes=" + Arrays.asList(newNodes) + + ", clientName=" + clientName + + ")"); writeLock(); try { checkOperation(OperationCategory.WRITE); @@ -4992,12 +4983,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } assert newBlock.getBlockId()==oldBlock.getBlockId() : newBlock + " and " + oldBlock + " has different block identifier"; - LOG.info("updatePipeline(block=" + oldBlock - + ", newGenerationStamp=" + newBlock.getGenerationStamp() - + ", newLength=" + newBlock.getNumBytes() - + ", newNodes=" + Arrays.asList(newNodes) - + ", clientName=" + clientName - + ")"); updatePipelineInternal(clientName, oldBlock, newBlock, newNodes); } finally { writeUnlock(); @@ -5249,7 +5234,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (isInSafeMode()) { throw new SafeModeException("Cannot issue delegation token", safeMode); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index 6eab01090c7..794b44d438a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -99,7 +99,7 @@ public class TestSafeMode { */ @Test public void testManualSafeMode() throws IOException { - fs = (DistributedFileSystem)cluster.getFileSystem(); + fs = cluster.getFileSystem(); Path file1 = new Path("/tmp/testManualSafeMode/file1"); Path file2 = new Path("/tmp/testManualSafeMode/file2"); @@ -112,7 +112,7 @@ public class TestSafeMode { // now bring up just the NameNode. cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(false).build(); cluster.waitActive(); - dfs = (DistributedFileSystem)cluster.getFileSystem(); + dfs = cluster.getFileSystem(); assertTrue("No datanode is started. Should be in SafeMode", dfs.setSafeMode(SafeModeAction.SAFEMODE_GET)); @@ -322,11 +322,11 @@ public class TestSafeMode { fs.rename(file1, new Path("file2")); }}); - try { - fs.setTimes(file1, 0, 0); - } catch (IOException ioe) { - fail("Set times failed while in SM"); - } + runFsFun("Set time while in SM", new FSRun() { + @Override + public void run(FileSystem fs) throws IOException { + fs.setTimes(file1, 0, 0); + }}); try { DFSTestUtil.readFile(fs, file1); @@ -350,7 +350,7 @@ public class TestSafeMode { conf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY, 1); cluster.restartNameNode(); - fs = (DistributedFileSystem)cluster.getFileSystem(); + fs = cluster.getFileSystem(); String tipMsg = cluster.getNamesystem().getSafemode(); assertTrue("Safemode tip message looks right: " + tipMsg, @@ -375,7 +375,7 @@ public class TestSafeMode { * @throws IOException when there's an issue connecting to the test DFS. */ public void testSafeModeUtils() throws IOException { - dfs = (DistributedFileSystem)cluster.getFileSystem(); + dfs = cluster.getFileSystem(); // Enter safemode. dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); From add00d6d74ee694f8368561f72c4cc83162b4b90 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 01:00:38 +0000 Subject: [PATCH 26/52] YARN-487. Modify path manipulation in LocalDirsHandlerService to let TestDiskFailures pass on Windows. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466746 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/nodemanager/LocalDirsHandlerService.java | 2 +- .../java/org/apache/hadoop/yarn/server/TestDiskFailures.java | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c737b259451..4f270b1c528 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -31,6 +31,9 @@ Trunk - Unreleased YARN-524 TestYarnVersionInfo failing if generated properties doesn't include an SVN URL. (stevel) + YARN-487. Modify path manipulation in LocalDirsHandlerService to let + TestDiskFailures pass on Windows. (Chris Nauroth via vinodkv) + BREAKDOWN OF HADOOP-8562 SUBTASKS YARN-158. Yarn creating package-info.java must not depend on sh. 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index 517b365d060..582db06f5f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -307,7 +307,7 @@ public class LocalDirsHandlerService extends AbstractService { URI uriPath = (new Path(paths[i])).toUri(); if (uriPath.getScheme() == null || uriPath.getScheme().equals(FILE_SCHEME)) { - validPaths.add(uriPath.getPath()); + validPaths.add(new Path(uriPath.getPath()).toString()); } else { LOG.warn(paths[i] + " is not a valid path. Path should be with " + FILE_SCHEME + " scheme or without scheme"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java index 559262d8e78..eab19634734 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestDiskFailures.java @@ -133,10 +133,10 @@ public class TestDiskFailures { dirSvc.init(conf); List localDirs = dirSvc.getLocalDirs(); Assert.assertEquals(1, localDirs.size()); - Assert.assertEquals(localDir2, localDirs.get(0)); + Assert.assertEquals(new Path(localDir2).toString(), localDirs.get(0)); List logDirs = dirSvc.getLogDirs(); Assert.assertEquals(1, logDirs.size()); - Assert.assertEquals(logDir1, logDirs.get(0)); + Assert.assertEquals(new Path(logDir1).toString(), logDirs.get(0)); } private void testDirsFailures(boolean localORLogDirs) throws IOException { From 2e3b56f6e907f15f7c6caaad37d37b9e0ee89963 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 02:00:47 +0000 Subject: [PATCH 27/52] YARN-495. Changed NM reboot behaviour to be a simple resync - kill all containers and re-register with RM. Contributed by Jian He. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466752 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/server/api/records/NodeAction.java | 2 +- .../proto/yarn_server_common_protos.proto | 2 +- .../yarn/server/nodemanager/NodeManager.java | 71 ++++++-- .../nodemanager/NodeManagerEventType.java | 2 +- .../server/nodemanager/NodeStatusUpdater.java | 2 + .../nodemanager/NodeStatusUpdaterImpl.java | 39 +++- .../nodemanager/TestNodeManagerReboot.java | 25 +-- .../nodemanager/TestNodeManagerShutdown.java | 166 +++++++++++++----- .../nodemanager/TestNodeStatusUpdater.java | 55 +----- .../ResourceTrackerService.java | 8 +- .../server/resourcemanager/TestRMRestart.java | 8 +- .../TestResourceTrackerService.java | 2 +- .../TestRMNMRPCResponseId.java | 2 +- 14 files changed, 230 insertions(+), 157 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 4f270b1c528..cef08148004 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -135,6 +135,9 @@ Release 2.0.5-beta - UNRELEASED YARN-479. NM retry behavior for connection to RM should be similar for lost heartbeats (Jian He via bikas) + YARN-495. Changed NM reboot behaviour to be a simple resync - kill all + containers and re-register with RM. (Jian He via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeAction.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeAction.java index 4d8246e7127..652c05fdbc6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeAction.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeAction.java @@ -24,5 +24,5 @@ package org.apache.hadoop.yarn.server.api.records; */ public enum NodeAction { - NORMAL, REBOOT, SHUTDOWN + NORMAL, RESYNC, SHUTDOWN } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index 89ec81c3ab9..7fa1fb74030 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -25,7 +25,7 @@ import "yarn_protos.proto"; enum NodeActionProto { NORMAL = 0; - REBOOT = 1; + RESYNC = 1; SHUTDOWN = 2; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 867a02d2388..a5d16c568c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -81,6 +81,7 @@ public class NodeManager extends CompositeService private Context context; private AsyncDispatcher dispatcher; private ContainerManagerImpl containerManager; + private NodeStatusUpdater nodeStatusUpdater; private static CompositeServiceShutdownHook nodeManagerShutdownHook; private long waitForContainersOnShutdownMillis; @@ -163,7 +164,7 @@ public class NodeManager extends CompositeService addService(nodeHealthChecker); dirsHandler = nodeHealthChecker.getDiskHandler(); - NodeStatusUpdater nodeStatusUpdater = + nodeStatusUpdater = createNodeStatusUpdater(context, dispatcher, nodeHealthChecker); NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor(); @@ -214,35 +215,67 @@ public class NodeManager extends CompositeService if (isStopping.getAndSet(true)) { return; } - - cleanupContainers(); + + cleanupContainers(NodeManagerEventType.SHUTDOWN); super.stop(); DefaultMetricsSystem.shutdown(); } - + + protected void cleanupContainersOnResync() { + //we do not want to block dispatcher thread here + new Thread() { + @Override + public void run() { + cleanupContainers(NodeManagerEventType.RESYNC); + ((NodeStatusUpdaterImpl) nodeStatusUpdater ).rebootNodeStatusUpdater(); + } + }.start(); + } + @SuppressWarnings("unchecked") - protected void cleanupContainers() { + protected void cleanupContainers(NodeManagerEventType eventType) { Map containers = context.getContainers(); if (containers.isEmpty()) { return; } - LOG.info("Containers still running on shutdown: " + containers.keySet()); + LOG.info("Containers still running on " + eventType + " : " + + containers.keySet()); - List containerIds = new ArrayList(containers.keySet()); + List containerIds = + new ArrayList(containers.keySet()); dispatcher.getEventHandler().handle( new CMgrCompletedContainersEvent(containerIds, CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN)); LOG.info("Waiting for containers to be killed"); - long waitStartTime = System.currentTimeMillis(); - while (!containers.isEmpty() && - System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { - try { - Thread.sleep(1000); - } catch (InterruptedException ex) { - LOG.warn("Interrupted while sleeping on container kill", ex); + switch (eventType) { + case SHUTDOWN: + long waitStartTime = System.currentTimeMillis(); + while (!containers.isEmpty() + && System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on container kill on shutdown", + ex); + } } + break; + case RESYNC: + while (!containers.isEmpty()) { + try { + Thread.sleep(1000); + //to remove done containers from the map + nodeStatusUpdater.getNodeStatusAndUpdateContainersInContext(); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on container kill on resync", + ex); + } + } + break; + default: + LOG.warn("Invalid eventType: " + eventType); } // All containers killed @@ -342,9 +375,8 @@ public class NodeManager extends CompositeService case SHUTDOWN: stop(); break; - case REBOOT: - stop(); - reboot(); + case RESYNC: + cleanupContainersOnResync(); break; default: LOG.warn("Invalid shutdown event " + event.getType() + ". 
Ignoring."); @@ -361,6 +393,11 @@ public class NodeManager extends CompositeService return containerManager; } + //For testing + Dispatcher getNMDispatcher(){ + return dispatcher; + } + @VisibleForTesting Context getNMContext() { return this.context; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java index d18cec6c0fb..f4d1caad789 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManagerEventType.java @@ -18,5 +18,5 @@ package org.apache.hadoop.yarn.server.nodemanager; public enum NodeManagerEventType { - SHUTDOWN, REBOOT + SHUTDOWN, RESYNC } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java index f1e6ac3bf4c..41949e7baab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdater.java @@ -18,9 +18,11 @@ package org.apache.hadoop.yarn.server.nodemanager; +import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.service.Service; public interface NodeStatusUpdater extends Service { void sendOutofBandHeartBeat(); + NodeStatus getNodeStatusAndUpdateContainersInContext(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index cca296cd15d..e9583c2a2e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -60,6 +60,8 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.service.AbstractService; +import com.google.common.annotations.VisibleForTesting; + public class NodeStatusUpdaterImpl extends AbstractService implements NodeStatusUpdater { @@ -91,6 +93,9 @@ public class NodeStatusUpdaterImpl extends AbstractService implements private long rmConnectionRetryIntervalMS; private boolean waitForEver; + private Runnable statusUpdaterRunnable; + private Thread statusUpdater; + public NodeStatusUpdaterImpl(Context context, Dispatcher 
dispatcher, NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { super(NodeStatusUpdaterImpl.class.getName()); @@ -169,6 +174,22 @@ public class NodeStatusUpdaterImpl extends AbstractService implements this.isStopped = true; super.stop(); } + + protected void rebootNodeStatusUpdater() { + // Interrupt the updater. + this.isStopped = true; + + try { + statusUpdater.join(); + registerWithRM(); + statusUpdater = new Thread(statusUpdaterRunnable, "Node Status Updater"); + this.isStopped = false; + statusUpdater.start(); + LOG.info("NodeStatusUpdater thread is reRegistered and restarted"); + } catch (Exception e) { + throw new AvroRuntimeException(e); + } + } private boolean isSecurityEnabled() { return UserGroupInformation.isSecurityEnabled(); @@ -188,7 +209,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements conf); } - private void registerWithRM() throws YarnRemoteException { + @VisibleForTesting + protected void registerWithRM() throws YarnRemoteException { Configuration conf = getConfig(); rmConnectWaitMS = conf.getInt( @@ -312,7 +334,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements return appList; } - private NodeStatus getNodeStatus() { + public NodeStatus getNodeStatusAndUpdateContainersInContext() { NodeStatus nodeStatus = recordFactory.newRecordInstance(NodeStatus.class); nodeStatus.setNodeId(this.nodeId); @@ -387,7 +409,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements protected void startStatusUpdater() { - new Thread("Node Status Updater") { + statusUpdaterRunnable = new Runnable() { @Override @SuppressWarnings("unchecked") public void run() { @@ -398,7 +420,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements NodeHeartbeatResponse response = null; int rmRetryCount = 0; long waitStartTime = System.currentTimeMillis(); - NodeStatus nodeStatus = getNodeStatus(); + NodeStatus nodeStatus = getNodeStatusAndUpdateContainersInContext(); nodeStatus.setResponseId(lastHeartBeatID); NodeHeartbeatRequest request = recordFactory @@ -453,11 +475,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements new NodeManagerEvent(NodeManagerEventType.SHUTDOWN)); break; } - if (response.getNodeAction() == NodeAction.REBOOT) { + if (response.getNodeAction() == NodeAction.RESYNC) { LOG.info("Node is out of sync with ResourceManager," + " hence rebooting."); dispatcher.getEventHandler().handle( - new NodeManagerEvent(NodeManagerEventType.REBOOT)); + new NodeManagerEvent(NodeManagerEventType.RESYNC)); break; } @@ -500,6 +522,9 @@ public class NodeStatusUpdaterImpl extends AbstractService implements } } } - }.start(); + }; + statusUpdater = + new Thread(statusUpdaterRunnable, "Node Status Updater"); + statusUpdater.start(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index 10a85c74804..9ac237b6614 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -160,7 +160,10 @@ public class 
TestNodeManagerReboot { "container is launched", numOfLocalDirs(nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0); - nm.handle(new NodeManagerEvent(NodeManagerEventType.REBOOT)); + // restart the NodeManager + nm.stop(); + nm = new MyNodeManager(); + nm.start(); numTries = 0; while ((numOfLocalDirs(nmLocalDir.getAbsolutePath(), ContainerLocalizer @@ -250,26 +253,6 @@ public class TestNodeManagerReboot { return delService; } - // mimic part of reboot process - @Override - public void handle(NodeManagerEvent event) { - switch (event.getType()) { - case SHUTDOWN: - this.stop(); - break; - case REBOOT: - this.stop(); - this.createNewMyNodeManager().start(); - break; - default: - LOG.warn("Invalid shutdown event " + event.getType() + ". Ignoring."); - } - } - - private MyNodeManager createNewMyNodeManager() { - return new MyNodeManager(); - } - private YarnConfiguration createNMConfig() { YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.NM_PMEM_MB, 5 * 1024); // 5GB diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index f42261765fb..72f3433c027 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -28,6 +28,9 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.BrokenBarrierException; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.CyclicBarrier; import junit.framework.Assert; @@ -49,9 +52,12 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; import org.junit.Before; @@ -71,6 +77,7 @@ public class TestNodeManagerShutdown { .getRecordFactory(null); static final String user = "nobody"; private FileContext localFS; + private CyclicBarrier syncBarrier = new CyclicBarrier(2); @Before public void setup() throws UnsupportedFileSystemException { @@ -91,52 +98,7 @@ public class TestNodeManagerShutdown { NodeManager nm = getNodeManager(); nm.init(createNMConfig()); nm.start(); - - ContainerManagerImpl containerManager = nm.getContainerManager(); - File scriptFile = createUnhaltingScriptFile(); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - - // Construct the Container-id - ContainerId cId = createContainerId(); - 
containerLaunchContext.setContainerId(cId); - - containerLaunchContext.setUser(user); - - URL localResourceUri = - ConverterUtils.getYarnUrlFromPath(localFS - .makeQualified(new Path(scriptFile.getAbsolutePath()))); - LocalResource localResource = - recordFactory.newRecordInstance(LocalResource.class); - localResource.setResource(localResourceUri); - localResource.setSize(-1); - localResource.setVisibility(LocalResourceVisibility.APPLICATION); - localResource.setType(LocalResourceType.FILE); - localResource.setTimestamp(scriptFile.lastModified()); - String destinationFile = "dest_file"; - Map localResources = - new HashMap(); - localResources.put(destinationFile, localResource); - containerLaunchContext.setLocalResources(localResources); - containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); - containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); - StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); - startRequest.setContainerLaunchContext(containerLaunchContext); - containerManager.startContainer(startRequest); - - GetContainerStatusRequest request = - recordFactory.newRecordInstance(GetContainerStatusRequest.class); - request.setContainerId(cId); - ContainerStatus containerStatus = - containerManager.getContainerStatus(request).getStatus(); - Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState()); + startContainers(nm); final int MAX_TRIES=20; int numTries = 0; @@ -170,6 +132,74 @@ public class TestNodeManagerShutdown { reader.close(); } + @SuppressWarnings("unchecked") + @Test + public void testKillContainersOnResync() throws IOException, InterruptedException { + NodeManager nm = new TestNodeManager(); + YarnConfiguration conf = createNMConfig(); + nm.init(conf); + nm.start(); + startContainers(nm); + + assert ((TestNodeManager) nm).getNMRegistrationCount() == 1; + nm.getNMDispatcher().getEventHandler(). 
+ handle( new NodeManagerEvent(NodeManagerEventType.RESYNC)); + try { + syncBarrier.await(); + } catch (BrokenBarrierException e) { + } + assert ((TestNodeManager) nm).getNMRegistrationCount() == 2; + } + + private void startContainers(NodeManager nm) throws IOException { + ContainerManagerImpl containerManager = nm.getContainerManager(); + File scriptFile = createUnhaltingScriptFile(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + + // Construct the Container-id + ContainerId cId = createContainerId(); + containerLaunchContext.setContainerId(cId); + + containerLaunchContext.setUser(user); + + URL localResourceUri = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource localResource = + recordFactory.newRecordInstance(LocalResource.class); + localResource.setResource(localResourceUri); + localResource.setSize(-1); + localResource.setVisibility(LocalResourceVisibility.APPLICATION); + localResource.setType(LocalResourceType.FILE); + localResource.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, localResource); + containerLaunchContext.setLocalResources(localResources); + containerLaunchContext.setUser(containerLaunchContext.getUser()); + List commands = new ArrayList(); + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + containerLaunchContext.setCommands(commands); + containerLaunchContext.setResource(recordFactory + .newRecordInstance(Resource.class)); + containerLaunchContext.getResource().setMemory(1024); + StartContainerRequest startRequest = + recordFactory.newRecordInstance(StartContainerRequest.class); + startRequest.setContainerLaunchContext(containerLaunchContext); + containerManager.startContainer(startRequest); + + GetContainerStatusRequest request = + recordFactory.newRecordInstance(GetContainerStatusRequest.class); + request.setContainerId(cId); + ContainerStatus containerStatus = + containerManager.getContainerStatus(request).getStatus(); + Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState()); + } + private ContainerId createContainerId() { ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(0); @@ -226,4 +256,48 @@ public class TestNodeManagerShutdown { } }; } + + class TestNodeManager extends NodeManager { + + private int registrationCount = 0; + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new TestNodeStatusUpdaterImpl(context, dispatcher, + healthChecker, metrics); + } + + public int getNMRegistrationCount() { + return registrationCount; + } + + class TestNodeStatusUpdaterImpl extends MockNodeStatusUpdater { + + public TestNodeStatusUpdaterImpl(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + } + + @Override + protected void registerWithRM() throws YarnRemoteException { + super.registerWithRM(); + registrationCount++; + } + + @Override + protected void rebootNodeStatusUpdater() { + ConcurrentMap containers = + getNMContext().getContainers(); + // ensure that containers are empty before restart nodeStatusUpdater + Assert.assertTrue(containers.isEmpty()); + super.rebootNodeStatusUpdater(); + try { + 
syncBarrier.await(); + } catch (InterruptedException e) { + } catch (BrokenBarrierException e) { + } + } + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index b0f3093f510..c06a54dc118 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -99,7 +99,6 @@ public class TestNodeStatusUpdater { private final List registeredNodes = new ArrayList(); private final Configuration conf = createNMConfig(); private NodeManager nm; - protected NodeManager rebootedNodeManager; private boolean containerStatusBackupSuccessfully = true; private List completedContainerStatusList = new ArrayList(); @@ -663,8 +662,8 @@ public class TestNodeStatusUpdater { } @Override - protected void cleanupContainers() { - super.cleanupContainers(); + protected void cleanupContainers(NodeManagerEventType eventType) { + super.cleanupContainers(NodeManagerEventType.SHUTDOWN); numCleanups.incrementAndGet(); } }; @@ -717,50 +716,6 @@ public class TestNodeStatusUpdater { Assert.assertEquals(STATE.STOPPED, nm.getServiceState()); } - @Test - public void testNodeReboot() throws Exception { - nm = getNodeManager(NodeAction.REBOOT); - YarnConfiguration conf = createNMConfig(); - nm.init(conf); - Assert.assertEquals(STATE.INITED, nm.getServiceState()); - nm.start(); - - int waitCount = 0; - while (heartBeatID < 1 && waitCount++ != 20) { - Thread.sleep(500); - } - Assert.assertFalse(heartBeatID < 1); - - // NM takes a while to reach the STOPPED state. 
- waitCount = 0; - while (nm.getServiceState() != STATE.STOPPED && waitCount++ != 20) { - LOG.info("Waiting for NM to stop.."); - Thread.sleep(1000); - } - Assert.assertEquals(STATE.STOPPED, nm.getServiceState()); - - waitCount = 0; - while (null == rebootedNodeManager && waitCount++ != 20) { - LOG.info("Waiting for NM to reinitialize.."); - Thread.sleep(1000); - } - - waitCount = 0; - while (rebootedNodeManager.getServiceState() != STATE.STARTED && waitCount++ != 20) { - LOG.info("Waiting for NM to start.."); - Thread.sleep(1000); - } - Assert.assertEquals(STATE.STARTED, rebootedNodeManager.getServiceState()); - - rebootedNodeManager.stop(); - waitCount = 0; - while (rebootedNodeManager.getServiceState() != STATE.STOPPED && waitCount++ != 20) { - LOG.info("Waiting for NM to stop.."); - Thread.sleep(1000); - } - Assert.assertEquals(STATE.STOPPED, rebootedNodeManager.getServiceState()); - } - @Test public void testNMShutdownForRegistrationFailure() { @@ -1108,12 +1063,6 @@ public class TestNodeStatusUpdater { myNodeStatusUpdater.resourceTracker = myResourceTracker2; return myNodeStatusUpdater; } - - @Override - NodeManager createNewNodeManager() { - rebootedNodeManager = getNodeManager(NodeAction.NORMAL); - return rebootedNodeManager; - } }; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 27f74d4f740..258c7dc0e47 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -73,13 +73,13 @@ public class ResourceTrackerService extends AbstractService implements private Server server; private InetSocketAddress resourceTrackerAddress; - private static final NodeHeartbeatResponse reboot = recordFactory + private static final NodeHeartbeatResponse resync = recordFactory .newRecordInstance(NodeHeartbeatResponse.class); private static final NodeHeartbeatResponse shutDown = recordFactory .newRecordInstance(NodeHeartbeatResponse.class); static { - reboot.setNodeAction(NodeAction.REBOOT); + resync.setNodeAction(NodeAction.RESYNC); shutDown.setNodeAction(NodeAction.SHUTDOWN); } @@ -220,7 +220,7 @@ public class ResourceTrackerService extends AbstractService implements if (rmNode == null) { /* node does not exist */ LOG.info("Node not found rebooting " + remoteNodeStatus.getNodeId()); - return reboot; + return resync; } // Send ping @@ -250,7 +250,7 @@ public class ResourceTrackerService extends AbstractService implements // TODO: Just sending reboot is not enough. Think more. 
this.rmContext.getDispatcher().getEventHandler().handle( new RMNodeEvent(nodeId, RMNodeEventType.REBOOTING)); - return reboot; + return resync; } // Heartbeat response diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index d19879c0a3b..78adf79eba0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -225,9 +225,9 @@ public class TestRMRestart { // NM should be rebooted on heartbeat, even first heartbeat for nm2 NodeHeartbeatResponse hbResponse = nm1.nodeHeartbeat(true); - Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction()); + Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction()); hbResponse = nm2.nodeHeartbeat(true); - Assert.assertEquals(NodeAction.REBOOT, hbResponse.getNodeAction()); + Assert.assertEquals(NodeAction.RESYNC, hbResponse.getNodeAction()); // new NM to represent NM re-register nm1 = rm2.registerNode("h1:1234", 15120); @@ -235,9 +235,9 @@ public class TestRMRestart { // verify no more reboot response sent hbResponse = nm1.nodeHeartbeat(true); - Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction()); + Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction()); hbResponse = nm2.nodeHeartbeat(true); - Assert.assertTrue(NodeAction.REBOOT != hbResponse.getNodeAction()); + Assert.assertTrue(NodeAction.RESYNC != hbResponse.getNodeAction()); // assert app1 attempt is saved attempt1 = loadedApp1.getCurrentAppAttempt(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 61ed065fb61..af9d5d2c0bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -282,7 +282,7 @@ public class TestResourceTrackerService { nodeHeartbeat = nm2.nodeHeartbeat( new HashMap>(), true, -100); - Assert.assertTrue(NodeAction.REBOOT.equals(nodeHeartbeat.getNodeAction())); + Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); checkRebootedNMCount(rm, ++initialMetricCount); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java index 984d7cdfcf5..1fd1b2c9d36 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/TestRMNMRPCResponseId.java @@ -130,6 +130,6 @@ public class TestRMNMRPCResponseId { nodeStatus.setResponseId(0); response = resourceTrackerService.nodeHeartbeat(nodeHeartBeatRequest); - Assert.assertTrue(NodeAction.REBOOT.equals(response.getNodeAction())); + Assert.assertTrue(NodeAction.RESYNC.equals(response.getNodeAction())); } } \ No newline at end of file From 4234bc87b3e0bf7e9716d6ca1873b8bb0239472e Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 02:08:11 +0000 Subject: [PATCH 28/52] YARN-539. Addressed memory leak of LocalResource objects NM when a resource localization fails. Contributed by Omkar Vinit Joshi. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466756 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../dev-support/findbugs-exclude.xml | 7 + .../localizer/LocalResourcesTracker.java | 3 - .../localizer/LocalResourcesTrackerImpl.java | 42 +++-- .../localizer/LocalizedResource.java | 35 +++- .../ResourceLocalizationService.java | 80 +++------ .../localizer/ResourceState.java | 3 +- .../localizer/event/ResourceEventType.java | 4 +- .../ResourceFailedLocalizationEvent.java | 39 +++++ .../TestLocalResourcesTrackerImpl.java | 161 +++++++++++++++++- 10 files changed, 291 insertions(+), 86 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index cef08148004..a7ce38e6c35 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -220,6 +220,9 @@ Release 2.0.5-beta - UNRELEASED YARN-534. Change RM restart recovery to also account for AM max-attempts configuration after the restart. (Jian He via vinodkv) + YARN-539. Addressed memory leak of LocalResource objects NM when a resource + localization fails. 
(Omkar Vinit Joshi via vinodkv) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 247406434e7..4ba2d72289e 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -270,4 +270,11 @@ + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java index 2e795e54a10..98ec471abf0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTracker.java @@ -40,8 +40,5 @@ interface LocalResourcesTracker String getUser(); - // TODO: Remove this in favour of EventHandler.handle - void localizationCompleted(LocalResourceRequest req, boolean success); - long nextUniqueNumber(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java index 53ca9013da8..786b58ca5d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalResourcesTrackerImpl.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; /** @@ -96,13 +97,22 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { this.conf = conf; } + /* + * Synchronizing this method for avoiding races due to multiple ResourceEvent's + * coming to LocalResourcesTracker from Public/Private localizer and + * Resource Localization Service. 
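+ *
+ * Illustrative sketch only; tracker, req, path, size and containerId below
+ * are placeholder names. Two threads may deliver events for the same
+ * LocalResourceRequest at the same time, e.g.
+ *
+ *   tracker.handle(new ResourceLocalizedEvent(req, path, size));   // public localizer thread
+ *   tracker.handle(new ResourceReleaseEvent(req, containerId));    // ResourceLocalizationService thread
+ *
+ * Without synchronization both calls could read and update the localrsrc
+ * entry for req concurrently.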
+ */ @Override - public void handle(ResourceEvent event) { + public synchronized void handle(ResourceEvent event) { LocalResourceRequest req = event.getLocalResourceRequest(); LocalizedResource rsrc = localrsrc.get(req); switch (event.getType()) { - case REQUEST: case LOCALIZED: + if (useLocalCacheDirectoryManager) { + inProgressLocalResourcesMap.remove(req); + } + break; + case REQUEST: if (rsrc != null && (!isResourcePresent(rsrc))) { LOG.info("Resource " + rsrc.getLocalPath() + " is missing, localizing it again"); @@ -117,10 +127,24 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { break; case RELEASE: if (null == rsrc) { - LOG.info("Release unknown rsrc null (discard)"); + // The container sent a release event on a resource which + // 1) Failed + // 2) Removed for some reason (ex. disk is no longer accessible) + ResourceReleaseEvent relEvent = (ResourceReleaseEvent) event; + LOG.info("Container " + relEvent.getContainer() + + " sent RELEASE event on a resource request " + req + + " not present in cache."); return; } break; + case LOCALIZATION_FAILED: + decrementFileCountForLocalCacheDirectory(req, null); + /* + * If resource localization fails then Localized resource will be + * removed from local cache. + */ + localrsrc.remove(req); + break; } rsrc.handle(event); } @@ -279,18 +303,6 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker { } } - @Override - public void localizationCompleted(LocalResourceRequest req, - boolean success) { - if (useLocalCacheDirectoryManager) { - if (!success) { - decrementFileCountForLocalCacheDirectory(req, null); - } else { - inProgressLocalResourcesMap.remove(req); - } - } - } - @Override public long nextUniqueNumber() { return uniqueNumberGenerator.incrementAndGet(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java index 00709fd91c2..f0cd87b573a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java @@ -32,10 +32,12 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceFailedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent; @@ -89,6 +91,8 @@ public class LocalizedResource implements EventHandler { .addTransition(ResourceState.DOWNLOADING, EnumSet.of(ResourceState.DOWNLOADING, ResourceState.INIT), ResourceEventType.RELEASE, new ReleasePendingTransition()) + .addTransition(ResourceState.DOWNLOADING, ResourceState.FAILED, + ResourceEventType.LOCALIZATION_FAILED, new FetchFailedTransition()) // From LOCALIZED (ref >= 0, on disk) .addTransition(ResourceState.LOCALIZED, ResourceState.LOCALIZED, @@ -126,12 +130,14 @@ public class LocalizedResource implements EventHandler { } private void release(ContainerId container) { - if (!ref.remove(container)) { - LOG.info("Attempt to release claim on " + this + - " from unregistered container " + container); - assert false; // TODO: FIX + if (ref.remove(container)) { + // updating the timestamp only in case of success. + timestamp.set(currentTime()); + } else { + LOG.info("Container " + container + + " doesn't exist in the container list of the Resource " + this + + " to which it sent RELEASE event"); } - timestamp.set(currentTime()); } private long currentTime() { @@ -250,6 +256,25 @@ public class LocalizedResource implements EventHandler { } } + /** + * Resource localization failed, notify waiting containers. + */ + @SuppressWarnings("unchecked") + private static class FetchFailedTransition extends ResourceTransition { + @Override + public void transition(LocalizedResource rsrc, ResourceEvent event) { + ResourceFailedLocalizationEvent failedEvent = + (ResourceFailedLocalizationEvent) event; + Queue containers = rsrc.ref; + Throwable failureCause = failedEvent.getCause(); + for (ContainerId container : containers) { + rsrc.dispatcher.getEventHandler().handle( + new ContainerResourceFailedEvent(container, failedEvent + .getLocalResourceRequest(), failureCause)); + } + } + } + /** * Resource already localized, notify immediately. 
*/ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index 5058cb2cad9..7b9873a1f45 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -84,7 +84,6 @@ import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResour import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerHeartbeatResponse; import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus; -import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.ResourceStatusType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; @@ -101,6 +100,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.even import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent; @@ -683,7 +683,6 @@ public class ResourceLocalizationService extends CompositeService } @Override - @SuppressWarnings("unchecked") // dispatcher not typed public void run() { try { // TODO shutdown, better error handling esp. 
DU @@ -699,10 +698,8 @@ public class ResourceLocalizationService extends CompositeService return; } LocalResourceRequest key = assoc.getResource().getRequest(); - assoc.getResource().handle( - new ResourceLocalizedEvent(key, - local, FileUtil.getDU(new File(local.toUri())))); - publicRsrc.localizationCompleted(key, true); + publicRsrc.handle(new ResourceLocalizedEvent(key, local, FileUtil + .getDU(new File(local.toUri())))); synchronized (attempts) { attempts.remove(key); } @@ -710,13 +707,10 @@ public class ResourceLocalizationService extends CompositeService LOG.info("Failed to download rsrc " + assoc.getResource(), e.getCause()); LocalResourceRequest req = assoc.getResource().getRequest(); - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent( - assoc.getContext().getContainerId(), - req, e.getCause())); - publicRsrc.localizationCompleted(req, false); - List reqs; + publicRsrc.handle(new ResourceFailedLocalizationEvent(req, e + .getCause())); synchronized (attempts) { + List reqs; reqs = attempts.get(req); if (null == reqs) { LOG.error("Missing pending list for " + req); @@ -724,13 +718,6 @@ public class ResourceLocalizationService extends CompositeService } attempts.remove(req); } - // let the other containers know about the localization failure - for (LocalizerResourceRequestEvent reqEvent : reqs) { - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent( - reqEvent.getContext().getContainerId(), - reqEvent.getResource().getRequest(), e.getCause())); - } } catch (CancellationException e) { // ignore; shutting down } @@ -810,13 +797,14 @@ public class ResourceLocalizationService extends CompositeService return null; } - // TODO this sucks. Fix it later - @SuppressWarnings("unchecked") // dispatcher not typed LocalizerHeartbeatResponse update( List remoteResourceStatuses) { LocalizerHeartbeatResponse response = recordFactory.newRecordInstance(LocalizerHeartbeatResponse.class); + String user = context.getUser(); + ApplicationId applicationId = + context.getContainerId().getApplicationAttemptId().getApplicationId(); // The localizer has just spawned. Start giving it resources for // remote-fetching. if (remoteResourceStatuses.isEmpty()) { @@ -847,6 +835,11 @@ public class ResourceLocalizationService extends CompositeService } ArrayList rsrcs = new ArrayList(); + /* + * TODO : It doesn't support multiple downloads per ContainerLocalizer + * at the same time. We need to think whether we should support this. + */ + for (LocalResourceStatus stat : remoteResourceStatuses) { LocalResource rsrc = stat.getResource(); LocalResourceRequest req = null; @@ -865,11 +858,10 @@ public class ResourceLocalizationService extends CompositeService case FETCH_SUCCESS: // notify resource try { - assoc.getResource().handle( - new ResourceLocalizedEvent(req, - ConverterUtils.getPathFromYarnURL(stat.getLocalPath()), - stat.getLocalSize())); - localizationCompleted(stat); + getLocalResourcesTracker(req.getVisibility(), user, applicationId) + .handle( + new ResourceLocalizedEvent(req, ConverterUtils + .getPathFromYarnURL(stat.getLocalPath()), stat.getLocalSize())); } catch (URISyntaxException e) { } if (pending.isEmpty()) { // TODO: Synchronization @@ -899,19 +891,16 @@ public class ResourceLocalizationService extends CompositeService LOG.info("DEBUG: FAILED " + req, stat.getException()); assoc.getResource().unlock(); response.setLocalizerAction(LocalizerAction.DIE); - localizationCompleted(stat); - // TODO: Why is this event going directly to the container. 
Why not - // the resource itself? What happens to the resource? Is it removed? - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent(context.getContainerId(), - req, stat.getException())); + getLocalResourcesTracker(req.getVisibility(), user, applicationId) + .handle( + new ResourceFailedLocalizationEvent(req, stat.getException())); break; default: LOG.info("Unknown status: " + stat.getStatus()); response.setLocalizerAction(LocalizerAction.DIE); - dispatcher.getEventHandler().handle( - new ContainerResourceFailedEvent(context.getContainerId(), - req, stat.getException())); + getLocalResourcesTracker(req.getVisibility(), user, applicationId) + .handle( + new ResourceFailedLocalizationEvent(req, stat.getException())); break; } } @@ -919,27 +908,6 @@ public class ResourceLocalizationService extends CompositeService return response; } - private void localizationCompleted(LocalResourceStatus stat) { - try { - LocalResource rsrc = stat.getResource(); - LocalResourceRequest key = new LocalResourceRequest(rsrc); - String user = context.getUser(); - ApplicationId appId = - context.getContainerId().getApplicationAttemptId() - .getApplicationId(); - LocalResourceVisibility vis = rsrc.getVisibility(); - LocalResourcesTracker tracker = - getLocalResourcesTracker(vis, user, appId); - if (stat.getStatus() == ResourceStatusType.FETCH_SUCCESS) { - tracker.localizationCompleted(key, true); - } else { - tracker.localizationCompleted(key, false); - } - } catch (URISyntaxException e) { - LOG.error("Invalid resource URL specified", e); - } - } - private Path getPathForLocalization(LocalResource rsrc) throws IOException, URISyntaxException { String user = context.getUser(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java index 751f60e0af1..75c8ad7663c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceState.java @@ -20,5 +20,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; enum ResourceState { INIT, DOWNLOADING, - LOCALIZED + LOCALIZED, + FAILED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java index d68a1b6d391..e657c0acf3c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceEventType.java @@ -29,5 +29,7 @@ public enum 
ResourceEventType { /** See {@link ResourceLocalizedEvent} */ LOCALIZED, /** See {@link ResourceReleaseEvent} */ - RELEASE + RELEASE, + /** See {@link ResourceFailedLocalizationEvent} */ + LOCALIZATION_FAILED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java new file mode 100644 index 00000000000..79b28bac908 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/event/ResourceFailedLocalizationEvent.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event; + +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalResourceRequest; + +/** + * This event is sent by the localizer in case resource localization fails for + * the requested resource. 
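+ *
+ * A minimal usage sketch; tracker and req below are placeholder names for a
+ * LocalResourcesTracker and a pending LocalResourceRequest:
+ *
+ *   // Raised when a download attempt fails. The tracker drops the entry from
+ *   // its cache and the LocalizedResource moves from DOWNLOADING to FAILED,
+ *   // sending a ContainerResourceFailedEvent to every waiting container.
+ *   tracker.handle(new ResourceFailedLocalizationEvent(req,
+ *       new IOException("download failed")));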
+ */ +public class ResourceFailedLocalizationEvent extends ResourceEvent { + + private Throwable cause; + + public ResourceFailedLocalizationEvent(LocalResourceRequest rsrc, + Throwable cause) { + super(rsrc, ResourceEventType.LOCALIZATION_FAILED); + this.cause = cause; + } + + public Throwable getCause() { + return cause; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java index a8bbdb03521..b2caba02e81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalResourcesTrackerImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer; import static org.mockito.Mockito.any; +import static org.mockito.Matchers.isA; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -41,11 +42,15 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceFailedEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.LocalizerResourceRequestEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceFailedLocalizationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceLocalizedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceReleaseEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.event.ResourceRequestEvent; @@ -224,6 +229,142 @@ public class TestLocalResourcesTrackerImpl { } } + @Test(timeout = 1000) + @SuppressWarnings("unchecked") + public void testLocalResourceCache() { + String user = "testuser"; + DrainDispatcher dispatcher = null; + try { + Configuration conf = new Configuration(); + dispatcher = createDispatcher(conf); + + EventHandler localizerEventHandler = + mock(EventHandler.class); + EventHandler containerEventHandler = + mock(EventHandler.class); + + // Registering event handlers. 
+ dispatcher.register(LocalizerEventType.class, localizerEventHandler); + dispatcher.register(ContainerEventType.class, containerEventHandler); + + ConcurrentMap localrsrc = + new ConcurrentHashMap(); + LocalResourcesTracker tracker = + new LocalResourcesTrackerImpl(user, dispatcher, localrsrc, true, conf); + + LocalResourceRequest lr = + createLocalResourceRequest(user, 1, 1, LocalResourceVisibility.PUBLIC); + + // Creating 2 containers for same application which will be requesting + // same local resource. + // Container 1 requesting local resource. + ContainerId cId1 = BuilderUtils.newContainerId(1, 1, 1, 1); + LocalizerContext lc1 = new LocalizerContext(user, cId1, null); + ResourceEvent reqEvent1 = + new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc1); + + // No resource request is initially present in local cache + Assert.assertEquals(0, localrsrc.size()); + + // Container-1 requesting local resource. + tracker.handle(reqEvent1); + + // New localized Resource should have been added to local resource map + // and the requesting container will be added to its waiting queue. + Assert.assertEquals(1, localrsrc.size()); + Assert.assertTrue(localrsrc.containsKey(lr)); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId1)); + Assert.assertEquals(ResourceState.DOWNLOADING, localrsrc.get(lr) + .getState()); + + // Container 2 requesting the resource + ContainerId cId2 = BuilderUtils.newContainerId(1, 1, 1, 2); + LocalizerContext lc2 = new LocalizerContext(user, cId2, null); + ResourceEvent reqEvent2 = + new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc2); + tracker.handle(reqEvent2); + + // Container 2 should have been added to the waiting queue of the local + // resource + Assert.assertEquals(2, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId2)); + + // Failing resource localization + ResourceEvent resourceFailedEvent = + new ResourceFailedLocalizationEvent(lr, new Exception("test")); + + // Backing up the resource to track its state change as it will be + // removed after the failed event. + LocalizedResource localizedResource = localrsrc.get(lr); + + tracker.handle(resourceFailedEvent); + + // After receiving failed resource event; all waiting containers will be + // notified with Container Resource Failed Event. + Assert.assertEquals(0, localrsrc.size()); + verify(containerEventHandler, times(2)).handle( + isA(ContainerResourceFailedEvent.class)); + Assert.assertEquals(ResourceState.FAILED, localizedResource.getState()); + + // Container 1 trying to release the resource (This resource is already + // deleted from the cache. This call should return silently without + // exception. + ResourceReleaseEvent relEvent1 = new ResourceReleaseEvent(lr, cId1); + tracker.handle(relEvent1); + + // Container-3 now requests for the same resource. This request call + // is coming prior to Container-2's release call. + ContainerId cId3 = BuilderUtils.newContainerId(1, 1, 1, 3); + LocalizerContext lc3 = new LocalizerContext(user, cId3, null); + ResourceEvent reqEvent3 = + new ResourceRequestEvent(lr, LocalResourceVisibility.PRIVATE, lc3); + tracker.handle(reqEvent3); + + // Local resource cache now should have the requested resource and the + // number of waiting containers should be 1. 
+ Assert.assertEquals(1, localrsrc.size()); + Assert.assertTrue(localrsrc.containsKey(lr)); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId3)); + + // Container-2 Releases the resource + ResourceReleaseEvent relEvent2 = new ResourceReleaseEvent(lr, cId2); + tracker.handle(relEvent2); + + // Making sure that there is no change in the cache after the release. + Assert.assertEquals(1, localrsrc.size()); + Assert.assertTrue(localrsrc.containsKey(lr)); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + Assert.assertTrue(localrsrc.get(lr).ref.contains(cId3)); + + // Sending ResourceLocalizedEvent to tracker. In turn resource should + // send Container Resource Localized Event to waiting containers. + Path localizedPath = new Path("/tmp/file1"); + ResourceLocalizedEvent localizedEvent = + new ResourceLocalizedEvent(lr, localizedPath, 123L); + tracker.handle(localizedEvent); + + // Verifying ContainerResourceLocalizedEvent . + verify(containerEventHandler, times(1)).handle( + isA(ContainerResourceLocalizedEvent.class)); + Assert.assertEquals(ResourceState.LOCALIZED, localrsrc.get(lr) + .getState()); + Assert.assertEquals(1, localrsrc.get(lr).getRefCount()); + + // Container-3 releasing the resource. + ResourceReleaseEvent relEvent3 = new ResourceReleaseEvent(lr, cId3); + tracker.handle(relEvent3); + + Assert.assertEquals(0, localrsrc.get(lr).getRefCount()); + + } finally { + if (dispatcher != null) { + dispatcher.stop(); + } + } + } + @Test(timeout = 100000) @SuppressWarnings("unchecked") public void testHierarchicalLocalCacheDirectories() { @@ -266,19 +407,25 @@ public class TestLocalResourcesTrackerImpl { // Simulate the process of localization of lr1 Path hierarchicalPath1 = tracker.getPathForLocalization(lr1, localDir); // Simulate lr1 getting localized - ResourceLocalizedEvent rle = + ResourceLocalizedEvent rle1 = new ResourceLocalizedEvent(lr1, new Path(hierarchicalPath1.toUri().toString() + Path.SEPARATOR + "file1"), 120); - tracker.handle(rle); + tracker.handle(rle1); // Localization successful. - tracker.localizationCompleted(lr1, true); LocalResourceRequest lr2 = createLocalResourceRequest(user, 3, 3, LocalResourceVisibility.PUBLIC); + // Container 1 requests lr2 to be localized. + ResourceEvent reqEvent2 = + new ResourceRequestEvent(lr2, LocalResourceVisibility.PUBLIC, lc1); + tracker.handle(reqEvent2); + Path hierarchicalPath2 = tracker.getPathForLocalization(lr2, localDir); // localization failed. - tracker.localizationCompleted(lr2, false); + ResourceFailedLocalizationEvent rfe2 = + new ResourceFailedLocalizationEvent(lr2, new Exception("Test")); + tracker.handle(rfe2); /* * The path returned for two localization should be different because we @@ -292,7 +439,11 @@ public class TestLocalResourcesTrackerImpl { LocalResourceVisibility.PUBLIC, lc1); tracker.handle(reqEvent3); Path hierarchicalPath3 = tracker.getPathForLocalization(lr3, localDir); - tracker.localizationCompleted(lr3, true); + // localization successful + ResourceLocalizedEvent rle3 = + new ResourceLocalizedEvent(lr3, new Path(hierarchicalPath3.toUri() + .toString() + Path.SEPARATOR + "file3"), 120); + tracker.handle(rle3); // Verifying that path created is inside the subdirectory Assert.assertEquals(hierarchicalPath3.toUri().toString(), From 3ddf8319ca974c65f2605a97532bdb9516367029 Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Thu, 11 Apr 2013 04:05:29 +0000 Subject: [PATCH 29/52] HADOOP-9471. 
hadoop-client wrongfully excludes jetty-util JAR, breaking webhdfs. (tucu) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466763 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-client/pom.xml | 4 ---- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/hadoop-client/pom.xml b/hadoop-client/pom.xml index 5d8c7fad4d6..8dea4325899 100644 --- a/hadoop-client/pom.xml +++ b/hadoop-client/pom.xml @@ -143,10 +143,6 @@ org.mortbay.jetty jetty - - org.mortbay.jetty - jetty-util - com.sun.jersey jersey-core diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 6651d6d0640..480e7f8f39d 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -617,6 +617,9 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9429. TestConfiguration fails with IBM JAVA. (Amir Sanjar via suresh) + HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR, + breaking webhdfs. (tucu) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES From 6a1c41111edcdc58c846fc50e53554fbba230171 Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Thu, 11 Apr 2013 04:52:38 +0000 Subject: [PATCH 30/52] MAPREDUCE-5079. Changes job recovery to restore state directly from job history, instead of simulating state machine events. Contributed by Jason Lowe and Robert Parker. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466767 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 + .../hadoop/mapreduce/v2/app/MRAppMaster.java | 172 ++++--- .../event/JobStartEvent.java} | 27 +- .../app/job/event/TaskAttemptEventType.java | 1 + .../job/event/TaskAttemptRecoverEvent.java | 50 ++ .../v2/app/job/event/TaskEventType.java | 1 + .../v2/app/job/event/TaskRecoverEvent.java | 50 ++ .../mapreduce/v2/app/job/impl/JobImpl.java | 33 +- .../v2/app/job/impl/MapTaskImpl.java | 9 +- .../v2/app/job/impl/ReduceTaskImpl.java | 9 +- .../v2/app/job/impl/TaskAttemptImpl.java | 214 ++++++-- .../mapreduce/v2/app/job/impl/TaskImpl.java | 269 ++++++---- .../mapreduce/v2/app/recover/Recovery.java | 39 -- .../v2/app/recover/RecoveryService.java | 480 ------------------ .../apache/hadoop/mapreduce/v2/app/MRApp.java | 8 +- .../hadoop/mapreduce/v2/app/TestRecovery.java | 470 +++++++++++++++++ .../mapreduce/v2/app/TestStagingCleanup.java | 3 +- .../v2/app/job/impl/TestJobImpl.java | 41 +- .../v2/app/job/impl/TestTaskImpl.java | 19 +- .../v2/jobhistory/JobHistoryUtils.java | 16 + .../apache/hadoop/mapreduce/MRJobConfig.java | 1 + 21 files changed, 1146 insertions(+), 770 deletions(-) rename hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/{recover/package-info.java => job/event/JobStartEvent.java} (61%) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java delete mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java delete mode 100644
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 72967fd6a6a..9d0a815287e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -169,6 +169,10 @@ Release 2.0.5-alpha - UNRELEASED MAPREDUCE-5129. Allow tags to JobHistory for deeper analytics. (billie via acmurthy) + MAPREDUCE-5079. Changes job recovery to restore state directly from job + history, instead of simulating state machine events. + (Jason Lowe and Robert Parker via sseth) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index b320e4110da..45d6e9e84a9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -24,9 +24,12 @@ import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.io.IOUtils; @@ -46,6 +49,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.AMStartedEvent; import org.apache.hadoop.mapreduce.jobhistory.EventReader; @@ -54,6 +58,9 @@ import org.apache.hadoop.mapreduce.jobhistory.HistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryCopyService; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; @@ -61,6 +68,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.client.MRClientService; @@ -74,6 +82,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import
org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; @@ -84,8 +93,6 @@ import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl; import org.apache.hadoop.mapreduce.v2.app.local.LocalContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; -import org.apache.hadoop.mapreduce.v2.app.recover.Recovery; -import org.apache.hadoop.mapreduce.v2.app.recover.RecoveryService; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; @@ -94,6 +101,7 @@ import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; import org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator; import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent; +import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -167,7 +175,6 @@ public class MRAppMaster extends CompositeService { private AppContext context; private Dispatcher dispatcher; private ClientService clientService; - private Recovery recoveryServ; private ContainerAllocator containerAllocator; private ContainerLauncher containerLauncher; private EventHandler committerEventHandler; @@ -180,7 +187,6 @@ public class MRAppMaster extends CompositeService { private OutputCommitter committer; private JobEventDispatcher jobEventDispatcher; private JobHistoryEventHandler jobHistoryEventHandler; - private boolean inRecovery = false; private SpeculatorEventDispatcher speculatorEventDispatcher; private Job job; @@ -193,6 +199,8 @@ public class MRAppMaster extends CompositeService { private String shutDownMessage = null; JobStateInternal forcedState = null; + private long recoveredJobStartTime = 0; + public MRAppMaster(ApplicationAttemptId applicationAttemptId, ContainerId containerId, String nmHost, int nmPort, int nmHttpPort, long appSubmitTime, int maxAppAttempts) { @@ -340,34 +348,9 @@ public class MRAppMaster extends CompositeService { } } else { committer = createOutputCommitter(conf); - boolean recoveryEnabled = conf.getBoolean( - MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, true); - boolean recoverySupportedByCommitter = committer.isRecoverySupported(); - // If a shuffle secret was not provided by the job client then this app - // attempt will generate one. However that disables recovery if there - // are reducers as the shuffle secret would be app attempt specific. - boolean shuffleKeyValidForRecovery = (numReduceTasks > 0 && - TokenCache.getShuffleSecretKey(fsTokens) != null); - - if (recoveryEnabled && recoverySupportedByCommitter - && shuffleKeyValidForRecovery && appAttemptID.getAttemptId() > 1) { - LOG.info("Recovery is enabled. 
" - + "Will try to recover from previous life on best effort basis."); - recoveryServ = createRecoveryService(context); - addIfService(recoveryServ); - dispatcher = recoveryServ.getDispatcher(); - clock = recoveryServ.getClock(); - inRecovery = true; - } else { - LOG.info("Not starting RecoveryService: recoveryEnabled: " - + recoveryEnabled + " recoverySupportedByCommitter: " - + recoverySupportedByCommitter + " shuffleKeyValidForRecovery: " - + shuffleKeyValidForRecovery + " ApplicationAttemptID: " - + appAttemptID.getAttemptId()); - dispatcher = createDispatcher(); - addIfService(dispatcher); - } + dispatcher = createDispatcher(); + addIfService(dispatcher); //service to handle requests from JobClient clientService = createClientService(context); @@ -595,15 +578,6 @@ public class MRAppMaster extends CompositeService { return new JobFinishEventHandler(); } - /** - * Create the recovery service. - * @return an instance of the recovery service. - */ - protected Recovery createRecoveryService(AppContext appContext) { - return new RecoveryService(appContext.getApplicationAttemptId(), - appContext.getClock(), getCommitter(), isNewApiCommitter()); - } - /** Create and initialize (but don't start) a single job. * @param forcedState a state to force the job into or null for normal operation. * @param diagnostic a diagnostic message to include with the job. @@ -615,7 +589,8 @@ public class MRAppMaster extends CompositeService { Job newJob = new JobImpl(jobId, appAttemptID, conf, dispatcher.getEventHandler(), taskAttemptListener, jobTokenSecretManager, fsTokens, clock, - completedTasksFromPreviousRun, metrics, newApiCommitter, + completedTasksFromPreviousRun, metrics, + committer, newApiCommitter, currentUser.getUserName(), appSubmitTime, amInfos, context, forcedState, diagnostic); ((RunningAppContext) context).jobs.put(newJob.getID(), newJob); @@ -978,18 +953,8 @@ public class MRAppMaster extends CompositeService { public void start() { amInfos = new LinkedList(); - - // Pull completedTasks etc from recovery - if (inRecovery) { - completedTasksFromPreviousRun = recoveryServ.getCompletedTasks(); - amInfos = recoveryServ.getAMInfos(); - } else { - // Get the amInfos anyways irrespective of whether recovery is enabled or - // not IF this is not the first AM generation - if (appAttemptID.getAttemptId() != 1) { - amInfos.addAll(readJustAMInfos()); - } - } + completedTasksFromPreviousRun = new HashMap(); + processRecovery(); // Current an AMInfo for the current AM generation. AMInfo amInfo = @@ -1051,13 +1016,105 @@ public class MRAppMaster extends CompositeService { startJobs(); } + private void processRecovery() { + if (appAttemptID.getAttemptId() == 1) { + return; // no need to recover on the first attempt + } + + boolean recoveryEnabled = getConfig().getBoolean( + MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, + MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE_DEFAULT); + boolean recoverySupportedByCommitter = + committer != null && committer.isRecoverySupported(); + + // If a shuffle secret was not provided by the job client then this app + // attempt will generate one. However that disables recovery if there + // are reducers as the shuffle secret would be app attempt specific. + int numReduceTasks = getConfig().getInt(MRJobConfig.NUM_REDUCES, 0); + boolean shuffleKeyValidForRecovery = (numReduceTasks > 0 && + TokenCache.getShuffleSecretKey(fsTokens) != null); + + if (recoveryEnabled && recoverySupportedByCommitter + && shuffleKeyValidForRecovery) { + LOG.info("Recovery is enabled. 
" + + "Will try to recover from previous life on best effort basis."); + try { + parsePreviousJobHistory(); + } catch (IOException e) { + LOG.warn("Unable to parse prior job history, aborting recovery", e); + // try to get just the AMInfos + amInfos.addAll(readJustAMInfos()); + } + } else { + LOG.info("Will not try to recover. recoveryEnabled: " + + recoveryEnabled + " recoverySupportedByCommitter: " + + recoverySupportedByCommitter + " shuffleKeyValidForRecovery: " + + shuffleKeyValidForRecovery + " ApplicationAttemptID: " + + appAttemptID.getAttemptId()); + // Get the amInfos anyways whether recovery is enabled or not + amInfos.addAll(readJustAMInfos()); + } + } + + private static FSDataInputStream getPreviousJobHistoryStream( + Configuration conf, ApplicationAttemptId appAttemptId) + throws IOException { + Path historyFile = JobHistoryUtils.getPreviousJobHistoryPath(conf, + appAttemptId); + LOG.info("Previous history file is at " + historyFile); + return historyFile.getFileSystem(conf).open(historyFile); + } + + private void parsePreviousJobHistory() throws IOException { + FSDataInputStream in = getPreviousJobHistoryStream(getConfig(), + appAttemptID); + JobHistoryParser parser = new JobHistoryParser(in); + JobInfo jobInfo = parser.parse(); + Exception parseException = parser.getParseException(); + if (parseException != null) { + LOG.info("Got an error parsing job-history file" + + ", ignoring incomplete events.", parseException); + } + Map taskInfos = jobInfo + .getAllTasks(); + for (TaskInfo taskInfo : taskInfos.values()) { + if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) { + Iterator> taskAttemptIterator = + taskInfo.getAllTaskAttempts().entrySet().iterator(); + while (taskAttemptIterator.hasNext()) { + Map.Entry currentEntry = taskAttemptIterator.next(); + if (!jobInfo.getAllCompletedTaskAttempts().containsKey(currentEntry.getKey())) { + taskAttemptIterator.remove(); + } + } + completedTasksFromPreviousRun + .put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo); + LOG.info("Read from history task " + + TypeConverter.toYarn(taskInfo.getTaskId())); + } + } + LOG.info("Read completed tasks from history " + + completedTasksFromPreviousRun.size()); + recoveredJobStartTime = jobInfo.getLaunchTime(); + + // recover AMInfos + List jhAmInfoList = jobInfo.getAMInfos(); + if (jhAmInfoList != null) { + for (JobHistoryParser.AMInfo jhAmInfo : jhAmInfoList) { + AMInfo amInfo = MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(), + jhAmInfo.getStartTime(), jhAmInfo.getContainerId(), + jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(), + jhAmInfo.getNodeManagerHttpPort()); + amInfos.add(amInfo); + } + } + } + private List readJustAMInfos() { List amInfos = new ArrayList(); FSDataInputStream inputStream = null; try { - inputStream = - RecoveryService.getPreviousJobHistoryFileStream(getConfig(), - appAttemptID); + inputStream = getPreviousJobHistoryStream(getConfig(), appAttemptID); EventReader jobHistoryEventReader = new EventReader(inputStream); // All AMInfos are contiguous. Track when the first AMStartedEvent @@ -1108,7 +1165,8 @@ public class MRAppMaster extends CompositeService { @SuppressWarnings("unchecked") protected void startJobs() { /** create a job-start event to get this ball rolling */ - JobEvent startJobEvent = new JobEvent(job.getID(), JobEventType.JOB_START); + JobEvent startJobEvent = new JobStartEvent(job.getID(), + recoveredJobStartTime); /** send the job-start event. this triggers the job execution. 
*/ dispatcher.getEventHandler().handle(startJobEvent); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/package-info.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobStartEvent.java similarity index 61% rename from hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/package-info.java rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobStartEvent.java index 400fdfaea63..39051da000f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/package-info.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/JobStartEvent.java @@ -1,4 +1,4 @@ -/* +/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,6 +15,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -@InterfaceAudience.Private -package org.apache.hadoop.mapreduce.v2.app.recover; -import org.apache.hadoop.classification.InterfaceAudience; + +package org.apache.hadoop.mapreduce.v2.app.job.event; + +import org.apache.hadoop.mapreduce.v2.api.records.JobId; + +public class JobStartEvent extends JobEvent { + + long recoveredJobStartTime; + + public JobStartEvent(JobId jobID) { + this(jobID, 0); + } + + public JobStartEvent(JobId jobID, long recoveredJobStartTime) { + super(jobID, JobEventType.JOB_START); + this.recoveredJobStartTime = recoveredJobStartTime; + } + + public long getRecoveredJobStartTime() { + return recoveredJobStartTime; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java index a6c684015ed..a43263264e9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptEventType.java @@ -26,6 +26,7 @@ public enum TaskAttemptEventType { //Producer:Task TA_SCHEDULE, TA_RESCHEDULE, + TA_RECOVER, //Producer:Client, Task TA_KILL, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java new file mode 100644 index 00000000000..19fe752fb12 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskAttemptRecoverEvent.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.v2.app.job.event; + +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; + +public class TaskAttemptRecoverEvent extends TaskAttemptEvent { + + private TaskAttemptInfo taInfo; + private OutputCommitter committer; + private boolean recoverAttemptOutput; + + public TaskAttemptRecoverEvent(TaskAttemptId id, TaskAttemptInfo taInfo, + OutputCommitter committer, boolean recoverOutput) { + super(id, TaskAttemptEventType.TA_RECOVER); + this.taInfo = taInfo; + this.committer = committer; + this.recoverAttemptOutput = recoverOutput; + } + + public TaskAttemptInfo getTaskAttemptInfo() { + return taInfo; + } + + public OutputCommitter getCommitter() { + return committer; + } + + public boolean getRecoverOutput() { + return recoverAttemptOutput; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java index d385e2fc682..8ce9c9f27c5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskEventType.java @@ -28,6 +28,7 @@ public enum TaskEventType { //Producer:Job T_SCHEDULE, + T_RECOVER, //Producer:Speculator T_ADD_SPEC_ATTEMPT, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java new file mode 100644 index 00000000000..b5ead5ecb4f --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/event/TaskRecoverEvent.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce.v2.app.job.event; + +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; + +public class TaskRecoverEvent extends TaskEvent { + + private TaskInfo taskInfo; + private OutputCommitter committer; + private boolean recoverTaskOutput; + + public TaskRecoverEvent(TaskId taskID, TaskInfo taskInfo, + OutputCommitter committer, boolean recoverTaskOutput) { + super(taskID, TaskEventType.T_RECOVER); + this.taskInfo = taskInfo; + this.committer = committer; + this.recoverTaskOutput = recoverTaskOutput; + } + + public TaskInfo getTaskInfo() { + return taskInfo; + } + + public OutputCommitter getOutputCommitter() { + return committer; + } + + public boolean getRecoverTaskOutput() { + return recoverTaskOutput; + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index ec15fae5b45..367b0280845 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -49,6 +49,7 @@ import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobFinishedEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; @@ -92,6 +93,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobSetupFailedEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptFetchFailureEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; @@ -101,6 +103,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; @@ -159,6 +162,7 @@ 
public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, private final Lock writeLock; private final JobId jobId; private final String jobName; + private final OutputCommitter committer; private final boolean newApiCommitter; private final org.apache.hadoop.mapreduce.JobID oldJobId; private final TaskAttemptListener taskAttemptListener; @@ -602,7 +606,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, JobTokenSecretManager jobTokenSecretManager, Credentials fsTokenCredentials, Clock clock, Map completedTasksFromPreviousRun, MRAppMetrics metrics, - boolean newApiCommitter, String userName, + OutputCommitter committer, boolean newApiCommitter, String userName, long appSubmitTime, List amInfos, AppContext appContext, JobStateInternal forcedState, String forcedDiagnostic) { this.applicationAttemptId = applicationAttemptId; @@ -618,6 +622,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, this.queueName = conf.get(MRJobConfig.QUEUE_NAME, "default"); this.appSubmitTime = appSubmitTime; this.oldJobId = TypeConverter.fromYarn(jobId); + this.committer = committer; this.newApiCommitter = newApiCommitter; this.taskAttemptListener = taskAttemptListener; @@ -888,10 +893,16 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, } } - protected void scheduleTasks(Set taskIDs) { + protected void scheduleTasks(Set taskIDs, + boolean recoverTaskOutput) { for (TaskId taskID : taskIDs) { - eventHandler.handle(new TaskEvent(taskID, - TaskEventType.T_SCHEDULE)); + TaskInfo taskInfo = completedTasksFromPreviousRun.remove(taskID); + if (taskInfo != null) { + eventHandler.handle(new TaskRecoverEvent(taskID, taskInfo, + committer, recoverTaskOutput)); + } else { + eventHandler.handle(new TaskEvent(taskID, TaskEventType.T_SCHEDULE)); + } } } @@ -1421,7 +1432,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, splits[i], job.taskAttemptListener, job.jobToken, job.fsTokens, - job.clock, job.completedTasksFromPreviousRun, + job.clock, job.applicationAttemptId.getAttemptId(), job.metrics, job.appContext); job.addTask(task); @@ -1439,7 +1450,6 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, job.numMapTasks, job.taskAttemptListener, job.jobToken, job.fsTokens, job.clock, - job.completedTasksFromPreviousRun, job.applicationAttemptId.getAttemptId(), job.metrics, job.appContext); job.addTask(task); @@ -1475,8 +1485,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, @Override public void transition(JobImpl job, JobEvent event) { job.setupProgress = 1.0f; - job.scheduleTasks(job.mapTasks); // schedule (i.e., start) the maps - job.scheduleTasks(job.reduceTasks); + job.scheduleTasks(job.mapTasks, job.numReduceTasks == 0); + job.scheduleTasks(job.reduceTasks, true); // If we have no tasks, just transition to job completed if (job.numReduceTasks == 0 && job.numMapTasks == 0) { @@ -1507,7 +1517,12 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, */ @Override public void transition(JobImpl job, JobEvent event) { - job.startTime = job.clock.getTime(); + JobStartEvent jse = (JobStartEvent) event; + if (jse.getRecoveredJobStartTime() != 0) { + job.startTime = jse.getRecoveredJobStartTime(); + } else { + job.startTime = job.clock.getTime(); + } JobInitedEvent jie = new JobInitedEvent(job.oldJobId, job.startTime, diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java index bec20aa0c81..c625f739c63 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/MapTaskImpl.java @@ -18,17 +18,13 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; -import java.util.Map; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapTaskAttemptImpl; import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener; @@ -49,11 +45,10 @@ public class MapTaskImpl extends TaskImpl { TaskAttemptListener taskAttemptListener, Token jobToken, Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, - MRAppMetrics metrics, AppContext appContext) { + int appAttemptId, MRAppMetrics metrics, AppContext appContext) { super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, metrics, appContext); + appAttemptId, metrics, appContext); this.taskSplitMetaInfo = taskSplitMetaInfo; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java index a860ad70242..0f4ea9a73bd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/ReduceTaskImpl.java @@ -18,16 +18,12 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; -import java.util.Map; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.ReduceTaskAttemptImpl; import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener; @@ -47,11 +43,10 @@ public class ReduceTaskImpl extends TaskImpl { int numMapTasks, TaskAttemptListener 
taskAttemptListener, Token jobToken, Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, - MRAppMetrics metrics, AppContext appContext) { + int appAttemptId, MRAppMetrics metrics, AppContext appContext) { super(jobId, TaskType.REDUCE, partition, eventHandler, jobFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, metrics, appContext); + appAttemptId, metrics, appContext); this.numMapTasks = numMapTasks; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index dae9be0d322..3cb4bf913c9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -56,10 +56,12 @@ import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.JobCounter; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskCounter; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinishedEvent; import org.apache.hadoop.mapreduce.jobhistory.ReduceAttemptFinishedEvent; import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStartedEvent; @@ -89,6 +91,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdate import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; @@ -204,6 +207,11 @@ public abstract class TaskAttemptImpl implements TaskAttemptEventType.TA_KILL, new KilledTransition()) .addTransition(TaskAttemptStateInternal.NEW, TaskAttemptStateInternal.FAILED, TaskAttemptEventType.TA_FAILMSG, new FailedTransition()) + .addTransition(TaskAttemptStateInternal.NEW, + EnumSet.of(TaskAttemptStateInternal.FAILED, + TaskAttemptStateInternal.KILLED, + TaskAttemptStateInternal.SUCCEEDED), + TaskAttemptEventType.TA_RECOVER, new RecoverTransition()) .addTransition(TaskAttemptStateInternal.NEW, TaskAttemptStateInternal.NEW, TaskAttemptEventType.TA_DIAGNOSTICS_UPDATE, @@ -1082,6 +1090,102 @@ public abstract class TaskAttemptImpl implements this.avataar = avataar; } + @SuppressWarnings("unchecked") + public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo, + OutputCommitter committer, boolean recoverOutput) { + containerID = taInfo.getContainerId(); + containerNodeId = 
ConverterUtils.toNodeId(taInfo.getHostname() + ":" + + taInfo.getPort()); + containerMgrAddress = StringInterner.weakIntern( + containerNodeId.toString()); + nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":" + + taInfo.getHttpPort()); + computeRackAndLocality(); + launchTime = taInfo.getStartTime(); + finishTime = (taInfo.getFinishTime() != -1) ? + taInfo.getFinishTime() : clock.getTime(); + shufflePort = taInfo.getShufflePort(); + trackerName = taInfo.getHostname(); + httpPort = taInfo.getHttpPort(); + sendLaunchedEvents(); + + reportedStatus.id = attemptId; + reportedStatus.progress = 1.0f; + reportedStatus.counters = taInfo.getCounters(); + reportedStatus.stateString = taInfo.getState(); + reportedStatus.phase = Phase.CLEANUP; + reportedStatus.mapFinishTime = taInfo.getMapFinishTime(); + reportedStatus.shuffleFinishTime = taInfo.getShuffleFinishTime(); + reportedStatus.sortFinishTime = taInfo.getSortFinishTime(); + addDiagnosticInfo(taInfo.getError()); + + boolean needToClean = false; + String recoveredState = taInfo.getTaskStatus(); + if (recoverOutput + && TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) { + TaskAttemptContext tac = new TaskAttemptContextImpl(conf, + TypeConverter.fromYarn(attemptId)); + try { + committer.recoverTask(tac); + LOG.info("Recovered output from task attempt " + attemptId); + } catch (Exception e) { + LOG.error("Unable to recover task attempt " + attemptId, e); + LOG.info("Task attempt " + attemptId + " will be recovered as KILLED"); + recoveredState = TaskAttemptState.KILLED.toString(); + needToClean = true; + } + } + + TaskAttemptStateInternal attemptState; + if (TaskAttemptState.SUCCEEDED.toString().equals(recoveredState)) { + attemptState = TaskAttemptStateInternal.SUCCEEDED; + reportedStatus.taskState = TaskAttemptState.SUCCEEDED; + eventHandler.handle(createJobCounterUpdateEventTASucceeded(this)); + logAttemptFinishedEvent(attemptState); + } else if (TaskAttemptState.FAILED.toString().equals(recoveredState)) { + attemptState = TaskAttemptStateInternal.FAILED; + reportedStatus.taskState = TaskAttemptState.FAILED; + eventHandler.handle(createJobCounterUpdateEventTAFailed(this, false)); + TaskAttemptUnsuccessfulCompletionEvent tauce = + createTaskAttemptUnsuccessfulCompletionEvent(this, + TaskAttemptStateInternal.FAILED); + eventHandler.handle( + new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce)); + } else { + if (!TaskAttemptState.KILLED.toString().equals(recoveredState)) { + if (String.valueOf(recoveredState).isEmpty()) { + LOG.info("TaskAttempt " + attemptId + + " had not completed, recovering as KILLED"); + } else { + LOG.warn("TaskAttempt " + attemptId + " found in unexpected state " + + recoveredState + ", recovering as KILLED"); + } + addDiagnosticInfo("Killed during application recovery"); + needToClean = true; + } + attemptState = TaskAttemptStateInternal.KILLED; + reportedStatus.taskState = TaskAttemptState.KILLED; + eventHandler.handle(createJobCounterUpdateEventTAKilled(this, false)); + TaskAttemptUnsuccessfulCompletionEvent tauce = + createTaskAttemptUnsuccessfulCompletionEvent(this, + TaskAttemptStateInternal.KILLED); + eventHandler.handle( + new JobHistoryEvent(attemptId.getTaskId().getJobId(), tauce)); + } + + if (needToClean) { + TaskAttemptContext tac = new TaskAttemptContextImpl(conf, + TypeConverter.fromYarn(attemptId)); + try { + committer.abortTask(tac); + } catch (Exception e) { + LOG.warn("Task cleanup failed for attempt " + attemptId, e); + } + } + + return attemptState; + } + + private 
static TaskAttemptState getExternalState( TaskAttemptStateInternal smState) { switch (smState) { @@ -1122,6 +1226,24 @@ public abstract class TaskAttemptImpl implements } } + private void computeRackAndLocality() { + nodeRackName = RackResolver.resolve( + containerNodeId.getHost()).getNetworkLocation(); + + locality = Locality.OFF_SWITCH; + if (dataLocalHosts.size() > 0) { + String cHost = resolveHost(containerNodeId.getHost()); + if (dataLocalHosts.contains(cHost)) { + locality = Locality.NODE_LOCAL; + } + } + if (locality == Locality.OFF_SWITCH) { + if (dataLocalRacks.contains(nodeRackName)) { + locality = Locality.RACK_LOCAL; + } + } + } + private static long computeSlotMillis(TaskAttemptImpl taskAttempt) { TaskType taskType = taskAttempt.getID().getTaskId().getTaskType(); int slotMemoryReq = @@ -1141,6 +1263,18 @@ public abstract class TaskAttemptImpl implements return slotMillisIncrement; } + private static JobCounterUpdateEvent createJobCounterUpdateEventTASucceeded( + TaskAttemptImpl taskAttempt) { + long slotMillis = computeSlotMillis(taskAttempt); + TaskId taskId = taskAttempt.attemptId.getTaskId(); + JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId()); + jce.addCounterUpdate( + taskId.getTaskType() == TaskType.MAP ? + JobCounter.SLOTS_MILLIS_MAPS : JobCounter.SLOTS_MILLIS_REDUCES, + slotMillis); + return jce; + } + private static JobCounterUpdateEvent createJobCounterUpdateEventTAFailed( TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted) { TaskType taskType = taskAttempt.getID().getTaskId().getTaskType(); @@ -1210,6 +1344,26 @@ public abstract class TaskAttemptImpl implements return tauce; } + @SuppressWarnings("unchecked") + private void sendLaunchedEvents() { + JobCounterUpdateEvent jce = new JobCounterUpdateEvent(attemptId.getTaskId() + .getJobId()); + jce.addCounterUpdate(attemptId.getTaskId().getTaskType() == TaskType.MAP ? 
+ JobCounter.TOTAL_LAUNCHED_MAPS : JobCounter.TOTAL_LAUNCHED_REDUCES, 1); + eventHandler.handle(jce); + + LOG.info("TaskAttempt: [" + attemptId + + "] using containerId: [" + containerID + "] on NM: [" + + containerMgrAddress + "]"); + TaskAttemptStartedEvent tase = + new TaskAttemptStartedEvent(TypeConverter.fromYarn(attemptId), + TypeConverter.fromYarn(attemptId.getTaskId().getTaskType()), + launchTime, trackerName, httpPort, shufflePort, containerID, + locality.toString(), avataar.toString()); + eventHandler.handle( + new JobHistoryEvent(attemptId.getTaskId().getJobId(), tase)); + } + private WrappedProgressSplitsBlock getProgressSplitBlock() { readLock.lock(); try { @@ -1342,8 +1496,6 @@ public abstract class TaskAttemptImpl implements taskAttempt.containerNodeId.toString()); taskAttempt.nodeHttpAddress = StringInterner.weakIntern( cEvent.getContainer().getNodeHttpAddress()); - taskAttempt.nodeRackName = RackResolver.resolve( - taskAttempt.containerNodeId.getHost()).getNetworkLocation(); taskAttempt.containerToken = cEvent.getContainer().getContainerToken(); taskAttempt.assignedCapability = cEvent.getContainer().getResource(); // this is a _real_ Task (classic Hadoop mapred flavor): @@ -1354,19 +1506,7 @@ public abstract class TaskAttemptImpl implements taskAttempt.taskAttemptListener.registerPendingTask( taskAttempt.remoteTask, taskAttempt.jvmID); - taskAttempt.locality = Locality.OFF_SWITCH; - if (taskAttempt.dataLocalHosts.size() > 0) { - String cHost = taskAttempt.resolveHost( - taskAttempt.containerNodeId.getHost()); - if (taskAttempt.dataLocalHosts.contains(cHost)) { - taskAttempt.locality = Locality.NODE_LOCAL; - } - } - if (taskAttempt.locality == Locality.OFF_SWITCH) { - if (taskAttempt.dataLocalRacks.contains(taskAttempt.nodeRackName)) { - taskAttempt.locality = Locality.RACK_LOCAL; - } - } + taskAttempt.computeRackAndLocality(); //launch the container //create the container object to be launched for a given Task attempt @@ -1471,27 +1611,7 @@ public abstract class TaskAttemptImpl implements // Costly? taskAttempt.trackerName = nodeHttpInetAddr.getHostName(); taskAttempt.httpPort = nodeHttpInetAddr.getPort(); - JobCounterUpdateEvent jce = - new JobCounterUpdateEvent(taskAttempt.attemptId.getTaskId() - .getJobId()); - jce.addCounterUpdate( - taskAttempt.attemptId.getTaskId().getTaskType() == TaskType.MAP ? 
- JobCounter.TOTAL_LAUNCHED_MAPS: JobCounter.TOTAL_LAUNCHED_REDUCES - , 1); - taskAttempt.eventHandler.handle(jce); - - LOG.info("TaskAttempt: [" + taskAttempt.attemptId - + "] using containerId: [" + taskAttempt.containerID + " on NM: [" - + taskAttempt.containerMgrAddress + "]"); - TaskAttemptStartedEvent tase = - new TaskAttemptStartedEvent(TypeConverter.fromYarn(taskAttempt.attemptId), - TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId().getTaskType()), - taskAttempt.launchTime, - nodeHttpInetAddr.getHostName(), nodeHttpInetAddr.getPort(), - taskAttempt.shufflePort, taskAttempt.containerID, - taskAttempt.locality.toString(), taskAttempt.avataar.toString()); - taskAttempt.eventHandler.handle - (new JobHistoryEvent(taskAttempt.attemptId.getTaskId().getJobId(), tase)); + taskAttempt.sendLaunchedEvents(); taskAttempt.eventHandler.handle (new SpeculatorEvent (taskAttempt.attemptId, true, taskAttempt.clock.getTime())); @@ -1540,14 +1660,8 @@ public abstract class TaskAttemptImpl implements TaskAttemptEvent event) { //set the finish time taskAttempt.setFinishTime(); - long slotMillis = computeSlotMillis(taskAttempt); - TaskId taskId = taskAttempt.attemptId.getTaskId(); - JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId()); - jce.addCounterUpdate( - taskId.getTaskType() == TaskType.MAP ? - JobCounter.SLOTS_MILLIS_MAPS : JobCounter.SLOTS_MILLIS_REDUCES, - slotMillis); - taskAttempt.eventHandler.handle(jce); + taskAttempt.eventHandler.handle( + createJobCounterUpdateEventTASucceeded(taskAttempt)); taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.SUCCEEDED); taskAttempt.eventHandler.handle(new TaskTAttemptEvent( taskAttempt.attemptId, @@ -1585,6 +1699,18 @@ public abstract class TaskAttemptImpl implements } } + private static class RecoverTransition implements + MultipleArcTransition { + + @Override + public TaskAttemptStateInternal transition(TaskAttemptImpl taskAttempt, + TaskAttemptEvent event) { + TaskAttemptRecoverEvent tare = (TaskAttemptRecoverEvent) event; + return taskAttempt.recover(tare.getTaskAttemptInfo(), + tare.getCommitter(), tare.getRecoverOutput()); + } + } + @SuppressWarnings({ "unchecked" }) private void logAttemptFinishedEvent(TaskAttemptStateInternal state) { //Log finished events only if an attempt started. 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java index d01d9998aaf..6e4f1b27f62 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java @@ -19,6 +19,7 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.EnumSet; @@ -37,7 +38,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.MRConfig; -import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; @@ -69,8 +70,10 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskAttemptCompletedEvent import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerFailedEvent; @@ -152,6 +155,12 @@ public abstract class TaskImpl implements Task, EventHandler { TaskEventType.T_SCHEDULE, new InitialScheduleTransition()) .addTransition(TaskStateInternal.NEW, TaskStateInternal.KILLED, TaskEventType.T_KILL, new KillNewTransition()) + .addTransition(TaskStateInternal.NEW, + EnumSet.of(TaskStateInternal.FAILED, + TaskStateInternal.KILLED, + TaskStateInternal.RUNNING, + TaskStateInternal.SUCCEEDED), + TaskEventType.T_RECOVER, new RecoverTransition()) // Transitions from SCHEDULED state //when the first attempt is launched, the task state is set to RUNNING @@ -250,20 +259,16 @@ public abstract class TaskImpl implements Task, EventHandler { // By default, the next TaskAttempt number is zero. Changes during recovery protected int nextAttemptNumber = 0; - private List taskAttemptsFromPreviousGeneration = - new ArrayList(); - private static final class RecoverdAttemptsComparator implements - Comparator { - @Override - public int compare(TaskAttemptInfo attempt1, TaskAttemptInfo attempt2) { - long diff = attempt1.getStartTime() - attempt2.getStartTime(); - return diff == 0 ? 0 : (diff < 0 ? 
-1 : 1); - } - } - - private static final RecoverdAttemptsComparator RECOVERED_ATTEMPTS_COMPARATOR = - new RecoverdAttemptsComparator(); + // For sorting task attempts by completion time + private static final Comparator TA_INFO_COMPARATOR = + new Comparator() { + @Override + public int compare(TaskAttemptInfo a, TaskAttemptInfo b) { + long diff = a.getFinishTime() - b.getFinishTime(); + return diff == 0 ? 0 : (diff < 0 ? -1 : 1); + } + }; @Override public TaskState getState() { @@ -280,8 +285,7 @@ public abstract class TaskImpl implements Task, EventHandler { TaskAttemptListener taskAttemptListener, Token jobToken, Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, - MRAppMetrics metrics, AppContext appContext) { + int appAttemptId, MRAppMetrics metrics, AppContext appContext) { this.conf = conf; this.clock = clock; this.jobFile = remoteJobConfFile; @@ -307,41 +311,15 @@ public abstract class TaskImpl implements Task, EventHandler { this.encryptedShuffle = conf.getBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, MRConfig.SHUFFLE_SSL_ENABLED_DEFAULT); - // See if this is from a previous generation. - if (completedTasksFromPreviousRun != null - && completedTasksFromPreviousRun.containsKey(taskId)) { - // This task has TaskAttempts from previous generation. We have to replay - // them. - LOG.info("Task is from previous run " + taskId); - TaskInfo taskInfo = completedTasksFromPreviousRun.get(taskId); - Map allAttempts = - taskInfo.getAllTaskAttempts(); - taskAttemptsFromPreviousGeneration = new ArrayList(); - taskAttemptsFromPreviousGeneration.addAll(allAttempts.values()); - Collections.sort(taskAttemptsFromPreviousGeneration, - RECOVERED_ATTEMPTS_COMPARATOR); - } - - if (taskAttemptsFromPreviousGeneration.isEmpty()) { - // All the previous attempts are exhausted, now start with a new - // generation. - - // All the new TaskAttemptIDs are generated based on MR - // ApplicationAttemptID so that attempts from previous lives don't - // over-step the current one. This assumes that a task won't have more - // than 1000 attempts in its single generation, which is very reasonable. - // Someone is nuts if he/she thinks he/she can live with 1000 TaskAttempts - // and requires serious medical attention. - nextAttemptNumber = (startCount - 1) * 1000; - } else { - // There are still some TaskAttempts from previous generation, use them - nextAttemptNumber = - taskAttemptsFromPreviousGeneration.remove(0).getAttemptId().getId(); - } - // This "this leak" is okay because the retained pointer is in an // instance variable. stateMachine = stateMachineFactory.make(this); + + // All the new TaskAttemptIDs are generated based on MR + // ApplicationAttemptID so that attempts from previous lives don't + // over-step the current one. This assumes that a task won't have more + // than 1000 attempts in its single generation, which is very reasonable. 
+ nextAttemptNumber = (appAttemptId - 1) * 1000; } @Override @@ -600,14 +578,28 @@ public abstract class TaskImpl implements Task, EventHandler { // This is always called in the Write Lock private void addAndScheduleAttempt(Avataar avataar) { - TaskAttempt attempt = createAttempt(); - ((TaskAttemptImpl) attempt).setAvataar(avataar); + TaskAttempt attempt = addAttempt(avataar); + inProgressAttempts.add(attempt.getID()); + //schedule the nextAttemptNumber + if (failedAttempts.size() > 0) { + eventHandler.handle(new TaskAttemptEvent(attempt.getID(), + TaskAttemptEventType.TA_RESCHEDULE)); + } else { + eventHandler.handle(new TaskAttemptEvent(attempt.getID(), + TaskAttemptEventType.TA_SCHEDULE)); + } + } + + private TaskAttemptImpl addAttempt(Avataar avataar) { + TaskAttemptImpl attempt = createAttempt(); + attempt.setAvataar(avataar); if (LOG.isDebugEnabled()) { LOG.debug("Created attempt " + attempt.getID()); } switch (attempts.size()) { case 0: - attempts = Collections.singletonMap(attempt.getID(), attempt); + attempts = Collections.singletonMap(attempt.getID(), + (TaskAttempt) attempt); break; case 1: @@ -623,24 +615,8 @@ public abstract class TaskImpl implements Task, EventHandler { break; } - // Update nextATtemptNumber - if (taskAttemptsFromPreviousGeneration.isEmpty()) { - ++nextAttemptNumber; - } else { - // There are still some TaskAttempts from previous generation, use them - nextAttemptNumber = - taskAttemptsFromPreviousGeneration.remove(0).getAttemptId().getId(); - } - - inProgressAttempts.add(attempt.getID()); - //schedule the nextAttemptNumber - if (failedAttempts.size() > 0) { - eventHandler.handle(new TaskAttemptEvent(attempt.getID(), - TaskAttemptEventType.TA_RESCHEDULE)); - } else { - eventHandler.handle(new TaskAttemptEvent(attempt.getID(), - TaskAttemptEventType.TA_SCHEDULE)); - } + ++nextAttemptNumber; + return attempt; } @Override @@ -705,6 +681,16 @@ public abstract class TaskImpl implements Task, EventHandler { } } + private void sendTaskStartedEvent() { + TaskStartedEvent tse = new TaskStartedEvent( + TypeConverter.fromYarn(taskId), getLaunchTime(), + TypeConverter.fromYarn(taskId.getTaskType()), + getSplitsAsString()); + eventHandler + .handle(new JobHistoryEvent(taskId.getJobId(), tse)); + historyTaskStartGenerated = true; + } + private static TaskFinishedEvent createTaskFinishedEvent(TaskImpl task, TaskStateInternal taskState) { TaskFinishedEvent tfe = new TaskFinishedEvent(TypeConverter.fromYarn(task.taskId), @@ -740,6 +726,16 @@ public abstract class TaskImpl implements Task, EventHandler { task.successfulAttempt = null; } + private void sendTaskSucceededEvents() { + eventHandler.handle(new JobTaskEvent(taskId, TaskState.SUCCEEDED)); + LOG.info("Task succeeded with attempt " + successfulAttempt); + if (historyTaskStartGenerated) { + TaskFinishedEvent tfe = createTaskFinishedEvent(this, + TaskStateInternal.SUCCEEDED); + eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe)); + } + } + /** * @return a String representation of the splits. 
* @@ -751,6 +747,122 @@ public abstract class TaskImpl implements Task, EventHandler { return ""; } + /** + * Recover a completed task from a previous application attempt + * @param taskInfo recovered info about the task + * @param recoverTaskOutput whether to recover task outputs + * @return state of the task after recovery + */ + private TaskStateInternal recover(TaskInfo taskInfo, + OutputCommitter committer, boolean recoverTaskOutput) { + LOG.info("Recovering task " + taskId + + " from prior app attempt, status was " + taskInfo.getTaskStatus()); + + scheduledTime = taskInfo.getStartTime(); + sendTaskStartedEvent(); + Collection attemptInfos = + taskInfo.getAllTaskAttempts().values(); + + if (attemptInfos.size() > 0) { + metrics.launchedTask(this); + } + + // recover the attempts for this task in the order they finished + // so task attempt completion events are ordered properly + int savedNextAttemptNumber = nextAttemptNumber; + ArrayList taInfos = + new ArrayList(taskInfo.getAllTaskAttempts().values()); + Collections.sort(taInfos, TA_INFO_COMPARATOR); + for (TaskAttemptInfo taInfo : taInfos) { + nextAttemptNumber = taInfo.getAttemptId().getId(); + TaskAttemptImpl attempt = addAttempt(Avataar.VIRGIN); + // handle the recovery inline so attempts complete before task does + attempt.handle(new TaskAttemptRecoverEvent(attempt.getID(), taInfo, + committer, recoverTaskOutput)); + finishedAttempts.add(attempt.getID()); + TaskAttemptCompletionEventStatus taces = null; + TaskAttemptState attemptState = attempt.getState(); + switch (attemptState) { + case FAILED: + taces = TaskAttemptCompletionEventStatus.FAILED; + break; + case KILLED: + taces = TaskAttemptCompletionEventStatus.KILLED; + break; + case SUCCEEDED: + taces = TaskAttemptCompletionEventStatus.SUCCEEDED; + break; + default: + throw new IllegalStateException( + "Unexpected attempt state during recovery: " + attemptState); + } + if (attemptState == TaskAttemptState.FAILED) { + failedAttempts.add(attempt.getID()); + if (failedAttempts.size() >= maxAttempts) { + taces = TaskAttemptCompletionEventStatus.TIPFAILED; + } + } + + // don't clobber the successful attempt completion event + // TODO: this shouldn't be necessary after MAPREDUCE-4330 + if (successfulAttempt == null) { + handleTaskAttemptCompletion(attempt.getID(), taces); + if (attemptState == TaskAttemptState.SUCCEEDED) { + successfulAttempt = attempt.getID(); + } + } + } + nextAttemptNumber = savedNextAttemptNumber; + + TaskStateInternal taskState = TaskStateInternal.valueOf( + taskInfo.getTaskStatus()); + switch (taskState) { + case SUCCEEDED: + if (successfulAttempt != null) { + sendTaskSucceededEvents(); + } else { + LOG.info("Missing successful attempt for task " + taskId + + ", recovering as RUNNING"); + // there must have been a fetch failure and the retry wasn't complete + taskState = TaskStateInternal.RUNNING; + metrics.runningTask(this); + addAndScheduleAttempt(Avataar.VIRGIN); + } + break; + case FAILED: + case KILLED: + { + if (taskState == TaskStateInternal.KILLED && attemptInfos.size() == 0) { + metrics.endWaitingTask(this); + } + TaskFailedEvent tfe = new TaskFailedEvent(taskInfo.getTaskId(), + taskInfo.getFinishTime(), taskInfo.getTaskType(), + taskInfo.getError(), taskInfo.getTaskStatus(), + taskInfo.getFailedDueToAttemptId(), taskInfo.getCounters()); + eventHandler.handle(new JobHistoryEvent(taskId.getJobId(), tfe)); + eventHandler.handle( + new JobTaskEvent(taskId, getExternalState(taskState))); + break; + } + default: + throw new 
java.lang.AssertionError("Unexpected recovered task state: " + + taskState); + } + + return taskState; + } + + private static class RecoverTransition + implements MultipleArcTransition { + + @Override + public TaskStateInternal transition(TaskImpl task, TaskEvent event) { + TaskRecoverEvent tre = (TaskRecoverEvent) event; + return task.recover(tre.getTaskInfo(), tre.getOutputCommitter(), + tre.getRecoverTaskOutput()); + } + } + private static class InitialScheduleTransition implements SingleArcTransition { @@ -758,13 +870,7 @@ public abstract class TaskImpl implements Task, EventHandler { public void transition(TaskImpl task, TaskEvent event) { task.addAndScheduleAttempt(Avataar.VIRGIN); task.scheduledTime = task.clock.getTime(); - TaskStartedEvent tse = new TaskStartedEvent( - TypeConverter.fromYarn(task.taskId), task.getLaunchTime(), - TypeConverter.fromYarn(task.taskId.getTaskType()), - task.getSplitsAsString()); - task.eventHandler - .handle(new JobHistoryEvent(task.taskId.getJobId(), tse)); - task.historyTaskStartGenerated = true; + task.sendTaskStartedEvent(); } } @@ -818,16 +924,7 @@ public abstract class TaskImpl implements Task, EventHandler { task.finishedAttempts.add(taskAttemptId); task.inProgressAttempts.remove(taskAttemptId); task.successfulAttempt = taskAttemptId; - task.eventHandler.handle(new JobTaskEvent( - task.taskId, TaskState.SUCCEEDED)); - LOG.info("Task succeeded with attempt " + task.successfulAttempt); - // issue kill to all other attempts - if (task.historyTaskStartGenerated) { - TaskFinishedEvent tfe = createTaskFinishedEvent(task, - TaskStateInternal.SUCCEEDED); - task.eventHandler.handle(new JobHistoryEvent(task.taskId.getJobId(), - tfe)); - } + task.sendTaskSucceededEvents(); for (TaskAttempt attempt : task.attempts.values()) { if (attempt.getID() != task.successfulAttempt && // This is okay because it can only talk us out of sending a diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java deleted file mode 100644 index c7134a46bd7..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/Recovery.java +++ /dev/null @@ -1,39 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package org.apache.hadoop.mapreduce.v2.app.recover; - -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; -import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; -import org.apache.hadoop.yarn.Clock; -import org.apache.hadoop.yarn.event.Dispatcher; - -public interface Recovery { - - Dispatcher getDispatcher(); - - Clock getClock(); - - Map getCompletedTasks(); - - List getAMInfos(); -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java deleted file mode 100644 index aca752721a7..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java +++ /dev/null @@ -1,480 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package org.apache.hadoop.mapreduce.v2.app.recover; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.OutputCommitter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.TaskType; -import org.apache.hadoop.mapreduce.TypeConverter; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; -import org.apache.hadoop.mapreduce.v2.api.records.Phase; -import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; -import org.apache.hadoop.mapreduce.v2.api.records.TaskId; -import org.apache.hadoop.mapreduce.v2.api.records.TaskState; -import org.apache.hadoop.mapreduce.v2.app.ControlledClock; -import org.apache.hadoop.mapreduce.v2.app.commit.CommitterTaskAbortEvent; -import org.apache.hadoop.mapreduce.v2.app.commit.CommitterEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; -import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent; -import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; -import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; -import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; -import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; -import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; -import org.apache.hadoop.yarn.Clock; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.NodeId; -import 
org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.Event; -import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.service.CompositeService; -import org.apache.hadoop.yarn.service.Service; -import org.apache.hadoop.yarn.util.BuilderUtils; -import org.apache.hadoop.yarn.util.ConverterUtils; - -/* - * Recovers the completed tasks from the previous life of Application Master. - * The completed tasks are deciphered from the history file of the previous life. - * Recovery service intercepts and replay the events for completed tasks. - * While recovery is in progress, the scheduling of new tasks are delayed by - * buffering the task schedule events. - * The recovery service controls the clock while recovery is in progress. - */ - -//TODO: -//task cleanup for all non completed tasks -public class RecoveryService extends CompositeService implements Recovery { - - private static final Log LOG = LogFactory.getLog(RecoveryService.class); - - private final ApplicationAttemptId applicationAttemptId; - private final OutputCommitter committer; - private final boolean newApiCommitter; - private final Dispatcher dispatcher; - private final ControlledClock clock; - - private JobInfo jobInfo = null; - private final Map completedTasks = - new HashMap(); - - private final List pendingTaskScheduleEvents = - new ArrayList(); - - private volatile boolean recoveryMode = false; - - public RecoveryService(ApplicationAttemptId applicationAttemptId, - Clock clock, OutputCommitter committer, boolean newApiCommitter) { - super("RecoveringDispatcher"); - this.applicationAttemptId = applicationAttemptId; - this.committer = committer; - this.newApiCommitter = newApiCommitter; - this.dispatcher = createRecoveryDispatcher(); - this.clock = new ControlledClock(clock); - addService((Service) dispatcher); - } - - @Override - public void init(Configuration conf) { - super.init(conf); - // parse the history file - try { - parse(); - } catch (Exception e) { - LOG.warn(e); - LOG.warn("Could not parse the old history file. Aborting recovery. " - + "Starting afresh.", e); - } - if (completedTasks.size() > 0) { - recoveryMode = true; - LOG.info("SETTING THE RECOVERY MODE TO TRUE. 
NO OF COMPLETED TASKS " - + "TO RECOVER " + completedTasks.size()); - LOG.info("Job launch time " + jobInfo.getLaunchTime()); - clock.setTime(jobInfo.getLaunchTime()); - } - } - - @Override - public Dispatcher getDispatcher() { - return dispatcher; - } - - @Override - public Clock getClock() { - return clock; - } - - @Override - public Map getCompletedTasks() { - return completedTasks; - } - - @Override - public List getAMInfos() { - if (jobInfo == null || jobInfo.getAMInfos() == null) { - return new LinkedList(); - } - List amInfos = new LinkedList(); - for (org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo jhAmInfo : jobInfo - .getAMInfos()) { - AMInfo amInfo = - MRBuilderUtils.newAMInfo(jhAmInfo.getAppAttemptId(), - jhAmInfo.getStartTime(), jhAmInfo.getContainerId(), - jhAmInfo.getNodeManagerHost(), jhAmInfo.getNodeManagerPort(), - jhAmInfo.getNodeManagerHttpPort()); - - amInfos.add(amInfo); - } - return amInfos; - } - - private void parse() throws IOException { - FSDataInputStream in = - getPreviousJobHistoryFileStream(getConfig(), applicationAttemptId); - JobHistoryParser parser = new JobHistoryParser(in); - jobInfo = parser.parse(); - Exception parseException = parser.getParseException(); - if (parseException != null) { - LOG.info("Got an error parsing job-history file" + - ", ignoring incomplete events.", parseException); - } - Map taskInfos = jobInfo - .getAllTasks(); - for (TaskInfo taskInfo : taskInfos.values()) { - if (TaskState.SUCCEEDED.toString().equals(taskInfo.getTaskStatus())) { - Iterator> taskAttemptIterator = - taskInfo.getAllTaskAttempts().entrySet().iterator(); - while (taskAttemptIterator.hasNext()) { - Map.Entry currentEntry = taskAttemptIterator.next(); - if (!jobInfo.getAllCompletedTaskAttempts().containsKey(currentEntry.getKey())) { - taskAttemptIterator.remove(); - } - } - completedTasks - .put(TypeConverter.toYarn(taskInfo.getTaskId()), taskInfo); - LOG.info("Read from history task " - + TypeConverter.toYarn(taskInfo.getTaskId())); - } - } - LOG.info("Read completed tasks from history " - + completedTasks.size()); - } - - public static FSDataInputStream getPreviousJobHistoryFileStream( - Configuration conf, ApplicationAttemptId applicationAttemptId) - throws IOException { - FSDataInputStream in = null; - Path historyFile = null; - String jobId = - TypeConverter.fromYarn(applicationAttemptId.getApplicationId()) - .toString(); - String jobhistoryDir = - JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId); - Path histDirPath = - FileContext.getFileContext(conf).makeQualified(new Path(jobhistoryDir)); - LOG.info("Trying file " + histDirPath.toString()); - FileContext fc = FileContext.getFileContext(histDirPath.toUri(), conf); - // read the previous history file - historyFile = - fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(histDirPath, - jobId, (applicationAttemptId.getAttemptId() - 1))); - LOG.info("History file is at " + historyFile); - in = fc.open(historyFile); - return in; - } - - protected Dispatcher createRecoveryDispatcher() { - return new RecoveryDispatcher(); - } - - @SuppressWarnings("rawtypes") - class RecoveryDispatcher extends AsyncDispatcher { - private final EventHandler actualHandler; - private final EventHandler handler; - - RecoveryDispatcher() { - super(); - actualHandler = super.getEventHandler(); - handler = new InterceptingEventHandler(actualHandler); - } - - @Override - @SuppressWarnings("unchecked") - public void dispatch(Event event) { - if (recoveryMode) { - if (event.getType() == 
TaskAttemptEventType.TA_CONTAINER_LAUNCHED) { - TaskAttemptInfo attInfo = getTaskAttemptInfo(((TaskAttemptEvent) event) - .getTaskAttemptID()); - LOG.info("Recovered Attempt start time " + attInfo.getStartTime()); - clock.setTime(attInfo.getStartTime()); - - } else if (event.getType() == TaskAttemptEventType.TA_DONE - || event.getType() == TaskAttemptEventType.TA_FAILMSG - || event.getType() == TaskAttemptEventType.TA_KILL) { - TaskAttemptInfo attInfo = getTaskAttemptInfo(((TaskAttemptEvent) event) - .getTaskAttemptID()); - LOG.info("Recovered Attempt finish time " + attInfo.getFinishTime()); - clock.setTime(attInfo.getFinishTime()); - } - - else if (event.getType() == TaskEventType.T_ATTEMPT_FAILED - || event.getType() == TaskEventType.T_ATTEMPT_KILLED - || event.getType() == TaskEventType.T_ATTEMPT_SUCCEEDED) { - TaskTAttemptEvent tEvent = (TaskTAttemptEvent) event; - LOG.info("Recovered Task attempt " + tEvent.getTaskAttemptID()); - TaskInfo taskInfo = completedTasks.get(tEvent.getTaskAttemptID() - .getTaskId()); - taskInfo.getAllTaskAttempts().remove( - TypeConverter.fromYarn(tEvent.getTaskAttemptID())); - // remove the task info from completed tasks if all attempts are - // recovered - if (taskInfo.getAllTaskAttempts().size() == 0) { - completedTasks.remove(tEvent.getTaskAttemptID().getTaskId()); - // checkForRecoveryComplete - LOG.info("CompletedTasks() " + completedTasks.size()); - if (completedTasks.size() == 0) { - recoveryMode = false; - clock.reset(); - LOG.info("Setting the recovery mode to false. " + - "Recovery is complete!"); - - // send all pending tasks schedule events - for (TaskEvent tEv : pendingTaskScheduleEvents) { - actualHandler.handle(tEv); - } - - } - } - } - } - realDispatch(event); - } - - public void realDispatch(Event event) { - super.dispatch(event); - } - - @Override - public EventHandler getEventHandler() { - return handler; - } - } - - private TaskAttemptInfo getTaskAttemptInfo(TaskAttemptId id) { - TaskInfo taskInfo = completedTasks.get(id.getTaskId()); - return taskInfo.getAllTaskAttempts().get(TypeConverter.fromYarn(id)); - } - - @SuppressWarnings({"rawtypes", "unchecked"}) - private class InterceptingEventHandler implements EventHandler { - EventHandler actualHandler; - - InterceptingEventHandler(EventHandler actualHandler) { - this.actualHandler = actualHandler; - } - - @Override - public void handle(Event event) { - if (!recoveryMode) { - // delegate to the dispatcher one - actualHandler.handle(event); - return; - } - - else if (event.getType() == TaskEventType.T_SCHEDULE) { - TaskEvent taskEvent = (TaskEvent) event; - // delay the scheduling of new tasks till previous ones are recovered - if (completedTasks.get(taskEvent.getTaskID()) == null) { - LOG.debug("Adding to pending task events " - + taskEvent.getTaskID()); - pendingTaskScheduleEvents.add(taskEvent); - return; - } - } - - else if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) { - TaskAttemptId aId = ((ContainerAllocatorEvent) event).getAttemptID(); - TaskAttemptInfo attInfo = getTaskAttemptInfo(aId); - LOG.debug("CONTAINER_REQ " + aId); - sendAssignedEvent(aId, attInfo); - return; - } - - else if (event.getType() == CommitterEventType.TASK_ABORT) { - TaskAttemptId aId = ((CommitterTaskAbortEvent) event).getAttemptID(); - LOG.debug("TASK_CLEAN"); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_CLEANUP_DONE)); - return; - } - - else if (event.getType() == ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH) { - TaskAttemptId aId = 
((ContainerRemoteLaunchEvent) event) - .getTaskAttemptID(); - TaskAttemptInfo attInfo = getTaskAttemptInfo(aId); - actualHandler.handle(new TaskAttemptContainerLaunchedEvent(aId, - attInfo.getShufflePort())); - // send the status update event - sendStatusUpdateEvent(aId, attInfo); - - TaskAttemptState state = TaskAttemptState.valueOf(attInfo.getTaskStatus()); - switch (state) { - case SUCCEEDED: - //recover the task output - - // check the committer type and construct corresponding context - TaskAttemptContext taskContext = null; - if(newApiCommitter) { - taskContext = new TaskAttemptContextImpl(getConfig(), - attInfo.getAttemptId()); - } else { - taskContext = new org.apache.hadoop.mapred.TaskAttemptContextImpl(new JobConf(getConfig()), - TypeConverter.fromYarn(aId)); - } - - try { - TaskType type = taskContext.getTaskAttemptID().getTaskID().getTaskType(); - int numReducers = taskContext.getConfiguration().getInt(MRJobConfig.NUM_REDUCES, 1); - if(type == TaskType.REDUCE || (type == TaskType.MAP && numReducers <= 0)) { - committer.recoverTask(taskContext); - LOG.info("Recovered output from task attempt " + attInfo.getAttemptId()); - } else { - LOG.info("Will not try to recover output for " - + taskContext.getTaskAttemptID()); - } - } catch (IOException e) { - LOG.error("Caught an exception while trying to recover task "+aId, e); - actualHandler.handle(new JobDiagnosticsUpdateEvent( - aId.getTaskId().getJobId(), "Error in recovering task output " + - e.getMessage())); - actualHandler.handle(new JobEvent(aId.getTaskId().getJobId(), - JobEventType.INTERNAL_ERROR)); - } - - // send the done event - LOG.info("Sending done event to recovered attempt " + aId); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_DONE)); - break; - case KILLED: - LOG.info("Sending kill event to recovered attempt " + aId); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_KILL)); - break; - default: - LOG.info("Sending fail event to recovered attempt " + aId); - actualHandler.handle(new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_FAILMSG)); - break; - } - return; - } - - else if (event.getType() == - ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP) { - TaskAttemptId aId = ((ContainerLauncherEvent) event) - .getTaskAttemptID(); - actualHandler.handle( - new TaskAttemptEvent(aId, - TaskAttemptEventType.TA_CONTAINER_CLEANED)); - return; - } - - // delegate to the actual handler - actualHandler.handle(event); - } - - private void sendStatusUpdateEvent(TaskAttemptId yarnAttemptID, - TaskAttemptInfo attemptInfo) { - LOG.info("Sending status update event to " + yarnAttemptID); - TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus(); - taskAttemptStatus.id = yarnAttemptID; - taskAttemptStatus.progress = 1.0f; - taskAttemptStatus.stateString = attemptInfo.getTaskStatus(); - // taskAttemptStatus.outputSize = attemptInfo.getOutputSize(); - taskAttemptStatus.phase = Phase.CLEANUP; - org.apache.hadoop.mapreduce.Counters cntrs = attemptInfo.getCounters(); - if (cntrs == null) { - taskAttemptStatus.counters = null; - } else { - taskAttemptStatus.counters = cntrs; - } - actualHandler.handle(new TaskAttemptStatusUpdateEvent( - taskAttemptStatus.id, taskAttemptStatus)); - } - - private void sendAssignedEvent(TaskAttemptId yarnAttemptID, - TaskAttemptInfo attemptInfo) { - LOG.info("Sending assigned event to " + yarnAttemptID); - ContainerId cId = attemptInfo.getContainerId(); - - NodeId nodeId = - ConverterUtils.toNodeId(attemptInfo.getHostname() + ":" - + 
attemptInfo.getPort()); - // Resource/Priority/ApplicationACLs are only needed while launching the - // container on an NM, these are already completed tasks, so setting them - // to null - Container container = BuilderUtils.newContainer(cId, nodeId, - attemptInfo.getTrackerName() + ":" + attemptInfo.getHttpPort(), - null, null, null); - actualHandler.handle(new TaskAttemptContainerAssignedEvent(yarnAttemptID, - container, null)); - } - } - -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index 4a28ab00637..4ef4d8d9f4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -414,7 +414,8 @@ public class MRApp extends MRAppMaster { Job newJob = new TestJob(getJobId(), getAttemptID(), conf, getDispatcher().getEventHandler(), getTaskAttemptListener(), getContext().getClock(), - isNewApiCommitter(), currentUser.getUserName(), getContext(), + getCommitter(), isNewApiCommitter(), + currentUser.getUserName(), getContext(), forcedState, diagnostic); ((AppContext) getContext()).getAllJobs().put(newJob.getID(), newJob); @@ -648,12 +649,13 @@ public class MRApp extends MRAppMaster { public TestJob(JobId jobId, ApplicationAttemptId applicationAttemptId, Configuration conf, EventHandler eventHandler, TaskAttemptListener taskAttemptListener, Clock clock, - boolean newApiCommitter, String user, AppContext appContext, + OutputCommitter committer, boolean newApiCommitter, + String user, AppContext appContext, JobStateInternal forcedState, String diagnostic) { super(jobId, getApplicationAttemptId(applicationId, getStartCount()), conf, eventHandler, taskAttemptListener, new JobTokenSecretManager(), new Credentials(), clock, - getCompletedTaskFromPreviousRun(), metrics, + getCompletedTaskFromPreviousRun(), metrics, committer, newApiCommitter, user, System.currentTimeMillis(), getAllAMInfos(), appContext, forcedState, diagnostic); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java index a0a08c91bce..e89f0374bf1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java @@ -18,10 +18,21 @@ package org.apache.hadoop.mapreduce.v2.app; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; +import java.util.List; +import java.util.Map; import junit.framework.Assert; @@ -31,36 +42,66 @@ 
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.JobCounter; +import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.TaskID; +import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.jobhistory.Event; +import org.apache.hadoop.mapreduce.jobhistory.EventType; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; +import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.Task; import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerLaunchedEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskRecoverEvent; +import org.apache.hadoop.mapreduce.v2.app.job.impl.MapTaskImpl; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; +import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.yarn.Clock; +import org.apache.hadoop.yarn.ClusterInfo; +import org.apache.hadoop.yarn.SystemClock; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.util.BuilderUtils; import 
org.junit.Test; +import org.mockito.ArgumentCaptor; @SuppressWarnings({"unchecked", "rawtypes"}) public class TestRecovery { @@ -75,6 +116,7 @@ public class TestRecovery { private Text val1 = new Text("val1"); private Text val2 = new Text("val2"); + /** * AM with 2 maps and 1 reduce. For 1st map, one attempt fails, one attempt * completely disappears because of failed launch, one attempt gets killed and @@ -1011,6 +1053,423 @@ public class TestRecovery { app.verifyCompleted(); } + @Test + public void testRecoverySuccessAttempt() { + LOG.info("--- START: testRecoverySuccessAttempt ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.SUCCEEDED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo,mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.SUCCEEDED); + finalAttemptStates.put(taId2, TaskAttemptState.FAILED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.TASK_FINISHED); + recoveryChecker(recoverMapTask, TaskState.SUCCEEDED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 1L); + } + + @Test + public void testRecoveryAllFailAttempts() { + LOG.info("--- START: testRecoveryAllFailAttempts ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, 
+ TaskAttemptState.FAILED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("FAILED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.FAILED); + finalAttemptStates.put(taId2, TaskAttemptState.FAILED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.TASK_FAILED); + recoveryChecker(recoverMapTask, TaskState.FAILED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 2L); + } + + @Test + public void testRecoveryTaskSuccessAllAttemptsFail() { + LOG.info("--- START: testRecoveryTaskSuccessAllAttemptsFail ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.FAILED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.FAILED); + finalAttemptStates.put(taId2, TaskAttemptState.FAILED); + // check for one new attempt launched since successful attempt not found + TaskAttemptID taId3 = new TaskAttemptID(taskID, 2000); + finalAttemptStates.put(taId3, TaskAttemptState.NEW); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FAILED); + recoveryChecker(recoverMapTask, TaskState.RUNNING, finalAttemptStates, + arg, 
jobHistoryEvents, 2L, 2L); + } + + @Test + public void testRecoveryTaskSuccessAllAttemptsSucceed() { + LOG.info("--- START: testRecoveryTaskSuccessAllAttemptsFail ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.SUCCEEDED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.SUCCEEDED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("SUCCEEDED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.SUCCEEDED); + finalAttemptStates.put(taId2, TaskAttemptState.SUCCEEDED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_FINISHED); + jobHistoryEvents.add(EventType.TASK_FINISHED); + recoveryChecker(recoverMapTask, TaskState.SUCCEEDED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 0L); + } + + @Test + public void testRecoveryAllAttemptsKilled() { + LOG.info("--- START: testRecoveryAllAttemptsKilled ---"); + + long clusterTimestamp = System.currentTimeMillis(); + EventHandler mockEventHandler = mock(EventHandler.class); + MapTaskImpl recoverMapTask = getMockMapTask(clusterTimestamp, + mockEventHandler); + + TaskId taskId = recoverMapTask.getID(); + JobID jobID = new JobID(Long.toString(clusterTimestamp), 1); + TaskID taskID = new TaskID(jobID, + org.apache.hadoop.mapreduce.TaskType.MAP, taskId.getId()); + + //Mock up the TaskAttempts + Map mockTaskAttempts = + new HashMap(); + TaskAttemptID taId1 = new TaskAttemptID(taskID, 2); + TaskAttemptInfo mockTAinfo1 = getMockTaskAttemptInfo(taId1, + TaskAttemptState.KILLED); + mockTaskAttempts.put(taId1, mockTAinfo1); + + TaskAttemptID taId2 = new TaskAttemptID(taskID, 1); + TaskAttemptInfo mockTAinfo2 = getMockTaskAttemptInfo(taId2, + TaskAttemptState.KILLED); + mockTaskAttempts.put(taId2, mockTAinfo2); + + OutputCommitter mockCommitter = mock (OutputCommitter.class); + TaskInfo mockTaskInfo = mock(TaskInfo.class); + when(mockTaskInfo.getTaskStatus()).thenReturn("KILLED"); + when(mockTaskInfo.getTaskId()).thenReturn(taskID); + when(mockTaskInfo.getAllTaskAttempts()).thenReturn(mockTaskAttempts); + + recoverMapTask.handle( + new 
TaskRecoverEvent(taskId, mockTaskInfo, mockCommitter, true)); + + ArgumentCaptor arg = ArgumentCaptor.forClass(Event.class); + verify(mockEventHandler,atLeast(1)).handle( + (org.apache.hadoop.yarn.event.Event) arg.capture()); + + Map finalAttemptStates = + new HashMap(); + finalAttemptStates.put(taId1, TaskAttemptState.KILLED); + finalAttemptStates.put(taId2, TaskAttemptState.KILLED); + + List jobHistoryEvents = new ArrayList(); + jobHistoryEvents.add(EventType.TASK_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_KILLED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_STARTED); + jobHistoryEvents.add(EventType.MAP_ATTEMPT_KILLED); + jobHistoryEvents.add(EventType.TASK_FAILED); + recoveryChecker(recoverMapTask, TaskState.KILLED, finalAttemptStates, + arg, jobHistoryEvents, 2L, 0L); + } + + private void recoveryChecker(MapTaskImpl checkTask, TaskState finalState, + Map finalAttemptStates, + ArgumentCaptor arg, List expectedJobHistoryEvents, + long expectedMapLaunches, long expectedFailedMaps) { + + assertEquals("Final State of Task", finalState, checkTask.getState()); + + Map recoveredAttempts = + checkTask.getAttempts(); + assertEquals("Expected Number of Task Attempts", + finalAttemptStates.size(), recoveredAttempts.size()); + for (TaskAttemptID taID : finalAttemptStates.keySet()) { + assertEquals("Expected Task Attempt State", + finalAttemptStates.get(taID), + recoveredAttempts.get(TypeConverter.toYarn(taID)).getState()); + } + + Iterator ie = arg.getAllValues().iterator(); + int eventNum = 0; + long totalLaunchedMaps = 0; + long totalFailedMaps = 0; + boolean jobTaskEventReceived = false; + + while (ie.hasNext()) { + Object current = ie.next(); + ++eventNum; + LOG.info(eventNum + " " + current.getClass().getName()); + if (current instanceof JobHistoryEvent) { + JobHistoryEvent jhe = (JobHistoryEvent) current; + LOG.info(expectedJobHistoryEvents.get(0).toString() + " " + + jhe.getHistoryEvent().getEventType().toString() + " " + + jhe.getJobID()); + assertEquals(expectedJobHistoryEvents.get(0), + jhe.getHistoryEvent().getEventType()); + expectedJobHistoryEvents.remove(0); + } else if (current instanceof JobCounterUpdateEvent) { + JobCounterUpdateEvent jcue = (JobCounterUpdateEvent) current; + + LOG.info("JobCounterUpdateEvent " + + jcue.getCounterUpdates().get(0).getCounterKey() + + " " + jcue.getCounterUpdates().get(0).getIncrementValue()); + if (jcue.getCounterUpdates().get(0).getCounterKey() == + JobCounter.NUM_FAILED_MAPS) { + totalFailedMaps += jcue.getCounterUpdates().get(0) + .getIncrementValue(); + } else if (jcue.getCounterUpdates().get(0).getCounterKey() == + JobCounter.TOTAL_LAUNCHED_MAPS) { + totalLaunchedMaps += jcue.getCounterUpdates().get(0) + .getIncrementValue(); + } + } else if (current instanceof JobTaskEvent) { + JobTaskEvent jte = (JobTaskEvent) current; + assertEquals(jte.getState(), finalState); + jobTaskEventReceived = true; + } + } + assertTrue(jobTaskEventReceived || (finalState == TaskState.RUNNING)); + assertEquals("Did not process all expected JobHistoryEvents", + 0, expectedJobHistoryEvents.size()); + assertEquals("Expected Map Launches", + expectedMapLaunches, totalLaunchedMaps); + assertEquals("Expected Failed Maps", + expectedFailedMaps, totalFailedMaps); + } + + private MapTaskImpl getMockMapTask(long clusterTimestamp, EventHandler eh) { + + ApplicationId appId = BuilderUtils.newApplicationId(clusterTimestamp, 1); + JobId jobId = MRBuilderUtils.newJobId(appId, 1); + + int partitions = 2; + + Path 
remoteJobConfFile = mock(Path.class); + JobConf conf = new JobConf(); + TaskAttemptListener taskAttemptListener = mock(TaskAttemptListener.class); + Token jobToken = + (Token) mock(Token.class); + Credentials credentials = null; + Clock clock = new SystemClock(); + int appAttemptId = 3; + MRAppMetrics metrics = mock(MRAppMetrics.class); + Resource minContainerRequirements = mock(Resource.class); + when(minContainerRequirements.getMemory()).thenReturn(1000); + + ClusterInfo clusterInfo = mock(ClusterInfo.class); + when(clusterInfo.getMinContainerCapability()).thenReturn( + minContainerRequirements); + AppContext appContext = mock(AppContext.class); + when(appContext.getClusterInfo()).thenReturn(clusterInfo); + + TaskSplitMetaInfo taskSplitMetaInfo = mock(TaskSplitMetaInfo.class); + MapTaskImpl mapTask = new MapTaskImpl(jobId, partitions, + eh, remoteJobConfFile, conf, + taskSplitMetaInfo, taskAttemptListener, jobToken, credentials, clock, + appAttemptId, metrics, appContext); + return mapTask; + } + + private TaskAttemptInfo getMockTaskAttemptInfo(TaskAttemptID tai, + TaskAttemptState tas) { + + ContainerId ci = mock(ContainerId.class); + Counters counters = mock(Counters.class); + TaskType tt = TaskType.MAP; + + long finishTime = System.currentTimeMillis(); + + TaskAttemptInfo mockTAinfo = mock(TaskAttemptInfo.class); + + when(mockTAinfo.getAttemptId()).thenReturn(tai); + when(mockTAinfo.getContainerId()).thenReturn(ci); + when(mockTAinfo.getCounters()).thenReturn(counters); + when(mockTAinfo.getError()).thenReturn(""); + when(mockTAinfo.getFinishTime()).thenReturn(finishTime); + when(mockTAinfo.getHostname()).thenReturn("localhost"); + when(mockTAinfo.getHttpPort()).thenReturn(23); + when(mockTAinfo.getMapFinishTime()).thenReturn(finishTime - 1000L); + when(mockTAinfo.getPort()).thenReturn(24); + when(mockTAinfo.getRackname()).thenReturn("defaultRack"); + when(mockTAinfo.getShuffleFinishTime()).thenReturn(finishTime - 2000L); + when(mockTAinfo.getShufflePort()).thenReturn(25); + when(mockTAinfo.getSortFinishTime()).thenReturn(finishTime - 3000L); + when(mockTAinfo.getStartTime()).thenReturn(finishTime -10000); + when(mockTAinfo.getState()).thenReturn("task in progress"); + when(mockTAinfo.getTaskStatus()).thenReturn(tas.toString()); + when(mockTAinfo.getTaskType()).thenReturn(tt); + when(mockTAinfo.getTrackerName()).thenReturn("TrackerName"); + return mockTAinfo; + } + private void writeBadOutput(TaskAttempt attempt, Configuration conf) throws Exception { TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, @@ -1145,5 +1604,16 @@ public class TestRecovery { public static void main(String[] arg) throws Exception { TestRecovery test = new TestRecovery(); test.testCrashed(); + test.testMultipleCrashes(); + test.testOutputRecovery(); + test.testOutputRecoveryMapsOnly(); + test.testRecoveryWithOldCommiter(); + test.testSpeculative(); + test.testRecoveryWithoutShuffleSecret(); + test.testRecoverySuccessAttempt(); + test.testRecoveryAllFailAttempts(); + test.testRecoveryTaskSuccessAllAttemptsFail(); + test.testRecoveryTaskSuccessAllAttemptsSucceed(); + test.testRecoveryAllAttemptsKilled(); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java index 3636273a9c4..10b79aba73e 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestStagingCleanup.java @@ -316,7 +316,8 @@ import org.junit.Test; Job newJob = new TestJob(getJobId(), getAttemptID(), conf, getDispatcher().getEventHandler(), getTaskAttemptListener(), getContext().getClock(), - isNewApiCommitter(), currentUser.getUserName(), getContext(), + getCommitter(), isNewApiCommitter(), + currentUser.getUserName(), getContext(), forcedState, diagnostic); ((AppContext) getContext()).getAllJobs().put(newJob.getID(), newJob); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java index aaaa4472b8c..88cf949bc96 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; +import java.util.Collections; import java.util.EnumSet; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.CyclicBarrier; @@ -35,6 +36,7 @@ import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.jobhistory.EventType; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; +import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.JobStatus.State; @@ -47,6 +49,7 @@ import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; +import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.mapreduce.v2.app.AppContext; @@ -57,6 +60,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobDiagnosticsUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.JobFinishEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobStartEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.JobTaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl.InitTransition; @@ -69,7 +73,6 @@ import org.apache.hadoop.yarn.SystemClock; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; import 
org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; @@ -133,7 +136,7 @@ public class TestJobImpl { JobImpl job = createStubbedJob(conf, dispatcher, 0); job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(job.getID(), JobEventType.JOB_START)); + job.handle(new JobStartEvent(job.getID())); assertJobState(job, JobStateInternal.SUCCEEDED); dispatcher.stop(); commitHandler.stop(); @@ -222,7 +225,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); job.handle(new JobEvent(job.getID(), JobEventType.JOB_AM_REBOOT)); @@ -284,7 +287,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); job.handle(new JobEvent(job.getID(), JobEventType.JOB_KILL)); @@ -351,7 +354,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAIL_ABORT); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); @@ -388,7 +391,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.SETUP); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); @@ -428,7 +431,7 @@ public class TestJobImpl { // Verify access JobImpl job1 = new JobImpl(jobId, null, conf1, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job1.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertFalse(job1.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -439,7 +442,7 @@ public class TestJobImpl { // Verify access JobImpl job2 = new JobImpl(jobId, null, conf2, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job2.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job2.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -450,7 +453,7 @@ public class TestJobImpl { // Verify access JobImpl job3 = new JobImpl(jobId, null, conf3, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job3.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job3.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -461,7 +464,7 @@ public class TestJobImpl { // Verify access JobImpl job4 = new JobImpl(jobId, null, conf4, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job4.checkAccess(ugi1, JobACL.VIEW_JOB)); Assert.assertTrue(job4.checkAccess(ugi2, JobACL.VIEW_JOB)); @@ -472,7 +475,7 
@@ public class TestJobImpl { // Verify access JobImpl job5 = new JobImpl(jobId, null, conf5, null, null, null, null, null, - null, null, true, null, 0, null, null, null, null); + null, null, null, true, null, 0, null, null, null, null); Assert.assertTrue(job5.checkAccess(ugi1, null)); Assert.assertTrue(job5.checkAccess(ugi2, null)); } @@ -490,7 +493,7 @@ public class TestJobImpl { mock(EventHandler.class), null, mock(JobTokenSecretManager.class), null, new SystemClock(), null, - mrAppMetrics, true, null, 0, null, null, null, null); + mrAppMetrics, null, true, null, 0, null, null, null, null); job.handle(diagUpdateEvent); String diagnostics = job.getReport().getDiagnostics(); Assert.assertNotNull(diagnostics); @@ -501,7 +504,7 @@ public class TestJobImpl { mock(EventHandler.class), null, mock(JobTokenSecretManager.class), null, new SystemClock(), null, - mrAppMetrics, true, null, 0, null, null, null, null); + mrAppMetrics, null, true, null, 0, null, null, null, null); job.handle(new JobEvent(jobId, JobEventType.JOB_KILL)); job.handle(diagUpdateEvent); diagnostics = job.getReport().getDiagnostics(); @@ -556,7 +559,7 @@ public class TestJobImpl { JobImpl job = new JobImpl(jobId, Records .newRecord(ApplicationAttemptId.class), conf, mock(EventHandler.class), null, new JobTokenSecretManager(), new Credentials(), null, null, - mrAppMetrics, true, null, 0, null, null, null, null); + mrAppMetrics, null, true, null, 0, null, null, null, null); InitTransition initTransition = getInitTransition(2); JobEvent mockJobEvent = mock(JobEvent.class); initTransition.transition(job, mockJobEvent); @@ -597,7 +600,7 @@ public class TestJobImpl { JobId jobId = job.getID(); job.handle(new JobEvent(jobId, JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(jobId, JobEventType.JOB_START)); + job.handle(new JobStartEvent(jobId)); assertJobState(job, JobStateInternal.FAILED); job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_COMPLETED)); @@ -661,7 +664,7 @@ public class TestJobImpl { StubbedJob job = createStubbedJob(conf, dispatcher, numSplits); job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT)); assertJobState(job, JobStateInternal.INITED); - job.handle(new JobEvent(job.getID(), JobEventType.JOB_START)); + job.handle(new JobStartEvent(job.getID())); assertJobState(job, JobStateInternal.RUNNING); return job; } @@ -785,9 +788,9 @@ public class TestJobImpl { boolean newApiCommitter, String user, int numSplits) { super(jobId, applicationAttemptId, conf, eventHandler, null, new JobTokenSecretManager(), new Credentials(), - new SystemClock(), null, MRAppMetrics.create(), - newApiCommitter, user, System.currentTimeMillis(), null, null, null, - null); + new SystemClock(), Collections. 
emptyMap(), + MRAppMetrics.create(), null, newApiCommitter, user, + System.currentTimeMillis(), null, null, null, null); initTransition = getInitTransition(numSplits); localFactory = stateMachineFactory.addTransition(JobStateInternal.NEW, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java index d3297b3fb6b..9fd0fb8b1ac 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java @@ -27,7 +27,6 @@ import static org.mockito.Mockito.when; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -38,7 +37,6 @@ import org.apache.hadoop.mapred.TaskUmbilicalProtocol; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.TaskCounter; -import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.v2.api.records.Avataar; @@ -80,7 +78,6 @@ public class TestTaskImpl { private Path remoteJobConfFile; private Credentials credentials; private Clock clock; - private Map completedTasksFromPreviousRun; private MRAppMetrics metrics; private TaskImpl mockTask; private ApplicationId appId; @@ -104,13 +101,12 @@ public class TestTaskImpl { EventHandler eventHandler, Path remoteJobConfFile, JobConf conf, TaskAttemptListener taskAttemptListener, Token jobToken, - Credentials credentials, Clock clock, - Map completedTasksFromPreviousRun, int startCount, + Credentials credentials, Clock clock, int startCount, MRAppMetrics metrics, AppContext appContext, TaskType taskType) { super(jobId, taskType , partition, eventHandler, remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, metrics, appContext); + startCount, metrics, appContext); this.taskType = taskType; } @@ -247,8 +243,7 @@ public class TestTaskImpl { return new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, credentials, clock, - completedTasksFromPreviousRun, startCount, - metrics, appContext, taskType); + startCount, metrics, appContext, taskType); } @After @@ -652,9 +647,7 @@ public class TestTaskImpl { public void testFailedTransitions() { mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, - credentials, clock, - completedTasksFromPreviousRun, startCount, - metrics, appContext, TaskType.MAP) { + credentials, clock, startCount, metrics, appContext, TaskType.MAP) { @Override protected int getMaxAttempts() { return 1; @@ -721,9 +714,7 @@ public class TestTaskImpl { public void testCountersWithSpeculation() { mockTask = new MockTaskImpl(jobId, partition, dispatcher.getEventHandler(), remoteJobConfFile, conf, taskAttemptListener, jobToken, - 
credentials, clock, - completedTasksFromPreviousRun, startCount, - metrics, appContext, TaskType.MAP) { + credentials, clock, startCount, metrics, appContext, TaskType.MAP) { @Override protected int getMaxAttempts() { return 1; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index a2ba7f79d73..66471d367cb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -47,6 +47,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import com.google.common.base.Joiner; @@ -525,4 +526,19 @@ public class JobHistoryUtils { sb.append(jobId.toString()); return sb.toString(); } + + public static Path getPreviousJobHistoryPath( + Configuration conf, ApplicationAttemptId applicationAttemptId) + throws IOException { + String jobId = + TypeConverter.fromYarn(applicationAttemptId.getApplicationId()) + .toString(); + String jobhistoryDir = + JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(conf, jobId); + Path histDirPath = FileContext.getFileContext(conf).makeQualified( + new Path(jobhistoryDir)); + FileContext fc = FileContext.getFileContext(histDirPath.toUri(), conf); + return fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile( + histDirPath,jobId, (applicationAttemptId.getAttemptId() - 1))); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 7a7d22ec212..6e399ee7410 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -422,6 +422,7 @@ public interface MRJobConfig { /** Enable job recovery.*/ public static final String MR_AM_JOB_RECOVERY_ENABLE = MR_AM_PREFIX + "job.recovery.enable"; + public static final boolean MR_AM_JOB_RECOVERY_ENABLE_DEFAULT = true; /** * Limit on the number of reducers that can be preempted to ensure that at From 9e2a96a2accd768b405a9a411e3c1c33132a1b9b Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Thu, 11 Apr 2013 07:21:00 +0000 Subject: [PATCH 31/52] Undo the unnecessary changes to CHANGES.txt included in git-revision 06b93c7 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466777 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 102 ++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 
9d0a815287e..502d02fad66 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -14,6 +14,10 @@ Trunk (Unreleased) MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions with poor implementations of Object#hashCode(). (Radim Kolar via cutting) + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + IMPROVEMENTS MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for @@ -71,6 +75,9 @@ Trunk (Unreleased) MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive. (Brandon Li via suresh) + MAPREDUCE-5014. Extend Distcp to accept a custom CopyListing. + (Srikanth Sundarrajan via amareshwari) + BUG FIXES MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. @@ -155,8 +162,29 @@ Trunk (Unreleased) MAPREDUCE-5012. Typo in javadoc for IdentityMapper class. (Adam Monsen via suresh) - MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. - (Sandy Ryza via tomwhite) + MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path + separators. (Chris Nauroth via sseth) + + BREAKDOWN OF HADOOP-8562 SUBTASKS + + MAPREDUCE-4739. Some MapReduce tests fail to find winutils. + (Chris Nauroth via suresh) + + MAPREDUCE-4780. MapReduce distribution build fails on Windows. + (Chris Nauroth via suresh) + + MAPREDUCE-4790. MapReduce build script would be more readable using abspath. + (Chris Nauroth via suresh) + + MAPREDUCE-4869. Fix TestMapReduceChildJVM. (Chris Nauroth via acmurthy) + + MAPREDUCE-4870. Fix TestMRJobsWithHistoryService. (Chris Nauroth via acmurthy) + + MAPREDUCE-4983. Fixed various platform specific assumptions in various tests, + so that they can pass on Windows too. (Chris Nauroth via vinodkv) + + HADOOP-9372. Fix bad timeout annotations on tests. + (Arpit Agarwal via suresh) Release 2.0.5-alpha - UNRELEASED @@ -196,6 +224,15 @@ Release 2.0.4-beta - UNRELEASED MAPREDUCE-5033. mapred shell script should respect usage flags (--help -help -h). (Andrew Wang via atm) + MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input slits' + allocation on small clusters. (Bikas Saha via vinodkv) + + MAPREDUCE-4990. Construct debug strings conditionally in + ShuffleHandler.Shuffle#sendMapOutput(). (kkambatl via tucu) + + MAPREDUCE-4875. coverage fixing for org.apache.hadoop.mapred + (Aleksey Gorshkov via bobby) + OPTIMIZATIONS BUG FIXES @@ -222,6 +259,36 @@ Release 2.0.4-beta - UNRELEASED MAPREDUCE-5008. Merger progress miscounts with respect to EOF_MARKER. (Sandy Ryza via tomwhite) + MAPREDUCE-4693. History server should include counters for failed tasks. + (Xuan Gong via sseth) + + MAPREDUCE-4896. mapred queue -info spits out ugly exception when queue does + not exist. (sandyr via tucu) + + MAPREDUCE-3685. Fix bugs in MergeManager to ensure compression codec is + appropriately used and that on-disk segments are correctly sorted on + file-size. (Anty Rao and Ravi Prakash via acmurthy) + + MAPREDUCE-4571. TestHsWebServicesJobs fails on jdk7. (tgraves via tucu) + + MAPREDUCE-4716. TestHsWebServicesJobsQuery.testJobsQueryStateInvalid + fails with jdk7. (tgraves via tucu) + + MAPREDUCE-5075. DistCp leaks input file handles since ThrottledInputStream + does not close the wrapped InputStream. (Chris Nauroth via szetszwo) + + MAPREDUCE-3872. Fix an event handling races in ContainerLauncherImpl. + (Robert Kanter via sseth) + + MAPREDUCE-5083. 
MiniMRCluster should use a random component when creating an + actual cluster (Siddharth Seth via hitesh) + + MAPREDUCE-5062. Fix MR AM to read max-retries from the RM. (Zhijie Shen via + vinodkv) + + MAPREDUCE-5077. Remove mapreduce.util.ResourceCalculatorPlugin and related + code. (Karthik Kambatla via sseth) + MAPREDUCE-5117. Changed MRClientProtocolPBClientImpl to be closeable and thus fix failures in renewal of HistoryServer's delegations tokens. (Siddharth Seth via vinodkv) @@ -232,6 +299,9 @@ Release 2.0.4-beta - UNRELEASED MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. (Omkar Vinit Joshi via vinodkv) + MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. + (Sandy Ryza via tomwhite) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES @@ -762,6 +832,12 @@ Release 0.23.7 - UNRELEASED MAPREDUCE-4989. JSONify DataTables input data for Attempts page (Ravi Prakash via jlowe) + MAPREDUCE-5027. Shuffle does not limit number of outstanding connections + (Robert Parker via jeagles) + + MAPREDUCE-4972. Coverage fixing for org.apache.hadoop.mapreduce.jobhistory + (Aleksey Gorshkov via bobby) + OPTIMIZATIONS MAPREDUCE-4946. Fix a performance problem for large jobs by reducing the @@ -781,6 +857,28 @@ Release 0.23.7 - UNRELEASED MAPREDUCE-5009. Killing the Task Attempt slated for commit does not clear the value from the Task commitAttempt member (Robert Parker via jeagles) + MAPREDUCE-4871. AM uses mapreduce.jobtracker.split.metainfo.maxsize but + mapred-default has mapreduce.job.split.metainfo.maxsize (Jason Lowe via + jeagles) + + MAPREDUCE-4794. DefaultSpeculator generates error messages on normal + shutdown (Jason Lowe via jeagles) + + MAPREDUCE-5043. Fetch failure processing can cause AM event queue to + backup and eventually OOM (Jason Lowe via bobby) + + MAPREDUCE-5023. History Server Web Services missing Job Counters (Ravi + Prakash via tgraves) + + MAPREDUCE-5060. Fetch failures that time out only count against the first + map task (Robert Joseph Evans via jlowe) + + MAPREDUCE-5042. Reducer unable to fetch for a map task that was recovered + (Jason Lowe via bobby) + + MAPREDUCE-5053. java.lang.InternalError from decompression codec cause + reducer to fail (Robert Parker via jeagles) + MAPREDUCE-4991. coverage for gridmix (Aleksey Gorshkov via tgraves) MAPREDUCE-5007. fix coverage org.apache.hadoop.mapreduce.v2.hs (Aleksey From 972eb9bbfeddd86f1838baa7c83c9dcc9d09a363 Mon Sep 17 00:00:00 2001 From: Siddharth Seth Date: Thu, 11 Apr 2013 07:21:43 +0000 Subject: [PATCH 32/52] Additional fixes to CHANGES.txt to move jiras under the correct 2.x revision. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466778 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 144 +++++++++++++-------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 502d02fad66..e11ef4893d9 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -20,9 +20,6 @@ Trunk (Unreleased) IMPROVEMENTS - MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for - faster job submission. (amarrk) - MAPREDUCE-3481. [Gridmix] Improve Gridmix STRESS mode. (amarrk) MAPREDUCE-3597. [Rumen] Rumen should provide APIs to access all the @@ -34,9 +31,6 @@ Trunk (Unreleased) MAPREDUCE-2733. [Gridmix] Gridmix3 cpu emulation system tests. (Vinay Thota via amarrk) - MAPREDUCE-3008. 
Improvements to cumulative CPU emulation for short running - tasks in Gridmix. (amarrk) - MAPREDUCE-2836. Provide option to fail jobs when submitted to non-existent fair scheduler pools. (Ahmed Radwan via todd) @@ -83,34 +77,6 @@ Trunk (Unreleased) MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. (Yu Gao via llu) - MAPREDUCE-4356. [Rumen] Provide access to the method - ParsedTask.obtainTaskAttempts(). (ravigummadi) - - MAPREDUCE-4100. [Gridmix] Bug fixed in compression emulation feature for - map only jobs. (amarrk) - - MAPREDUCE-4149. [Rumen] Rumen fails to parse certain counter - strings. (ravigummadi) - - MAPREDUCE-4083. [Gridmix] NPE in cpu emulation. (amarrk) - - MAPREDUCE-4087. [Gridmix] GenerateDistCacheData job of Gridmix can - become slow in some cases (ravigummadi). - - MAPREDUCE-3953. [Gridmix] Gridmix throws NPE and does not simulate a - job if the trace contains null taskStatus for a task. - (ravigummadi) - - MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when - input data directory already exists and -generate opton is - given.(ravigummadi) - - MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead - counter is wrong when compressed input is used.(ravigummadi) - - MAPREDUCE-3757. [Rumen] Fixed Rumen Folder to adjust shuffleFinished and - sortFinished times when needed.(ravigummadi) - MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2 (Jason Lowe via bobby) @@ -186,7 +152,7 @@ Trunk (Unreleased) HADOOP-9372. Fix bad timeout annotations on tests. (Arpit Agarwal via suresh) -Release 2.0.5-alpha - UNRELEASED +Release 2.0.5-beta - UNRELEASED INCOMPATIBLE CHANGES @@ -194,32 +160,8 @@ Release 2.0.5-alpha - UNRELEASED IMPROVEMENTS - MAPREDUCE-5129. Allow tags to JobHistory for deeper analytics. (billie via - acmurthy) - - MAPREDUCE-5079. Changes job recovery to restore state directly from job - history, instaed of simulating state machine events. - (Jason Lowe and Robert Parker via sseth) - - OPTIMIZATIONS - - BUG FIXES - - MAPREDUCE-5113. Streaming input/output types are ignored with java - mapper/reducer. (sandyr via tucu) - - MAPREDUCE-5098. Fix findbugs warnings in gridmix. (kkambatl via tucu) - - MAPREDUCE-5086. MR app master deletes staging dir when sent a reboot - command from the RM. (Jian He via jlowe) - -Release 2.0.4-beta - UNRELEASED - - INCOMPATIBLE CHANGES - - NEW FEATURES - - IMPROVEMENTS + MAPREDUCE-3008. Improvements to cumulative CPU emulation for short running + tasks in Gridmix. (amarrk via tgraves) MAPREDUCE-5033. mapred shell script should respect usage flags (--help -help -h). (Andrew Wang via atm) @@ -233,6 +175,16 @@ Release 2.0.4-beta - UNRELEASED MAPREDUCE-4875. coverage fixing for org.apache.hadoop.mapred (Aleksey Gorshkov via bobby) + MAPREDUCE-5129. Allow tags to JobHistory for deeper analytics. (billie via + acmurthy) + + MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for + faster job submission. (amarrk via tgraves) + + MAPREDUCE-5079. Changes job recovery to restore state directly from job + history, instaed of simulating state machine events. + (Jason Lowe and Robert Parker via sseth) + OPTIMIZATIONS BUG FIXES @@ -280,27 +232,75 @@ Release 2.0.4-beta - UNRELEASED MAPREDUCE-3872. Fix an event handling races in ContainerLauncherImpl. (Robert Kanter via sseth) - MAPREDUCE-5083. MiniMRCluster should use a random component when creating an - actual cluster (Siddharth Seth via hitesh) - MAPREDUCE-5062. Fix MR AM to read max-retries from the RM. 
(Zhijie Shen via vinodkv) + MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when + input data directory already exists and -generate opton is + given.(ravigummadi via tgraves) + + MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead + counter is wrong when compressed input is used.(ravigummadi via tgraves) + + MAPREDUCE-3953. [Gridmix] Gridmix throws NPE and does not simulate a + job if the trace contains null taskStatus for a task. (ravigummadi via + tgraves) + + MAPREDUCE-4087. [Gridmix] GenerateDistCacheData job of Gridmix can + become slow in some cases (ravigummadi via tgraves). + MAPREDUCE-5077. Remove mapreduce.util.ResourceCalculatorPlugin and related code. (Karthik Kambatla via sseth) + MAPREDUCE-4083. [Gridmix] NPE in cpu emulation. (amarrk via tgraves) + + MAPREDUCE-4100. [Gridmix] Bug fixed in compression emulation feature for + map only jobs. (amarrk via tgraves) + + MAPREDUCE-4356. [Rumen] Provide access to the method + ParsedTask.obtainTaskAttempts(). (ravigummadi via tgraves) + + MAPREDUCE-4149. [Rumen] Rumen fails to parse certain counter + strings. (ravigummadi via tgraves) + + MAPREDUCE-3757. [Rumen] Fixed Rumen Folder to adjust shuffleFinished and + sortFinished times when needed. (Ravi Gummadi via tgraves) + + MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. (Omkar Vinit + Joshi via vinodkv) + + MAPREDUCE-5113. Streaming input/output types are ignored with java + mapper/reducer. (sandyr via tucu) + + MAPREDUCE-5098. Fix findbugs warnings in gridmix. (kkambatl via tucu) + + MAPREDUCE-5086. MR app master deletes staging dir when sent a reboot + command from the RM. (Jian He via jlowe) + +Release 2.0.4-alpha - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. + (Sandy Ryza via tomwhite) + + MAPREDUCE-5088. MR Client gets an renewer token exception while Oozie is + submitting a job (Daryn Sharp via cos) + MAPREDUCE-5117. Changed MRClientProtocolPBClientImpl to be closeable and thus fix failures in renewal of HistoryServer's delegations tokens. (Siddharth Seth via vinodkv) - MAPREDUCE-5088. MR Client gets an renewer token exception while Oozie is - submitting a job (Daryn Sharp via cos) - - MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. (Omkar Vinit - Joshi via vinodkv) - - MAPREDUCE-5006. Fix failing streaming tests due to MAPREDUCE-4994. - (Sandy Ryza via tomwhite) + MAPREDUCE-5083. MiniMRCluster should use a random component when creating an + actual cluster (Siddharth Seth via hitesh) Release 2.0.3-alpha - 2013-02-06 From d04e65b797538dc16ceae6a08155798808748437 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 11 Apr 2013 16:27:24 +0000 Subject: [PATCH 33/52] MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error. Contributed by Thomas Graves git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466948 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 6 ++++++ .../hadoop/mapreduce/v2/app/webapp/TaskPage.java | 11 +++++++++-- .../hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java | 5 ++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index e11ef4893d9..95d1424bb5d 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -277,6 +277,9 @@ Release 2.0.5-beta - UNRELEASED MAPREDUCE-5086. 
MR app master deletes staging dir when sent a reboot command from the RM. (Jian He via jlowe) + MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error + (Thomas Graves via jlowe) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -884,6 +887,9 @@ Release 0.23.7 - UNRELEASED MAPREDUCE-5007. fix coverage org.apache.hadoop.mapreduce.v2.hs (Aleksey Gorshkov via tgraves) + MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error + (Thomas Graves via jlowe) + Release 0.23.6 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index 6ee62725e0a..430117c4e25 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -33,6 +33,9 @@ import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt; import org.apache.hadoop.mapreduce.v2.app.webapp.dao.TaskAttemptInfo; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; import com.google.inject.Inject; @@ -53,7 +56,7 @@ public class TaskPage extends AppView { h2($(TITLE)); return; } - html. + TBODY> tbody = html. table("#attempts"). thead(). tr(). @@ -65,7 +68,8 @@ public class TaskPage extends AppView { th(".tsh", "Started"). th(".tsh", "Finished"). th(".tsh", "Elapsed"). - th(".note", "Note")._()._(); + th(".note", "Note")._()._(). + tbody(); // Write all the data into a JavaScript array of arrays for JQuery // DataTables to display StringBuilder attemptsTableData = new StringBuilder("[\n"); @@ -105,6 +109,9 @@ public class TaskPage extends AppView { attemptsTableData.append("]"); html.script().$type("text/javascript"). 
_("var attemptsTableData=" + attemptsTableData)._(); + + tbody._()._(); + } protected boolean isValidRequest() { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java index 8ef7e68c6cc..fdf650d647b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebApp.java @@ -144,7 +144,10 @@ public class TestAMWebApp { @Test public void testTaskView() { AppContext appContext = new TestAppContext(); Map params = getTaskParams(appContext); - WebAppTests.testPage(TaskPage.class, AppContext.class, appContext, params); + App app = new App(appContext); + app.setJob(appContext.getAllJobs().values().iterator().next()); + app.setTask(app.getJob().getTasks().values().iterator().next()); + WebAppTests.testPage(TaskPage.class, App.class, app, params); } public static Map getJobParams(AppContext appContext) { From 8e86ffd91e72e376799a62aa0f1b731ec0f93381 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 11 Apr 2013 19:12:59 +0000 Subject: [PATCH 34/52] MAPREDUCE-5136. TestJobImpl->testJobNoTasks fails with IBM JAVA. Contributed by Amir Sanjar git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467061 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 95d1424bb5d..0a89cbc6a29 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -280,6 +280,9 @@ Release 2.0.5-beta - UNRELEASED MAPREDUCE-5137. AM web UI: clicking on Map Task results in 500 error (Thomas Graves via jlowe) + MAPREDUCE-5136. 
TestJobImpl->testJobNoTasks fails with IBM JAVA (Amir + Sanjar via jlowe) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java index 88cf949bc96..0b93e75546d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java @@ -25,6 +25,7 @@ import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.EnumSet; import java.util.concurrent.BrokenBarrierException; @@ -744,7 +745,12 @@ public class TestJobImpl { setAssertValue(false); return; } - if (!workflowAdjacencies.equals(jsEvent.getWorkflowAdjacencies())) { + + String[] wrkflowAdj = workflowAdjacencies.split(" "); + String[] jswrkflowAdj = jsEvent.getWorkflowAdjacencies().split(" "); + Arrays.sort(wrkflowAdj); + Arrays.sort(jswrkflowAdj); + if (!Arrays.equals(wrkflowAdj, jswrkflowAdj)) { setAssertValue(false); return; } From e4c55e17fea55e2fcbef182bb2b0c4b22686f38c Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 19:28:51 +0000 Subject: [PATCH 35/52] YARN-486. Changed NM's startContainer API to accept Container record given by RM as a direct parameter instead of as part of the ContainerLaunchContext record. Contributed by Xuan Gong. MAPREDUCE-5139. Update MR AM to use the modified startContainer API after YARN-486. Contributed by Xuan Gong. 
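In rough terms, the AM-side shape of this change is sketched below. The
container id and resource are no longer baked into the ContainerLaunchContext
(see the BuilderUtils.newContainerLaunchContext hunks that follow); instead the
Container record allocated by the RM travels with the start-container request.
This is an illustrative sketch only, not part of the patch: the accessor name
setContainer is assumed here, and the variable names are placeholders.

    import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
    import org.apache.hadoop.yarn.api.records.Container;
    import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
    import org.apache.hadoop.yarn.util.Records;

    public final class StartContainerSketch {
      /**
       * Builds a start-container request in the post-YARN-486 style: the
       * launch context carries only the user-level payload, while the
       * RM-issued Container supplies the container id and resource.
       */
      public static StartContainerRequest newRequest(
          ContainerLaunchContext launchContext, Container rmIssuedContainer) {
        StartContainerRequest request =
            Records.newRecord(StartContainerRequest.class);
        request.setContainerLaunchContext(launchContext);
        // setContainer(...) is the field this patch appears to add to
        // StartContainerRequest (assumed name; see the hunks below for the
        // authoritative API).
        request.setContainer(rmIssuedContainer);
        return request;
      }
    }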
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467063 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/app/job/impl/TaskAttemptImpl.java | 22 +++--- .../app/launcher/ContainerLauncherImpl.java | 5 ++ .../launcher/ContainerRemoteLaunchEvent.java | 10 ++- .../impl/TestTaskAttemptContainerRequest.java | 4 +- .../app/launcher/TestContainerLauncher.java | 4 +- .../org/apache/hadoop/mapred/YARNRunner.java | 8 +- hadoop-yarn-project/CHANGES.txt | 4 + .../StartContainerRequest.java | 9 +++ .../impl/pb/StartContainerRequestPBImpl.java | 38 ++++++++- .../records/ApplicationSubmissionContext.java | 24 ++---- .../api/records/ContainerLaunchContext.java | 35 -------- .../ApplicationSubmissionContextPBImpl.java | 59 +++++++++----- .../impl/pb/ContainerLaunchContextPBImpl.java | 79 +------------------ .../src/main/proto/yarn_protos.proto | 30 ++++--- .../src/main/proto/yarn_service_protos.proto | 1 + .../distributedshell/ApplicationMaster.java | 4 +- .../applications/distributedshell/Client.java | 3 +- .../apache/hadoop/yarn/util/BuilderUtils.java | 7 +- .../hadoop/yarn/TestContainerLaunchRPC.java | 12 +-- .../java/org/apache/hadoop/yarn/TestRPC.java | 16 ++-- .../nodemanager/LinuxContainerExecutor.java | 2 +- .../ContainerManagerImpl.java | 23 +++--- .../containermanager/container/Container.java | 3 + .../container/ContainerImpl.java | 65 +++++++++------ .../launcher/ContainerLaunch.java | 15 ++-- .../nodemanager/webapp/dao/ContainerInfo.java | 2 +- .../server/nodemanager/TestEventFlow.java | 10 ++- .../nodemanager/TestNodeManagerReboot.java | 11 ++- .../nodemanager/TestNodeManagerShutdown.java | 18 +++-- .../nodemanager/TestNodeStatusUpdater.java | 24 +++--- .../TestContainerManager.java | 42 +++++----- .../container/TestContainer.java | 13 +-- .../launcher/TestContainerLaunch.java | 23 +++--- .../TestLogAggregationService.java | 13 +-- .../monitor/TestContainersMonitor.java | 15 ++-- .../nodemanager/webapp/MockContainer.java | 8 +- .../nodemanager/webapp/TestNMWebServer.java | 18 +++-- .../resourcemanager/ClientRMService.java | 6 +- .../server/resourcemanager/RMAppManager.java | 9 ++- .../amlauncher/AMLauncher.java | 41 +++++----- .../rmapp/attempt/RMAppAttemptImpl.java | 15 ++-- .../webapp/dao/AppAttemptInfo.java | 4 +- .../server/resourcemanager/Application.java | 9 +-- .../yarn/server/resourcemanager/MockRM.java | 7 +- .../server/resourcemanager/NodeManager.java | 20 +++-- .../resourcemanager/TestAppManager.java | 5 ++ .../resourcemanager/TestApplicationACLs.java | 2 +- .../TestApplicationMasterLauncher.java | 2 +- .../resourcemanager/TestClientRMService.java | 11 +-- .../attempt/TestRMAppAttemptTransitions.java | 9 +-- .../scheduler/fair/TestFairScheduler.java | 6 ++ .../webapp/TestRMWebServicesApps.java | 5 +- .../server/TestContainerManagerSecurity.java | 40 ++++++---- 54 files changed, 453 insertions(+), 420 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0a89cbc6a29..246aa69db05 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -283,6 +283,9 @@ Release 2.0.5-beta - UNRELEASED MAPREDUCE-5136. TestJobImpl->testJobNoTasks fails with IBM JAVA (Amir Sanjar via jlowe) + MAPREDUCE-5139. Update MR AM to use the modified startContainer API after + YARN-486. 
(Xuan Gong via vinodkv) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index 3cb4bf913c9..ac3af4f0128 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -114,6 +114,7 @@ import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerToken; @@ -767,8 +768,8 @@ public abstract class TaskAttemptImpl implements // The null fields are per-container and will be constructed for each // container separately. ContainerLaunchContext container = BuilderUtils - .newContainerLaunchContext(null, conf - .get(MRJobConfig.USER_NAME), null, localResources, + .newContainerLaunchContext(conf + .get(MRJobConfig.USER_NAME), localResources, environment, null, serviceData, taskCredentialsBuffer, applicationACLs); @@ -777,10 +778,9 @@ public abstract class TaskAttemptImpl implements static ContainerLaunchContext createContainerLaunchContext( Map applicationACLs, - ContainerId containerID, Configuration conf, - Token jobToken, Task remoteTask, + Configuration conf, Token jobToken, Task remoteTask, final org.apache.hadoop.mapred.JobID oldJobId, - Resource assignedCapability, WrappedJvmID jvmID, + WrappedJvmID jvmID, TaskAttemptListener taskAttemptListener, Credentials credentials) { @@ -813,7 +813,7 @@ public abstract class TaskAttemptImpl implements // Construct the actual Container ContainerLaunchContext container = BuilderUtils.newContainerLaunchContext( - containerID, commonContainerSpec.getUser(), assignedCapability, + commonContainerSpec.getUser(), commonContainerSpec.getLocalResources(), myEnv, commands, myServiceData, commonContainerSpec.getContainerTokens().duplicate(), applicationACLs); @@ -1511,15 +1511,13 @@ public abstract class TaskAttemptImpl implements //launch the container //create the container object to be launched for a given Task attempt ContainerLaunchContext launchContext = createContainerLaunchContext( - cEvent.getApplicationACLs(), taskAttempt.containerID, - taskAttempt.conf, taskAttempt.jobToken, taskAttempt.remoteTask, - taskAttempt.oldJobId, taskAttempt.assignedCapability, - taskAttempt.jvmID, taskAttempt.taskAttemptListener, - taskAttempt.credentials); + cEvent.getApplicationACLs(), taskAttempt.conf, taskAttempt.jobToken, + taskAttempt.remoteTask, taskAttempt.oldJobId, taskAttempt.jvmID, + taskAttempt.taskAttemptListener, taskAttempt.credentials); taskAttempt.eventHandler.handle(new ContainerRemoteLaunchEvent( taskAttempt.attemptId, taskAttempt.containerID, taskAttempt.containerMgrAddress, taskAttempt.containerToken, - launchContext, taskAttempt.remoteTask)); + launchContext, taskAttempt.assignedCapability, 
taskAttempt.remoteTask)); // send event to speculator that our container needs are satisfied taskAttempt.eventHandler.handle diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index 588d031bc23..86281f60b1b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -59,6 +59,7 @@ import org.apache.hadoop.yarn.api.records.ContainerToken; import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.service.AbstractService; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ProtoUtils; import org.apache.hadoop.yarn.util.Records; @@ -150,10 +151,14 @@ public class ContainerLauncherImpl extends AbstractService implements ContainerLaunchContext containerLaunchContext = event.getContainer(); + org.apache.hadoop.yarn.api.records.Container container = + BuilderUtils.newContainer(containerID, null, null, + event.getResource(), null, containerToken); // Now launch the actual container StartContainerRequest startRequest = Records .newRecord(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(container); StartContainerResponse response = proxy.startContainer(startRequest); ByteBuffer portInfo = response diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java index 0fac5335b0c..eb95f3bb314 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerRemoteLaunchEvent.java @@ -23,26 +23,34 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerToken; +import org.apache.hadoop.yarn.api.records.Resource; public class ContainerRemoteLaunchEvent extends ContainerLauncherEvent { private final ContainerLaunchContext container; private final Task task; + private final Resource resource; public ContainerRemoteLaunchEvent(TaskAttemptId taskAttemptID, ContainerId containerID, String containerMgrAddress, ContainerToken containerToken, - ContainerLaunchContext containerLaunchContext, Task remoteTask) { + ContainerLaunchContext containerLaunchContext, Resource resource, + Task remoteTask) { super(taskAttemptID, containerID, containerMgrAddress, containerToken, ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH); this.container = 
containerLaunchContext; this.task = remoteTask; + this.resource = resource; } public ContainerLaunchContext getContainer() { return this.container; } + public Resource getResource() { + return this.resource; + } + public Task getRemoteTask() { return this.task; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java index 87575d61f39..54be1d74f0c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttemptContainerRequest.java @@ -113,9 +113,9 @@ public class TestTaskAttemptContainerRequest { ContainerId containerId = BuilderUtils.newContainerId(1, 1, 1, 1); ContainerLaunchContext launchCtx = - TaskAttemptImpl.createContainerLaunchContext(acls, containerId, + TaskAttemptImpl.createContainerLaunchContext(acls, jobConf, jobToken, taImpl.createRemoteTask(), - TypeConverter.fromYarn(jobId), mock(Resource.class), + TypeConverter.fromYarn(jobId), mock(WrappedJvmID.class), taListener, credentials); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java index e1bab017561..c5d0a885f4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java @@ -60,7 +60,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerToken; @@ -383,7 +382,6 @@ public class TestContainerLauncher { @Override public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext container = request.getContainerLaunchContext(); StartContainerResponse response = recordFactory .newRecordInstance(StartContainerResponse.class); status = recordFactory.newRecordInstance(ContainerStatus.class); @@ -395,7 +393,7 @@ public class TestContainerLauncher { throw new UndeclaredThrowableException(e); } status.setState(ContainerState.RUNNING); - status.setContainerId(container.getContainerId()); + status.setContainerId(request.getContainer().getId()); status.setExitStatus(0); return response; } diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 971492b0ea9..241258ac222 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -468,16 +468,14 @@ public class YARNRunner implements ClientProtocol { // Setup ContainerLaunchContext for AM container ContainerLaunchContext amContainer = BuilderUtils - .newContainerLaunchContext(null, UserGroupInformation - .getCurrentUser().getShortUserName(), capability, localResources, + .newContainerLaunchContext(UserGroupInformation + .getCurrentUser().getShortUserName(), localResources, environment, vargsFinal, null, securityTokens, acls); // Set up the ApplicationSubmissionContext ApplicationSubmissionContext appContext = recordFactory.newRecordInstance(ApplicationSubmissionContext.class); appContext.setApplicationId(applicationId); // ApplicationId - appContext.setUser( // User name - UserGroupInformation.getCurrentUser().getShortUserName()); appContext.setQueue( // Queue name jobConf.get(JobContext.QUEUE_NAME, YarnConfiguration.DEFAULT_QUEUE_NAME)); @@ -490,7 +488,7 @@ public class YARNRunner implements ClientProtocol { appContext.setMaxAppAttempts( conf.getInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, MRJobConfig.DEFAULT_MR_AM_MAX_ATTEMPTS)); - + appContext.setResource(capability); return appContext; } diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index a7ce38e6c35..4c640d3e877 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -78,6 +78,10 @@ Release 2.0.5-beta - UNRELEASED YARN-536. Removed the unused objects ContainerStatus and ContainerStatus from Container which also don't belong to the container. (Xuan Gong via vinodkv) + YARN-486. Changed NM's startContainer API to accept Container record given by + RM as a direct parameter instead of as part of the ContainerLaunchContext + record. 
(Xuan Gong via vinodkv) + NEW FEATURES IMPROVEMENTS diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java index a3f7e4b70d0..78862b95d1a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/StartContainerRequest.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.api.protocolrecords; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.ContainerManager; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; /** @@ -58,4 +59,12 @@ public interface StartContainerRequest { @Public @Stable public abstract void setContainerLaunchContext(ContainerLaunchContext context); + + @Public + @Stable + public Container getContainer(); + + @Public + @Stable + public void setContainer(Container container); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java index e1c589e5308..d4190e09a04 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/StartContainerRequestPBImpl.java @@ -20,10 +20,13 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProtoOrBuilder; @@ -35,7 +38,8 @@ public class StartContainerRequestPBImpl extends ProtoBaseuser submitting the application. - * @return user submitting the application - */ - @Public - @Stable - public String getUser(); - - /** - * Set the user submitting the application. 
- * @param user user submitting the application - */ - @Public - @Stable - public void setUser(String user); /** * Get the ContainerLaunchContext to describe the @@ -207,4 +191,12 @@ public interface ApplicationSubmissionContext { @Public @Unstable public void setMaxAppAttempts(int maxAppAttempts); + + @Public + @Stable + public Resource getResource(); + + @Public + @Stable + public void setResource(Resource resource); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java index 78f85b2b0ec..36cfdfbadd8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerLaunchContext.java @@ -51,22 +51,6 @@ import org.apache.hadoop.yarn.api.ContainerManager; @Public @Stable public interface ContainerLaunchContext { - /** - * Get ContainerId of container to be launched. - * @return ContainerId of container to be launched - */ - @Public - @Stable - ContainerId getContainerId(); - - /** - * Set ContainerId of container to be launched. - * @param containerId et ContainerId of container to be launched - */ - @Public - @Stable - void setContainerId(ContainerId containerId); - /** * Get the user to whom the container has been allocated. * @return the user to whom the container has been allocated @@ -83,25 +67,6 @@ public interface ContainerLaunchContext { @Stable void setUser(String user); - /** - * Get the Resource allocated to the container by the - * ResourceManager. - * @return Resource allocated to the container by the - * ResourceManager - */ - @Public - @Stable - Resource getResource(); - - /** - * Set the Resource allocated to the container by the - * ResourceManager. - * @param resource allocated resource - */ - @Public - @Stable - void setResource(Resource resource); - /** * Get security tokens (if security is enabled). 
* @return security tokens (if security is enabled) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java index a6a890cc433..403ce6ef4f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationSubmissionContextPBImpl.java @@ -23,11 +23,13 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ProtoBase; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationSubmissionContextProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; public class ApplicationSubmissionContextPBImpl extends ProtoBase @@ -40,7 +42,8 @@ implements ApplicationSubmissionContext { private ApplicationId applicationId = null; private Priority priority = null; private ContainerLaunchContext amContainer = null; - + private Resource resource = null; + public ApplicationSubmissionContextPBImpl() { builder = ApplicationSubmissionContextProto.newBuilder(); } @@ -68,6 +71,11 @@ implements ApplicationSubmissionContext { if (this.amContainer != null) { builder.setAmContainerSpec(convertToProtoFormat(this.amContainer)); } + if (this.resource != null && + !((ResourcePBImpl) this.resource).getProto().equals( + builder.getResource())) { + builder.setResource(convertToProtoFormat(this.resource)); + } } private void mergeLocalToProto() { @@ -165,25 +173,6 @@ implements ApplicationSubmissionContext { } builder.setQueue((queue)); } - - @Override - public String getUser() { - ApplicationSubmissionContextProtoOrBuilder p = viaProto ? proto : builder; - if (!p.hasUser()) { - return null; - } - return (p.getUser()); - } - - @Override - public void setUser(String user) { - maybeInitBuilder(); - if (user == null) { - builder.clearUser(); - return; - } - builder.setUser((user)); - } @Override public ContainerLaunchContext getAMContainerSpec() { @@ -244,6 +233,28 @@ implements ApplicationSubmissionContext { builder.setMaxAppAttempts(maxAppAttempts); } + @Override + public Resource getResource() { + ApplicationSubmissionContextProtoOrBuilder p = viaProto ? 
proto : builder; + if (this.resource != null) { + return this.resource; + } + if (!p.hasResource()) { + return null; + } + this.resource = convertFromProtoFormat(p.getResource()); + return this.resource; + } + + @Override + public void setResource(Resource resource) { + maybeInitBuilder(); + if (resource == null) { + builder.clearResource(); + } + this.resource = resource; + } + private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { return new PriorityPBImpl(p); } @@ -268,4 +279,12 @@ implements ApplicationSubmissionContext { private ContainerLaunchContextProto convertToProtoFormat(ContainerLaunchContext t) { return ((ContainerLaunchContextPBImpl)t).getProto(); } + + private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { + return new ResourcePBImpl(p); + } + + private ResourceProto convertToProtoFormat(Resource t) { + return ((ResourcePBImpl)t).getProto(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java index b8ba4df26d7..6a26508f711 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerLaunchContextPBImpl.java @@ -26,17 +26,13 @@ import java.util.List; import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.ProtoBase; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationACLMapProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringBytesMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringLocalResourceMapProto; import org.apache.hadoop.yarn.proto.YarnProtos.StringStringMapProto; @@ -50,8 +46,6 @@ implements ContainerLaunchContext { ContainerLaunchContextProto.Builder builder = null; boolean viaProto = false; - private ContainerId containerId = null; - private Resource resource = null; private Map localResources = null; private ByteBuffer containerTokens = null; private Map serviceData = null; @@ -76,16 +70,6 @@ implements ContainerLaunchContext { } private void mergeLocalToBuilder() { - if (this.containerId != null && - !((ContainerIdPBImpl)containerId).getProto().equals( - builder.getContainerId())) { - builder.setContainerId(convertToProtoFormat(this.containerId)); - } - if (this.resource != null && - !((ResourcePBImpl)this.resource).getProto().equals( - builder.getResource())) { - builder.setResource(convertToProtoFormat(this.resource)); - } if (this.localResources != null) { addLocalResourcesToProto(); } @@ -120,28 +104,6 @@ implements ContainerLaunchContext { } viaProto = false; } - - - @Override - public Resource getResource() { - 
ContainerLaunchContextProtoOrBuilder p = viaProto ? proto : builder; - if (this.resource != null) { - return this.resource; - } - if (!p.hasResource()) { - return null; - } - this.resource = convertFromProtoFormat(p.getResource()); - return this.resource; - } - - @Override - public void setResource(Resource resource) { - maybeInitBuilder(); - if (resource == null) - builder.clearResource(); - this.resource = resource; - } @Override public List getCommands() { @@ -197,26 +159,6 @@ implements ContainerLaunchContext { } builder.setUser((user)); } - @Override - public ContainerId getContainerId() { - ContainerLaunchContextProtoOrBuilder p = viaProto ? proto : builder; - if (this.containerId != null) { - return this.containerId; - } - if (!p.hasContainerId()) { - return null; - } - this.containerId = convertFromProtoFormat(p.getContainerId()); - return this.containerId; - } - - @Override - public void setContainerId(ContainerId containerId) { - maybeInitBuilder(); - if (containerId == null) - builder.clearContainerId(); - this.containerId = containerId; - } @Override public Map getLocalResources() { @@ -299,11 +241,12 @@ implements ContainerLaunchContext { @Override public void setContainerTokens(ByteBuffer containerTokens) { maybeInitBuilder(); - if (containerTokens == null) + if (containerTokens == null) { builder.clearContainerTokens(); + } this.containerTokens = containerTokens; } - + @Override public Map getServiceData() { initServiceData(); @@ -500,22 +443,6 @@ implements ContainerLaunchContext { this.applicationACLS.putAll(appACLs); } - private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { - return new ResourcePBImpl(p); - } - - private ResourceProto convertToProtoFormat(Resource t) { - return ((ResourcePBImpl)t).getProto(); - } - - private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { - return new ContainerIdPBImpl(p); - } - - private ContainerIdProto convertToProtoFormat(ContainerId t) { - return ((ContainerIdPBImpl)t).getProto(); - } - private LocalResourcePBImpl convertFromProtoFormat(LocalResourceProto p) { return new LocalResourcePBImpl(p); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 83aac743657..aec162c1f0f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -211,13 +211,13 @@ message ResourceRequestProto { message ApplicationSubmissionContextProto { optional ApplicationIdProto application_id = 1; optional string application_name = 2 [default = "N/A"]; - optional string user = 3; - optional string queue = 4 [default = "default"]; - optional PriorityProto priority = 5; - optional ContainerLaunchContextProto am_container_spec = 6; - optional bool cancel_tokens_when_complete = 7 [default = true]; - optional bool unmanaged_am = 8 [default = false]; - optional int32 maxAppAttempts = 9 [default = 0]; + optional string queue = 3 [default = "default"]; + optional PriorityProto priority = 4; + optional ContainerLaunchContextProto am_container_spec = 5; + optional bool cancel_tokens_when_complete = 6 [default = true]; + optional bool unmanaged_am = 7 [default = false]; + optional int32 maxAppAttempts = 8 [default = 0]; + optional ResourceProto resource = 9; } enum ApplicationAccessTypeProto { @@ -264,15 +264,13 @@ message QueueUserACLInfoProto { 
//////////////////////////////////////////////////////////////////////// message ContainerLaunchContextProto { - optional ContainerIdProto container_id = 1; - optional string user = 2; - optional ResourceProto resource = 3; - repeated StringLocalResourceMapProto localResources = 4; - optional bytes container_tokens = 5; - repeated StringBytesMapProto service_data = 6; - repeated StringStringMapProto environment = 7; - repeated string command = 8; - repeated ApplicationACLMapProto application_ACLs = 9; + optional string user = 1; + repeated StringLocalResourceMapProto localResources = 2; + optional bytes container_tokens = 3; + repeated StringBytesMapProto service_data = 4; + repeated StringStringMapProto environment = 5; + repeated string command = 6; + repeated ApplicationACLMapProto application_ACLs = 7; } message ContainerStatusProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 50d1cd320ed..ad3b5f18072 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -151,6 +151,7 @@ message GetQueueUserAclsInfoResponseProto { message StartContainerRequestProto { optional ContainerLaunchContextProto container_launch_context = 1; + optional ContainerProto container = 2; } message StartContainerResponseProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 55cc46814ec..114b5e5a699 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -679,9 +679,6 @@ public class ApplicationMaster { ContainerLaunchContext ctx = Records .newRecord(ContainerLaunchContext.class); - ctx.setContainerId(container.getId()); - ctx.setResource(container.getResource()); - String jobUserName = System.getenv(ApplicationConstants.Environment.USER .key()); ctx.setUser(jobUserName); @@ -752,6 +749,7 @@ public class ApplicationMaster { StartContainerRequest startReq = Records .newRecord(StartContainerRequest.class); startReq.setContainerLaunchContext(ctx); + startReq.setContainer(container); try { cm.startContainer(startReq); } catch (YarnRemoteException e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index e311957e618..0461d46863e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -548,7 +548,7 @@ public class Client extends YarnClientImpl { // For now, only memory is supported so we set memory requirements Resource capability = Records.newRecord(Resource.class); capability.setMemory(amMemory); - amContainer.setResource(capability); + appContext.setResource(capability); // Service data is a binary blob that can be passed to the application // Not needed in this scenario @@ -573,6 +573,7 @@ public class Client extends YarnClientImpl { // Ignore the response as either a valid response object is returned on success // or an exception thrown to denote some form of a failure LOG.info("Submitting application to ASM"); + super.submitApplication(appContext); // TODO diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java index b2aabc80b65..c7502c1fb51 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java @@ -284,16 +284,13 @@ public class BuilderUtils { } public static ContainerLaunchContext newContainerLaunchContext( - ContainerId containerID, String user, Resource assignedCapability, - Map localResources, + String user, Map localResources, Map environment, List commands, - Map serviceData, ByteBuffer containerTokens, + Map serviceData, ByteBuffer containerTokens, Map acls) { ContainerLaunchContext container = recordFactory .newRecordInstance(ContainerLaunchContext.class); - container.setContainerId(containerID); container.setUser(user); - container.setResource(assignedCapability); container.setLocalResources(localResources); container.setEnvironment(environment); container.setCommands(commands); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java index b18588d9cbb..295a38cee80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -50,6 +51,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvider; import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; /* @@ -101,13 +103,14 @@ public class TestContainerLaunchRPC { 
applicationAttemptId.setAttemptId(0); containerId.setApplicationAttemptId(applicationAttemptId); containerId.setId(100); - containerLaunchContext.setContainerId(containerId); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); + Container container = + BuilderUtils.newContainer(containerId, null, null, recordFactory + .newRecordInstance(Resource.class), null, null); StartContainerRequest scRequest = recordFactory .newRecordInstance(StartContainerRequest.class); scRequest.setContainerLaunchContext(containerLaunchContext); + scRequest.setContainer(container); try { proxy.startContainer(scRequest); } catch (Exception e) { @@ -141,7 +144,6 @@ public class TestContainerLaunchRPC { @Override public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext container = request.getContainerLaunchContext(); StartContainerResponse response = recordFactory .newRecordInstance(StartContainerResponse.class); status = recordFactory.newRecordInstance(ContainerStatus.class); @@ -153,7 +155,7 @@ public class TestContainerLaunchRPC { throw new UndeclaredThrowableException(e); } status.setState(ContainerState.RUNNING); - status.setContainerId(container.getContainerId()); + status.setContainerId(request.getContainer().getId()); status.setExitStatus(0); return response; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java index 6975db229e5..7d941e92a23 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -52,6 +53,7 @@ import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvid import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; import org.junit.Test; @@ -124,20 +126,21 @@ public class TestRPC { applicationAttemptId.setAttemptId(0); containerId.setApplicationAttemptId(applicationAttemptId); containerId.setId(100); - containerLaunchContext.setContainerId(containerId); - containerLaunchContext.setResource( - recordFactory.newRecordInstance(Resource.class)); + Container mockContainer = + BuilderUtils.newContainer(containerId, null, null, recordFactory + .newRecordInstance(Resource.class), null, null); // containerLaunchContext.env = new HashMap(); // containerLaunchContext.command = new ArrayList(); StartContainerRequest scRequest = recordFactory.newRecordInstance(StartContainerRequest.class); scRequest.setContainerLaunchContext(containerLaunchContext); + scRequest.setContainer(mockContainer); proxy.startContainer(scRequest); GetContainerStatusRequest gcsRequest = 
recordFactory.newRecordInstance(GetContainerStatusRequest.class); - gcsRequest.setContainerId(containerLaunchContext.getContainerId()); + gcsRequest.setContainerId(mockContainer.getId()); GetContainerStatusResponse response = proxy.getContainerStatus(gcsRequest); ContainerStatus status = response.getStatus(); @@ -145,7 +148,7 @@ public class TestRPC { boolean exception = false; try { StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); - stopRequest.setContainerId(containerLaunchContext.getContainerId()); + stopRequest.setContainerId(mockContainer.getId()); proxy.stopContainer(stopRequest); } catch (YarnRemoteException e) { exception = true; @@ -179,12 +182,11 @@ public class TestRPC { @Override public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext container = request.getContainerLaunchContext(); StartContainerResponse response = recordFactory.newRecordInstance(StartContainerResponse.class); status = recordFactory.newRecordInstance(ContainerStatus.class); status.setState(ContainerState.RUNNING); - status.setContainerId(container.getContainerId()); + status.setContainerId(request.getContainer().getId()); status.setExitStatus(0); return response; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index f7e29e5ac93..e0a35829a4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -220,7 +220,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { String containerIdStr = ConverterUtils.toString(containerId); resourcesHandler.preExecute(containerId, - container.getLaunchContext().getResource()); + container.getResource()); String resourcesOptions = resourcesHandler.getResourcesOption( containerId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index b4a0034217c..8fc8a3ed6fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -308,7 +308,9 @@ public class ContainerManagerImpl extends CompositeService implements * @throws YarnRemoteException */ private void authorizeRequest(String containerIDStr, - ContainerLaunchContext launchContext, UserGroupInformation remoteUgi) + ContainerLaunchContext launchContext, + org.apache.hadoop.yarn.api.records.Container container, + UserGroupInformation remoteUgi) throws YarnRemoteException { if 
(!UserGroupInformation.isSecurityEnabled()) { @@ -369,10 +371,10 @@ public class ContainerManagerImpl extends CompositeService implements } Resource resource = tokenId.getResource(); - if (!resource.equals(launchContext.getResource())) { + if (!resource.equals(container.getResource())) { unauthorized = true; messageBuilder.append("\nExpected resource " + resource - + " but found " + launchContext.getResource()); + + " but found " + container.getResource()); } } } @@ -392,12 +394,13 @@ public class ContainerManagerImpl extends CompositeService implements public StartContainerResponse startContainer(StartContainerRequest request) throws YarnRemoteException { ContainerLaunchContext launchContext = request.getContainerLaunchContext(); - - ContainerId containerID = launchContext.getContainerId(); + org.apache.hadoop.yarn.api.records.Container lauchContainer = + request.getContainer(); + ContainerId containerID = lauchContainer.getId(); String containerIDStr = containerID.toString(); UserGroupInformation remoteUgi = getRemoteUgi(containerIDStr); - authorizeRequest(containerIDStr, launchContext, remoteUgi); + authorizeRequest(containerIDStr, launchContext, lauchContainer, remoteUgi); LOG.info("Start request for " + containerIDStr + " by user " + launchContext.getUser()); @@ -424,7 +427,7 @@ public class ContainerManagerImpl extends CompositeService implements // //////////// End of parsing credentials Container container = new ContainerImpl(getConfig(), this.dispatcher, - launchContext, credentials, metrics); + launchContext, lauchContainer, credentials, metrics); ApplicationId applicationID = containerID.getApplicationAttemptId().getApplicationId(); if (context.getContainers().putIfAbsent(containerID, container) != null) { @@ -469,7 +472,7 @@ public class ContainerManagerImpl extends CompositeService implements // TODO launchedContainer misplaced -> doesn't necessarily mean a container // launch. A finished Application will not launch containers. metrics.launchedContainer(); - metrics.allocateContainer(launchContext.getResource()); + metrics.allocateContainer(lauchContainer.getResource()); return response; } @@ -487,7 +490,7 @@ public class ContainerManagerImpl extends CompositeService implements // TODO: Only the container's owner can kill containers today. UserGroupInformation remoteUgi = getRemoteUgi(containerIDStr); - authorizeRequest(containerIDStr, null, remoteUgi); + authorizeRequest(containerIDStr, null, null, remoteUgi); StopContainerResponse response = recordFactory.newRecordInstance(StopContainerResponse.class); @@ -529,7 +532,7 @@ public class ContainerManagerImpl extends CompositeService implements // TODO: Only the container's owner can get containers' status today. 
UserGroupInformation remoteUgi = getRemoteUgi(containerIDStr); - authorizeRequest(containerIDStr, null, remoteUgi); + authorizeRequest(containerIDStr, null, null, remoteUgi); LOG.info("Getting container-status for " + containerIDStr); Container container = this.context.getContainers().get(containerID); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index af0f92ee6fc..a43e1b74d2d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.EventHandler; public interface Container extends EventHandler { @@ -44,4 +45,6 @@ public interface Container extends EventHandler { ContainerStatus cloneAndGetContainerStatus(); String toString(); + + Resource getResource(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index b4752ff8f5d..ad2428ae178 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; @@ -76,6 +77,7 @@ public class ContainerImpl implements Container { private final Credentials credentials; private final NodeManagerMetrics metrics; private final ContainerLaunchContext launchContext; + private final org.apache.hadoop.yarn.api.records.Container container; private int exitCode = YarnConfiguration.INVALID_CONTAINER_EXIT_STATUS; private final StringBuilder diagnostics; @@ -96,12 +98,13 @@ public class ContainerImpl implements Container { new ArrayList(); public ContainerImpl(Configuration conf, - Dispatcher dispatcher, - ContainerLaunchContext launchContext, Credentials creds, - NodeManagerMetrics metrics) { + Dispatcher 
dispatcher, ContainerLaunchContext launchContext, + org.apache.hadoop.yarn.api.records.Container container, + Credentials creds, NodeManagerMetrics metrics) { this.daemonConf = conf; this.dispatcher = dispatcher; this.launchContext = launchContext; + this.container = container; this.diagnostics = new StringBuilder(); this.credentials = creds; this.metrics = metrics; @@ -312,7 +315,7 @@ public class ContainerImpl implements Container { public ContainerId getContainerID() { this.readLock.lock(); try { - return this.launchContext.getContainerId(); + return this.container.getId(); } finally { this.readLock.unlock(); } @@ -373,50 +376,61 @@ public class ContainerImpl implements Container { public ContainerStatus cloneAndGetContainerStatus() { this.readLock.lock(); try { - return BuilderUtils.newContainerStatus(this.getContainerID(), + return BuilderUtils.newContainerStatus(this.container.getId(), getCurrentState(), diagnostics.toString(), exitCode); } finally { this.readLock.unlock(); } } + @Override + public Resource getResource() { + this.readLock.lock(); + try { + return this.container.getResource(); + } finally { + this.readLock.unlock(); + } + } + @SuppressWarnings({"fallthrough", "unchecked"}) private void finished() { + ContainerId containerID = this.container.getId(); + String user = this.launchContext.getUser(); switch (getContainerState()) { case EXITED_WITH_SUCCESS: metrics.endRunningContainer(); metrics.completedContainer(); - NMAuditLogger.logSuccess(getUser(), + NMAuditLogger.logSuccess(user, AuditConstants.FINISH_SUCCESS_CONTAINER, "ContainerImpl", - getContainerID().getApplicationAttemptId().getApplicationId(), - getContainerID()); + containerID.getApplicationAttemptId() + .getApplicationId(), containerID); break; case EXITED_WITH_FAILURE: metrics.endRunningContainer(); // fall through case LOCALIZATION_FAILED: metrics.failedContainer(); - NMAuditLogger.logFailure(getUser(), + NMAuditLogger.logFailure(user, AuditConstants.FINISH_FAILED_CONTAINER, "ContainerImpl", "Container failed with state: " + getContainerState(), - getContainerID().getApplicationAttemptId().getApplicationId(), - getContainerID()); + containerID.getApplicationAttemptId() + .getApplicationId(), containerID); break; case CONTAINER_CLEANEDUP_AFTER_KILL: metrics.endRunningContainer(); // fall through case NEW: metrics.killedContainer(); - NMAuditLogger.logSuccess(getUser(), + NMAuditLogger.logSuccess(user, AuditConstants.FINISH_KILLED_CONTAINER, "ContainerImpl", - getContainerID().getApplicationAttemptId().getApplicationId(), - getContainerID()); + containerID.getApplicationAttemptId().getApplicationId(), + containerID); } - metrics.releaseContainer(getLaunchContext().getResource()); + metrics.releaseContainer(this.container.getResource()); // Inform the application - ContainerId containerID = getContainerID(); @SuppressWarnings("rawtypes") EventHandler eventHandler = dispatcher.getEventHandler(); eventHandler.handle(new ApplicationContainerFinishedEvent(containerID)); @@ -475,7 +489,7 @@ public class ContainerImpl implements Container { @Override public ContainerState transition(ContainerImpl container, ContainerEvent event) { - final ContainerLaunchContext ctxt = container.getLaunchContext(); + final ContainerLaunchContext ctxt = container.launchContext; container.metrics.initingContainer(); // Inform the AuxServices about the opaque serviceData @@ -486,9 +500,9 @@ public class ContainerImpl implements Container { for (Map.Entry service : csd.entrySet()) { container.dispatcher.getEventHandler().handle( new 
AuxServicesEvent(AuxServicesEventType.APPLICATION_INIT, - ctxt.getUser(), - ctxt.getContainerId().getApplicationAttemptId().getApplicationId(), - service.getKey().toString(), service.getValue())); + ctxt.getUser(), container.container.getId() + .getApplicationAttemptId().getApplicationId(), + service.getKey().toString(), service.getValue())); } } @@ -571,7 +585,7 @@ public class ContainerImpl implements Container { container.pendingResources.remove(rsrcEvent.getResource()); if (null == syms) { LOG.warn("Localized unknown resource " + rsrcEvent.getResource() + - " for container " + container.getContainerID()); + " for container " + container.container.getId()); assert false; // fail container? return ContainerState.LOCALIZING; @@ -599,14 +613,14 @@ public class ContainerImpl implements Container { // Inform the ContainersMonitor to start monitoring the container's // resource usage. long pmemBytes = - container.getLaunchContext().getResource().getMemory() * 1024 * 1024L; + container.container.getResource().getMemory() * 1024 * 1024L; float pmemRatio = container.daemonConf.getFloat( YarnConfiguration.NM_VMEM_PMEM_RATIO, YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); long vmemBytes = (long) (pmemRatio * pmemBytes); container.dispatcher.getEventHandler().handle( - new ContainerStartMonitoringEvent(container.getContainerID(), + new ContainerStartMonitoringEvent(container.container.getId(), vmemBytes, pmemBytes)); container.metrics.runningContainer(); } @@ -740,7 +754,7 @@ public class ContainerImpl implements Container { container.pendingResources.remove(rsrcEvent.getResource()); if (null == syms) { LOG.warn("Localized unknown resource " + rsrcEvent.getResource() + - " for container " + container.getContainerID()); + " for container " + container.container.getId()); assert false; // fail container? return; @@ -845,10 +859,9 @@ public class ContainerImpl implements Container { public String toString() { this.readLock.lock(); try { - return ConverterUtils.toString(launchContext.getContainerId()); + return ConverterUtils.toString(container.getId()); } finally { this.readLock.unlock(); } } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 71809b2d7de..71345e0a39d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -249,9 +249,8 @@ public class ContainerLaunch implements Callable { } catch (Throwable e) { LOG.warn("Failed to launch container.", e); dispatcher.getEventHandler().handle(new ContainerExitEvent( - launchContext.getContainerId(), - ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, - e.getMessage())); + containerID, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, + e.getMessage())); return ret; } finally { completed.set(true); @@ -267,7 +266,7 @@ public class ContainerLaunch implements Callable { // If the process was killed, Send container_cleanedup_after_kill and // just break out of this method. 
dispatcher.getEventHandler().handle( - new ContainerExitEvent(launchContext.getContainerId(), + new ContainerExitEvent(containerID, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, ret, "Container exited with a non-zero exit code " + ret)); return ret; @@ -276,15 +275,15 @@ public class ContainerLaunch implements Callable { if (ret != 0) { LOG.warn("Container exited with a non-zero exit code " + ret); this.dispatcher.getEventHandler().handle(new ContainerExitEvent( - launchContext.getContainerId(), - ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, - "Container exited with a non-zero exit code " + ret)); + containerID, + ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, ret, + "Container exited with a non-zero exit code " + ret)); return ret; } LOG.info("Container " + containerIdStr + " succeeded "); dispatcher.getEventHandler().handle( - new ContainerEvent(launchContext.getContainerId(), + new ContainerEvent(containerID, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS)); return 0; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java index 41c649eea2c..3f31279edd9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/dao/ContainerInfo.java @@ -72,7 +72,7 @@ public class ContainerInfo { } this.user = container.getUser(); - Resource res = container.getLaunchContext().getResource(); + Resource res = container.getResource(); if (res != null) { this.totalMemoryNeededMB = res.getMemory(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java index b1283b5b6cb..292d00fadf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestEventFlow.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -43,6 +44,8 @@ import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.junit.Test; +import static org.mockito.Mockito.*; + public class TestEventFlow { @@ 
-117,12 +120,15 @@ public class TestEventFlow { applicationAttemptId.setApplicationId(applicationId); applicationAttemptId.setAttemptId(0); cID.setApplicationAttemptId(applicationAttemptId); - launchContext.setContainerId(cID); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cID); + when(mockContainer.getResource()).thenReturn(recordFactory + .newRecordInstance(Resource.class)); launchContext.setUser("testing"); - launchContext.setResource(recordFactory.newRecordInstance(Resource.class)); StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class); request.setContainerLaunchContext(launchContext); + request.setContainer(mockContainer); containerManager.startContainer(request); BaseContainerManagerTest.waitForContainerState(containerManager, cID, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java index 9ac237b6614..1436193d3ae 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerReboot.java @@ -99,7 +99,9 @@ public class TestNodeManagerReboot { Records.newRecord(ContainerLaunchContext.class); // Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -122,12 +124,13 @@ public class TestNodeManagerReboot { containerLaunchContext.setUser(containerLaunchContext.getUser()); List commands = new ArrayList(); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(Records - .newRecord(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + Resource resource = Records.newRecord(Resource.class); + resource.setMemory(1024); + when(mockContainer.getResource()).thenReturn(resource); StartContainerRequest startRequest = Records.newRecord(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); GetContainerStatusRequest request = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java index 72f3433c027..1efe80db075 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -18,6 +18,9 @@ package org.apache.hadoop.yarn.server.nodemanager; +import 
static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -41,6 +44,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -56,8 +60,8 @@ import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; import org.junit.Before; @@ -157,10 +161,10 @@ public class TestNodeManagerShutdown { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -184,12 +188,12 @@ public class TestNodeManagerShutdown { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + Resource resource = BuilderUtils.newResource(1024, 1); + when(mockContainer.getResource()).thenReturn(resource); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); GetContainerStatusRequest request = @@ -287,7 +291,7 @@ public class TestNodeManagerShutdown { @Override protected void rebootNodeStatusUpdater() { - ConcurrentMap containers = + ConcurrentMap containers = getNMContext().getContainers(); // ensure that containers are empty before restart nodeStatusUpdater Assert.assertTrue(containers.isEmpty()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index c06a54dc118..29d6a4c3a84 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -176,6 +176,8 @@ public class TestNodeStatusUpdater { 
nodeStatus.setResponseId(heartBeatID++); Map> appToContainers = getAppToContainerStatusMap(nodeStatus.getContainersStatuses()); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); if (heartBeatID == 1) { Assert.assertEquals(0, nodeStatus.getContainersStatuses().size()); @@ -186,11 +188,12 @@ public class TestNodeStatusUpdater { firstContainerID.setId(heartBeatID); ContainerLaunchContext launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(firstContainerID); - launchContext.setResource(recordFactory.newRecordInstance(Resource.class)); - launchContext.getResource().setMemory(2); - Container container = new ContainerImpl(conf , mockDispatcher, - launchContext, null, mockMetrics); + when(mockContainer.getId()).thenReturn(firstContainerID); + Resource resource = BuilderUtils.newResource(2, 1); + when(mockContainer.getResource()).thenReturn(resource); + Container container = + new ContainerImpl(conf, mockDispatcher, launchContext, + mockContainer, null, mockMetrics); this.context.getContainers().put(firstContainerID, container); } else if (heartBeatID == 2) { // Checks on the RM end @@ -211,11 +214,12 @@ public class TestNodeStatusUpdater { secondContainerID.setId(heartBeatID); ContainerLaunchContext launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(secondContainerID); - launchContext.setResource(recordFactory.newRecordInstance(Resource.class)); - launchContext.getResource().setMemory(3); - Container container = new ContainerImpl(conf, mockDispatcher, - launchContext, null, mockMetrics); + when(mockContainer.getId()).thenReturn(secondContainerID); + Resource resource = BuilderUtils.newResource(3, 1); + when(mockContainer.getResource()).thenReturn(resource); + Container container = + new ContainerImpl(conf, mockDispatcher, launchContext, + mockContainer, null, mockMetrics); this.context.getContainers().put(secondContainerID, container); } else if (heartBeatID == 3) { // Checks on the RM end diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 5b01cc08140..d405a7c1779 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -47,7 +48,6 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import 
org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; @@ -58,8 +58,10 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ContainerLocalizer; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.Test; +import static org.mockito.Mockito.*; public class TestContainerManager extends BaseContainerManagerTest { @@ -121,7 +123,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); - container.setContainerId(cId); container.setUser(user); @@ -143,14 +144,16 @@ public class TestContainerManager extends BaseContainerManagerTest { localResources.put(destinationFile, rsrc_alpha); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(container.getUser()); - containerLaunchContext.setContainerId(container.getContainerId()); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(512, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); - + startRequest.setContainer(mockContainer); + containerManager.startContainer(startRequest); BaseContainerManagerTest.waitForContainerState(containerManager, cId, @@ -208,7 +211,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -232,11 +234,13 @@ public class TestContainerManager extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; @@ -310,7 +314,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); containerLaunchContext.setUser(user); @@ -334,12 +337,14 @@ public class TestContainerManager extends BaseContainerManagerTest { 
commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); BaseContainerManagerTest.waitForContainerState(containerManager, cId, @@ -402,7 +407,6 @@ public class TestContainerManager extends BaseContainerManagerTest { // ////// Construct the Container-id ContainerId cId = createContainerId(); ApplicationId appId = cId.getApplicationAttemptId().getApplicationId(); - container.setContainerId(cId); container.setUser(user); @@ -425,14 +429,16 @@ public class TestContainerManager extends BaseContainerManagerTest { localResources.put(destinationFile, rsrc_alpha); containerLaunchContext.setLocalResources(localResources); containerLaunchContext.setUser(container.getUser()); - containerLaunchContext.setContainerId(container.getContainerId()); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); + Container mockContainer = mock(Container.class); + when(mockContainer.getId()).thenReturn(cId); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100, 1)); // containerLaunchContext.command = new ArrayList(); StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class); request.setContainerLaunchContext(containerLaunchContext); + request.setContainer(mockContainer); containerManager.startContainer(request); BaseContainerManagerTest.waitForContainerState(containerManager, cId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 95872440b6a..230ce46c472 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -525,8 +525,9 @@ public class TestContainer { return serviceData; } - private Container newContainer(Dispatcher disp, ContainerLaunchContext ctx) { - return new ContainerImpl(conf, disp, ctx, null, metrics); + private Container newContainer(Dispatcher disp, ContainerLaunchContext ctx, + org.apache.hadoop.yarn.api.records.Container container) { + return new ContainerImpl(conf, disp, ctx, container, null, metrics); } @SuppressWarnings("unchecked") @@ -570,12 +571,14 @@ public class TestContainer { this.user = user; ctxt = mock(ContainerLaunchContext.class); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); cId = 
BuilderUtils.newContainerId(appId, 1, timestamp, id); when(ctxt.getUser()).thenReturn(this.user); - when(ctxt.getContainerId()).thenReturn(cId); + when(mockContainer.getId()).thenReturn(cId); Resource resource = BuilderUtils.newResource(1024, 1); - when(ctxt.getResource()).thenReturn(resource); + when(mockContainer.getResource()).thenReturn(resource); if (withLocalRes) { Random r = new Random(); @@ -599,7 +602,7 @@ public class TestContainer { } when(ctxt.getServiceData()).thenReturn(serviceData); - c = newContainer(dispatcher, ctxt); + c = newContainer(dispatcher, ctxt, mockContainer); dispatcher.start(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java index 822835dc3d0..702707209d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/TestContainerLaunch.java @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -57,11 +58,14 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; import org.junit.Before; import org.junit.Test; +import static org.mockito.Mockito.*; + import junit.framework.Assert; public class TestContainerLaunch extends BaseContainerManagerTest { @@ -184,6 +188,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(0); @@ -195,7 +200,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -222,11 +227,11 @@ public class TestContainerLaunch extends BaseContainerManagerTest { 
commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; @@ -301,7 +306,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(1); @@ -313,7 +318,7 @@ public class TestContainerLaunch extends BaseContainerManagerTest { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setApplicationAttemptId(appAttemptId); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -339,11 +344,11 @@ public class TestContainerLaunch extends BaseContainerManagerTest { List commands = new ArrayList(); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 6a9a6767567..ccbf9f76bf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -56,13 +56,13 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.LocalResource; import 
org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.DrainDispatcher; @@ -91,6 +91,7 @@ import org.mockito.Mockito; import org.mortbay.util.MultiException; + //@Ignore public class TestLogAggregationService extends BaseContainerManagerTest { @@ -679,7 +680,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); @@ -689,7 +690,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest { BuilderUtils.newApplicationAttemptId(appId, 1); ContainerId cId = BuilderUtils.newContainerId(appAttemptId, 0); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(this.user); @@ -713,12 +714,12 @@ public class TestLogAggregationService extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(100 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); this.containerManager.startContainer(startRequest); BaseContainerManagerTest.waitForContainerState(this.containerManager, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java index 99d7d4d444a..a27b3575072 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitor.java @@ -21,7 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.*; import java.io.BufferedReader; import java.io.File; @@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import 
org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -51,7 +52,6 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; -import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -60,6 +60,7 @@ import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.ExitCode; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor.Signal; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.LinuxResourceCalculatorPlugin; import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree; @@ -197,7 +198,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest { ContainerLaunchContext containerLaunchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - + Container mockContainer = mock(Container.class); // ////// Construct the Container-id ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); @@ -210,7 +211,7 @@ public class TestContainersMonitor extends BaseContainerManagerTest { ContainerId cId = recordFactory.newRecordInstance(ContainerId.class); cId.setId(0); cId.setApplicationAttemptId(appAttemptId); - containerLaunchContext.setContainerId(cId); + when(mockContainer.getId()).thenReturn(cId); containerLaunchContext.setUser(user); @@ -234,12 +235,12 @@ public class TestContainersMonitor extends BaseContainerManagerTest { commands.add("/bin/bash"); commands.add(scriptFile.getAbsolutePath()); containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(8 * 1024 * 1024); + when(mockContainer.getResource()).thenReturn( + BuilderUtils.newResource(8 * 1024 * 1024, 1)); StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); startRequest.setContainerLaunchContext(containerLaunchContext); + startRequest.setContainer(mockContainer); containerManager.startContainer(startRequest); int timeoutSecs = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index 519ff183484..dbb50bba3f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import 
org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -58,7 +59,6 @@ public class MockContainer implements Container { uniqId); this.launchContext = recordFactory .newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(id); launchContext.setUser(user); this.state = ContainerState.NEW; @@ -104,7 +104,6 @@ public class MockContainer implements Container { .newRecordInstance(ContainerStatus.class); containerStatus .setState(org.apache.hadoop.yarn.api.records.ContainerState.RUNNING); - containerStatus.setContainerId(this.launchContext.getContainerId()); containerStatus.setDiagnostics("testing"); containerStatus.setExitStatus(0); return containerStatus; @@ -119,4 +118,9 @@ public class MockContainer implements Container { public void handle(ContainerEvent event) { } + @Override + public Resource getResource() { + return null; + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java index d29e73eff43..48abd9e7b19 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestNMWebServer.java @@ -179,15 +179,19 @@ public class TestNMWebServer { // TODO: Use builder utils ContainerLaunchContext launchContext = recordFactory.newRecordInstance(ContainerLaunchContext.class); - launchContext.setContainerId(containerId); + org.apache.hadoop.yarn.api.records.Container mockContainer = + mock(org.apache.hadoop.yarn.api.records.Container.class); + when(mockContainer.getId()).thenReturn(containerId); launchContext.setUser(user); Container container = - new ContainerImpl(conf, dispatcher, launchContext, null, metrics) { - @Override - public ContainerState getContainerState() { - return ContainerState.RUNNING; - }; - }; + new ContainerImpl(conf, dispatcher, launchContext, mockContainer, + null, metrics) { + + @Override + public ContainerState getContainerState() { + return ContainerState.RUNNING; + }; + }; nmContext.getContainers().put(containerId, container); //TODO: Gross hack. Fix in code. 
ApplicationId applicationId = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index ba462e5a6d4..1c3c55ea730 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -266,7 +266,7 @@ public class ClientRMService extends AbstractService implements ApplicationSubmissionContext submissionContext = request .getApplicationSubmissionContext(); ApplicationId applicationId = submissionContext.getApplicationId(); - String user = submissionContext.getUser(); + String user = submissionContext.getAMContainerSpec().getUser(); try { user = UserGroupInformation.getCurrentUser().getShortUserName(); if (rmContext.getRMApps().get(applicationId) != null) { @@ -275,13 +275,13 @@ public class ClientRMService extends AbstractService implements } // Safety - submissionContext.setUser(user); + submissionContext.getAMContainerSpec().setUser(user); // Check whether AM resource requirements are within required limits if (!submissionContext.getUnmanagedAM()) { ResourceRequest amReq = BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, - submissionContext.getAMContainerSpec().getResource(), 1); + submissionContext.getResource(), 1); try { SchedulerUtils.validateResourceRequest(amReq, scheduler.getMaximumResourceCapability()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 7193a5998f2..7c4f9d75d5c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -249,10 +249,11 @@ public class RMAppManager implements EventHandler, // Create RMApp application = new RMAppImpl(applicationId, rmContext, this.conf, - submissionContext.getApplicationName(), - submissionContext.getUser(), submissionContext.getQueue(), - submissionContext, this.scheduler, this.masterService, - submitTime); + submissionContext.getApplicationName(), + submissionContext.getAMContainerSpec().getUser(), + submissionContext.getQueue(), + submissionContext, this.scheduler, this.masterService, + submitTime); // Sanity check - duplicate? 
if (rmContext.getRMApps().putIfAbsent(applicationId, application) != diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index e45e1dd8f81..517d7fb31d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -77,6 +77,7 @@ public class AMLauncher implements Runnable { RecordFactoryProvider.getRecordFactory(null); private final AMLauncherEventType eventType; private final RMContext rmContext; + private final Container masterContainer; @SuppressWarnings("rawtypes") private final EventHandler handler; @@ -88,34 +89,36 @@ public class AMLauncher implements Runnable { this.eventType = eventType; this.rmContext = rmContext; this.handler = rmContext.getDispatcher().getEventHandler(); + this.masterContainer = application.getMasterContainer(); } private void connect() throws IOException { - ContainerId masterContainerID = application.getMasterContainer().getId(); + ContainerId masterContainerID = masterContainer.getId(); containerMgrProxy = getContainerMgrProxy(masterContainerID); } private void launch() throws IOException { connect(); - ContainerId masterContainerID = application.getMasterContainer().getId(); + ContainerId masterContainerID = masterContainer.getId(); ApplicationSubmissionContext applicationContext = application.getSubmissionContext(); - LOG.info("Setting up container " + application.getMasterContainer() + LOG.info("Setting up container " + masterContainer + " for AM " + application.getAppAttemptId()); ContainerLaunchContext launchContext = createAMContainerLaunchContext(applicationContext, masterContainerID); StartContainerRequest request = recordFactory.newRecordInstance(StartContainerRequest.class); request.setContainerLaunchContext(launchContext); + request.setContainer(masterContainer); containerMgrProxy.startContainer(request); - LOG.info("Done launching container " + application.getMasterContainer() + LOG.info("Done launching container " + masterContainer + " for AM " + application.getAppAttemptId()); } private void cleanup() throws IOException { connect(); - ContainerId containerId = application.getMasterContainer().getId(); + ContainerId containerId = masterContainer.getId(); StopContainerRequest stopRequest = recordFactory.newRecordInstance(StopContainerRequest.class); stopRequest.setContainerId(containerId); @@ -126,9 +129,7 @@ public class AMLauncher implements Runnable { protected ContainerManager getContainerMgrProxy( final ContainerId containerId) { - Container container = application.getMasterContainer(); - - final NodeId node = container.getNodeId(); + final NodeId node = masterContainer.getNodeId(); final InetSocketAddress containerManagerBindAddress = NetUtils.createSocketAddrForHost(node.getHost(), node.getPort()); @@ -138,8 +139,8 @@ public class AMLauncher implements Runnable { .createRemoteUser(containerId.toString()); if (UserGroupInformation.isSecurityEnabled()) { Token token = - ProtoUtils.convertFromProtoFormat(container.getContainerToken(), - 
containerManagerBindAddress); + ProtoUtils.convertFromProtoFormat(masterContainer + .getContainerToken(), containerManagerBindAddress); currentUser.addToken(token); } return currentUser.doAs(new PrivilegedAction() { @@ -165,30 +166,28 @@ public class AMLauncher implements Runnable { new String[0]))); // Finalize the container - container.setContainerId(containerID); - container.setUser(applicationMasterContext.getUser()); - setupTokensAndEnv(container); + container.setUser(applicationMasterContext.getAMContainerSpec().getUser()); + setupTokensAndEnv(container, containerID); return container; } private void setupTokensAndEnv( - ContainerLaunchContext container) + ContainerLaunchContext container, ContainerId containerID) throws IOException { Map environment = container.getEnvironment(); - environment.put(ApplicationConstants.APPLICATION_WEB_PROXY_BASE_ENV, application.getWebProxyBase()); // Set the AppAttemptId, containerId, NMHTTPAdress, AppSubmitTime to be // consumable by the AM. - environment.put(ApplicationConstants.AM_CONTAINER_ID_ENV, container - .getContainerId().toString()); - environment.put(ApplicationConstants.NM_HOST_ENV, application - .getMasterContainer().getNodeId().getHost()); + environment.put(ApplicationConstants.AM_CONTAINER_ID_ENV, + containerID.toString()); + environment.put(ApplicationConstants.NM_HOST_ENV, masterContainer + .getNodeId().getHost()); environment.put(ApplicationConstants.NM_PORT_ENV, - String.valueOf(application.getMasterContainer().getNodeId().getPort())); + String.valueOf(masterContainer.getNodeId().getPort())); String parts[] = - application.getMasterContainer().getNodeHttpAddress().split(":"); + masterContainer.getNodeHttpAddress().split(":"); environment.put(ApplicationConstants.NM_HTTP_PORT_ENV, parts[1]); ApplicationId applicationId = application.getAppAttemptId().getApplicationId(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 11fbdab72da..eaa15f53875 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -690,7 +690,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { appAttempt.eventHandler.handle( new AppAddedSchedulerEvent(appAttempt.applicationAttemptId, appAttempt.submissionContext.getQueue(), - appAttempt.submissionContext.getUser())); + appAttempt.submissionContext.getAMContainerSpec().getUser())); } } @@ -736,9 +736,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { RMAppEventType.APP_ACCEPTED)); // Request a container for the AM. 
- ResourceRequest request = BuilderUtils.newResourceRequest( - AM_CONTAINER_PRIORITY, ResourceRequest.ANY, appAttempt.submissionContext - .getAMContainerSpec().getResource(), 1); + ResourceRequest request = + BuilderUtils.newResourceRequest( + AM_CONTAINER_PRIORITY, ResourceRequest.ANY, appAttempt + .getSubmissionContext().getResource(), 1); // SchedulerUtils.validateResourceRequests is not necessary because // AM resource has been checked when submission @@ -773,12 +774,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { // Set the masterContainer appAttempt.setMasterContainer(amContainerAllocation.getContainers().get( 0)); - // Updating CLC's resource is no longer necessary once YARN-486 is - // completed, because nothing from Container to CLC will be copied into - // CLC then. - appAttempt.getSubmissionContext().getAMContainerSpec().setResource( + appAttempt.getSubmissionContext().setResource( appAttempt.getMasterContainer().getResource()); - RMStateStore store = appAttempt.rmContext.getStateStore(); appAttempt.storeAttempt(store); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java index 61b4880e137..875212f5558 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AppAttemptInfo.java @@ -59,8 +59,8 @@ public class AppAttemptInfo { this.logsLink = join(HttpConfig.getSchemePrefix(), masterContainer.getNodeHttpAddress(), "/node", "/containerlogs/", - ConverterUtils.toString(masterContainer.getId()), - "/", attempt.getSubmissionContext().getUser()); + ConverterUtils.toString(masterContainer.getId()), "/", + attempt.getSubmissionContext().getAMContainerSpec().getUser()); } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index 20ffeabfeba..3ce45ac6335 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -130,7 +130,7 @@ public class Application { public synchronized void submit() throws IOException { ApplicationSubmissionContext context = recordFactory.newRecordInstance(ApplicationSubmissionContext.class); context.setApplicationId(this.applicationId); - context.setUser(this.user); + context.getAMContainerSpec().setUser(this.user); context.setQueue(this.queue); SubmitApplicationRequest request = recordFactory .newRecordInstance(SubmitApplicationRequest.class); @@ -340,7 +340,8 @@ public class Application { // Launch the container StartContainerRequest startRequest = 
recordFactory.newRecordInstance(StartContainerRequest.class); - startRequest.setContainerLaunchContext(createCLC(container)); + startRequest.setContainerLaunchContext(createCLC()); + startRequest.setContainer(container); nodeManager.startContainer(startRequest); break; } @@ -396,11 +397,9 @@ public class Application { } } - private ContainerLaunchContext createCLC(Container container) { + private ContainerLaunchContext createCLC() { ContainerLaunchContext clc = recordFactory.newRecordInstance(ContainerLaunchContext.class); - clc.setContainerId(container.getId()); clc.setUser(this.user); - clc.setResource(container.getResource()); return clc; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index bfed3a7e06e..e39f303ce72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -161,7 +161,6 @@ public class MockRM extends ResourceManager { .newRecord(ApplicationSubmissionContext.class); sub.setApplicationId(appId); sub.setApplicationName(name); - sub.setUser(user); sub.setMaxAppAttempts(maxAppAttempts); if(unmanaged) { sub.setUnmanagedAM(true); @@ -171,13 +170,13 @@ public class MockRM extends ResourceManager { } ContainerLaunchContext clc = Records .newRecord(ContainerLaunchContext.class); - Resource capability = Records.newRecord(Resource.class); + final Resource capability = Records.newRecord(Resource.class); capability.setMemory(masterMemory); - clc.setResource(capability); + sub.setResource(capability); clc.setApplicationACLs(acls); + clc.setUser(user); sub.setAMContainerSpec(clc); req.setApplicationSubmissionContext(sub); - UserGroupInformation fakeUser = UserGroupInformation.createUserForTesting(user, new String[] {"someGroup"}); PrivilegedAction action = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index 8af339db0f3..dba5acdd82d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -40,7 +40,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; @@ -162,11 +161,10 @@ public class NodeManager implements ContainerManager { 
synchronized public StartContainerResponse startContainer( StartContainerRequest request) throws YarnRemoteException { - ContainerLaunchContext containerLaunchContext = - request.getContainerLaunchContext(); - + Container requestContainer = request.getContainer(); + ApplicationId applicationId = - containerLaunchContext.getContainerId().getApplicationAttemptId(). + requestContainer.getId().getApplicationAttemptId(). getApplicationId(); List applicationContainers = containers.get(applicationId); @@ -177,18 +175,18 @@ public class NodeManager implements ContainerManager { // Sanity check for (Container container : applicationContainers) { - if (container.getId().compareTo(containerLaunchContext.getContainerId()) + if (container.getId().compareTo(requestContainer.getId()) == 0) { throw new IllegalStateException( - "Container " + containerLaunchContext.getContainerId() + + "Container " + requestContainer.getId() + " already setup on node " + containerManagerAddress); } } Container container = - BuilderUtils.newContainer(containerLaunchContext.getContainerId(), + BuilderUtils.newContainer(requestContainer.getId(), this.nodeId, nodeHttpAddress, - containerLaunchContext.getResource(), + requestContainer.getResource(), null, null // DKDC - Doesn't matter ); @@ -197,8 +195,8 @@ public class NodeManager implements ContainerManager { "", -1000); applicationContainers.add(container); containerStatusMap.put(container, containerStatus); - Resources.subtractFrom(available, containerLaunchContext.getResource()); - Resources.addTo(used, containerLaunchContext.getResource()); + Resources.subtractFrom(available, requestContainer.getResource()); + Resources.addTo(used, requestContainer.getResource()); if(LOG.isDebugEnabled()) { LOG.debug("startContainer:" + " node=" + containerManagerAddress diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java index 7e06fac573b..fb74cb605b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAppManager.java @@ -50,6 +50,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.Capacity import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.service.Service; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.Test; import com.google.common.collect.Lists; @@ -503,6 +504,10 @@ public class TestAppManager{ RMApp appOrig = rmContext.getRMApps().get(appID); Assert.assertTrue("app name matches but shouldn't", "testApp1" != appOrig.getName()); + ContainerLaunchContext clc = + BuilderUtils.newContainerLaunchContext(null, null, null, null, null, + null, null); + context.setAMContainerSpec(clc); // our testApp1 should be rejected and original app with same id should be left in place appMonitor.submitApplication(context); diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java index 1b778f2d4c9..2f9aa6db92d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationACLs.java @@ -169,7 +169,7 @@ public class TestApplicationACLs { ContainerLaunchContext amContainer = recordFactory .newRecordInstance(ContainerLaunchContext.class); Resource resource = BuilderUtils.newResource(1024, 1); - amContainer.setResource(resource); + context.setResource(resource); amContainer.setApplicationACLs(acls); context.setAMContainerSpec(amContainer); submitRequest.setApplicationSubmissionContext(context); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index 84fd9a6dd51..5c6247b3803 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -134,7 +134,7 @@ public class TestApplicationMasterLauncher { Assert.assertEquals(app.getSubmitTime(), containerManager.submitTimeAtContainerManager); Assert.assertEquals(app.getRMAppAttempt(appAttemptId) - .getSubmissionContext().getAMContainerSpec().getContainerId() + .getMasterContainer().getId() .toString(), containerManager.containerIdAtContainerManager); Assert.assertEquals(nm1.getNodeId().getHost(), containerManager.nmHostAtContainerManager); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index bafc61194df..aa7af9c18db 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -340,7 +340,7 @@ public class TestClientRMService { final SubmitApplicationRequest submitRequest = mockSubmitAppRequest(appId); Resource resource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MAXIMUM_ALLOCATION_MB + 1); - when(submitRequest.getApplicationSubmissionContext().getAMContainerSpec() + when(submitRequest.getApplicationSubmissionContext() 
.getResource()).thenReturn(resource); final ClientRMService rmService = @@ -364,16 +364,17 @@ public class TestClientRMService { String queue = MockApps.newQueue(); ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class); + Resource resource = Resources.createResource( YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); - when(amContainerSpec.getResource()).thenReturn(resource); ApplicationSubmissionContext submissionContext = mock(ApplicationSubmissionContext.class); - when(submissionContext.getUser()).thenReturn(user); - when(submissionContext.getQueue()).thenReturn(queue); when(submissionContext.getAMContainerSpec()).thenReturn(amContainerSpec); + when(submissionContext.getAMContainerSpec().getUser()).thenReturn(user); + when(submissionContext.getQueue()).thenReturn(queue); when(submissionContext.getApplicationId()).thenReturn(appId); - + when(submissionContext.getResource()).thenReturn(resource); + SubmitApplicationRequest submitRequest = recordFactory.newRecordInstance(SubmitApplicationRequest.class); submitRequest.setApplicationSubmissionContext(submissionContext); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 0349b57cd52..f736edf2038 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -200,14 +200,14 @@ public class TestRMAppAttemptTransitions { final String user = MockApps.newUserName(); final String queue = MockApps.newQueue(); submissionContext = mock(ApplicationSubmissionContext.class); - when(submissionContext.getUser()).thenReturn(user); when(submissionContext.getQueue()).thenReturn(queue); Resource resource = BuilderUtils.newResource(1536, 1); ContainerLaunchContext amContainerSpec = - BuilderUtils.newContainerLaunchContext(null, user, resource, null, null, + BuilderUtils.newContainerLaunchContext(user, null, null, null, null, null, null); when(submissionContext.getAMContainerSpec()).thenReturn(amContainerSpec); - + when(submissionContext.getResource()).thenReturn(resource); + unmanagedAM = false; application = mock(RMApp.class); @@ -494,9 +494,6 @@ public class TestRMAppAttemptTransitions { applicationAttempt.handle( new RMAppAttemptStoredEvent( applicationAttempt.getAppAttemptId(), null)); - assertEquals(resource, - applicationAttempt.getSubmissionContext() - .getAMContainerSpec().getResource()); testAppAttemptAllocatedState(container); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 42db1fe325b..4758d62defb 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSc import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FifoSchedulingMode; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -1406,6 +1408,10 @@ public class TestFairScheduler { ApplicationMasterService masterService = new ApplicationMasterService(resourceManager.getRMContext(), scheduler); ApplicationSubmissionContext submissionContext = new ApplicationSubmissionContextPBImpl(); + ContainerLaunchContext clc = + BuilderUtils.newContainerLaunchContext(user, null, null, null, null, + null, null); + submissionContext.setAMContainerSpec(clc); RMApp application = new RMAppImpl(applicationId, resourceManager.getRMContext(), conf, name, user, queue, submissionContext, scheduler, masterService, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java index 5d0be9e630d..1d405bef529 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java @@ -1079,8 +1079,9 @@ public class TestRMWebServicesApps extends JerseyTest { .getMasterContainer().getNodeId().toString(), nodeId); assertTrue("logsLink doesn't match", logsLink.startsWith("http://")); - assertTrue("logsLink doesn't contain user info", - logsLink.endsWith("/" + appAttempt.getSubmissionContext().getUser())); + assertTrue( + "logsLink doesn't contain user info", logsLink.endsWith("/" + + appAttempt.getSubmissionContext().getAMContainerSpec().getUser())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java index 
27986cc1a96..69e197aad06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestContainerManagerSecurity.java @@ -357,8 +357,13 @@ public class TestContainerManagerSecurity { LOG.info("Going to contact NM with expired token"); ContainerLaunchContext context = createContainerLaunchContextForTest(newTokenId); + Container container = + BuilderUtils.newContainer(newTokenId.getContainerID(), null, null, + BuilderUtils.newResource(newTokenId.getResource().getMemory(), + newTokenId.getResource().getVirtualCores()), null, null); StartContainerRequest request = Records.newRecord(StartContainerRequest.class); request.setContainerLaunchContext(context); + request.setContainer(container); //Calling startContainer with an expired token. try { @@ -402,18 +407,19 @@ public class TestContainerManagerSecurity { Arrays.asList("ping", "-n", "100", "127.0.0.1", ">nul") : Arrays.asList("sleep", "100"); - ContainerLaunchContext amContainer = BuilderUtils - .newContainerLaunchContext(null, "testUser", BuilderUtils - .newResource(1024, 1), Collections.emptyMap(), - new HashMap(), cmd, - new HashMap(), null, - new HashMap()); + ContainerLaunchContext amContainer = + BuilderUtils.newContainerLaunchContext("testUser", + Collections. emptyMap(), + new HashMap(), cmd, + new HashMap(), null, + new HashMap()); ApplicationSubmissionContext appSubmissionContext = recordFactory .newRecordInstance(ApplicationSubmissionContext.class); appSubmissionContext.setApplicationId(appID); - appSubmissionContext.setUser("testUser"); appSubmissionContext.setAMContainerSpec(amContainer); + appSubmissionContext.getAMContainerSpec().setUser("testUser"); + appSubmissionContext.setResource(BuilderUtils.newResource(1024, 1)); SubmitApplicationRequest submitRequest = recordFactory .newRecordInstance(SubmitApplicationRequest.class); @@ -539,8 +545,11 @@ public class TestContainerManagerSecurity { // Authenticated but unauthorized, due to wrong resource ContainerLaunchContext context = createContainerLaunchContextForTest(tokenId); - context.getResource().setMemory(2048); // Set a different resource size. + Container container = + BuilderUtils.newContainer(tokenId.getContainerID(), null, null, + BuilderUtils.newResource(2048, 1), null, null); request.setContainerLaunchContext(context); + request.setContainer(container); try { client.startContainer(request); fail("Connection initiation with unauthorized " @@ -551,7 +560,7 @@ public class TestContainerManagerSecurity { "Unauthorized request to start container. ")); Assert.assertTrue(e.getMessage().contains( "\nExpected resource " + tokenId.getResource().toString() - + " but found " + context.getResource().toString())); + + " but found " + container.getResource().toString())); } } @@ -563,7 +572,12 @@ public class TestContainerManagerSecurity { ContainerLaunchContext context = createContainerLaunchContextForTest(tokenId); context.setUser("Saruman"); // Set a different user-name. 
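  // A sketch, not taken from these patches, of the call pattern the hunks in
  // this commit converge on: the ContainerLaunchContext no longer carries a
  // container id or resource, so the id and resource travel on the Container
  // that is set on the StartContainerRequest, while the user is set on the
  // launch context itself. It reuses only the Records/BuilderUtils helpers
  // visible above; containerId, nodeId and nodeHttpAddress are placeholder
  // arguments.
  private static void startContainerSketch(ContainerManager containerManager,
      ContainerId containerId, NodeId nodeId, String nodeHttpAddress)
      throws YarnRemoteException {
    ContainerLaunchContext launchContext =
        Records.newRecord(ContainerLaunchContext.class);
    launchContext.setUser("testUser");            // user now lives on the CLC

    Resource resource = BuilderUtils.newResource(1024, 1);
    Container container = BuilderUtils.newContainer(containerId, nodeId,
        nodeHttpAddress, resource, null, null);   // id + resource ride on the Container

    StartContainerRequest request =
        Records.newRecord(StartContainerRequest.class);
    request.setContainerLaunchContext(launchContext);
    request.setContainer(container);              // setter introduced by this change
    containerManager.startContainer(request);
  }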
+ Container container = + BuilderUtils.newContainer(tokenId.getContainerID(), null, null, + BuilderUtils.newResource(tokenId.getResource().getMemory(), tokenId + .getResource().getVirtualCores()), null, null); request.setContainerLaunchContext(context); + request.setContainer(container); try { client.startContainer(request); fail("Connection initiation with unauthorized " @@ -581,12 +595,8 @@ public class TestContainerManagerSecurity { private ContainerLaunchContext createContainerLaunchContextForTest( ContainerTokenIdentifier tokenId) { ContainerLaunchContext context = - BuilderUtils.newContainerLaunchContext(tokenId.getContainerID(), - "testUser", - BuilderUtils.newResource( - tokenId.getResource().getMemory(), - tokenId.getResource().getVirtualCores()), - new HashMap(), + BuilderUtils.newContainerLaunchContext( + "testUser", new HashMap(), new HashMap(), new ArrayList(), new HashMap(), null, new HashMap()); From 6d9c311840ab929739b1ed847191fb0c1bb71712 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Thu, 11 Apr 2013 20:08:00 +0000 Subject: [PATCH 36/52] Preparing for release 0.23.7 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467069 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 12 ++++++++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 12 ++++++++++++ hadoop-mapreduce-project/CHANGES.txt | 12 ++++++++++++ hadoop-yarn-project/CHANGES.txt | 12 ++++++++++++ 4 files changed, 48 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 480e7f8f39d..3ebdd71aca5 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1609,6 +1609,18 @@ Release 2.0.0-alpha - 05-23-2012 HADOOP-8655. Fix TextInputFormat for large deliminators. (Gelesh via bobby) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 598d3c21fd0..73f69954bd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2476,6 +2476,18 @@ Release 2.0.0-alpha - 05-23-2012 HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 246aa69db05..1ae4676dc1c 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -827,6 +827,18 @@ Release 2.0.0-alpha - 05-23-2012 MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is bad (Jason Lowe via bobby) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 4c640d3e877..9709e053647 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -509,6 +509,18 @@ Release 2.0.2-alpha - 2012-09-07 YARN-138. Ensure default values for minimum/maximum container sizes is sane. 
(harsh & sseth via acmurthy) +Release 0.23.8 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES From 313163624918265207b00c5da20d43b3b7b2b16a Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 11 Apr 2013 20:16:49 +0000 Subject: [PATCH 37/52] HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests. Contributed by Vadim Bondarev git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467072 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 6 + .../lz4/TestLz4CompressorDecompressor.java | 316 ++++++++++++++++++ 2 files changed, 322 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 3ebdd71aca5..5a563d2d2f2 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -620,6 +620,9 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR, breaking webhdfs. (tucu) + HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim + Bondarev via jlowe) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -1621,6 +1624,9 @@ Release 0.23.8 - UNRELEASED BUG FIXES + HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim + Bondarev via jlowe) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java new file mode 100644 index 00000000000..e8555b23887 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/lz4/TestLz4CompressorDecompressor.java @@ -0,0 +1,316 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress.lz4; + +import static org.junit.Assert.*; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.io.compress.BlockCompressorStream; +import org.apache.hadoop.io.compress.BlockDecompressorStream; +import org.apache.hadoop.io.compress.CompressionInputStream; +import org.apache.hadoop.io.compress.CompressionOutputStream; +import org.apache.hadoop.io.compress.Lz4Codec; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; +import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; +import org.junit.Before; +import org.junit.Test; +import static org.junit.Assume.*; + +public class TestLz4CompressorDecompressor { + + private static final Random rnd = new Random(12345l); + + @Before + public void before() { + assumeTrue(Lz4Codec.isNativeCodeLoaded()); + } + + //test on NullPointerException in {@code compressor.setInput()} + @Test + public void testCompressorSetInputNullPointerException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + compressor.setInput(null, 0, 10); + fail("testCompressorSetInputNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testCompressorSetInputNullPointerException ex error !!!"); + } + } + + //test on NullPointerException in {@code decompressor.setInput()} + @Test + public void testDecompressorSetInputNullPointerException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + decompressor.setInput(null, 0, 10); + fail("testDecompressorSetInputNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorSetInputNullPointerException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in {@code compressor.setInput()} + @Test + public void testCompressorSetInputAIOBException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + compressor.setInput(new byte[] {}, -5, 10); + fail("testCompressorSetInputAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception ex) { + fail("testCompressorSetInputAIOBException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in {@code decompressor.setInput()} + @Test + public void testDecompressorSetInputAIOUBException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + decompressor.setInput(new byte[] {}, -5, 10); + fail("testDecompressorSetInputAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorSetInputAIOBException ex error !!!"); + } + } + + //test on NullPointerException in {@code compressor.compress()} + @Test + public void testCompressorCompressNullPointerException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + byte[] bytes = generate(1024 * 6); + compressor.setInput(bytes, 0, bytes.length); + compressor.compress(null, 0, 0); + fail("testCompressorCompressNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testCompressorCompressNullPointerException ex error !!!"); + } + } + + //test on NullPointerException 
in {@code decompressor.decompress()} + @Test + public void testDecompressorCompressNullPointerException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + byte[] bytes = generate(1024 * 6); + decompressor.setInput(bytes, 0, bytes.length); + decompressor.decompress(null, 0, 0); + fail("testDecompressorCompressNullPointerException error !!!"); + } catch (NullPointerException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorCompressNullPointerException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in {@code compressor.compress()} + @Test + public void testCompressorCompressAIOBException() { + try { + Lz4Compressor compressor = new Lz4Compressor(); + byte[] bytes = generate(1024 * 6); + compressor.setInput(bytes, 0, bytes.length); + compressor.compress(new byte[] {}, 0, -1); + fail("testCompressorCompressAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testCompressorCompressAIOBException ex error !!!"); + } + } + + //test on ArrayIndexOutOfBoundsException in decompressor.decompress() + @Test + public void testDecompressorCompressAIOBException() { + try { + Lz4Decompressor decompressor = new Lz4Decompressor(); + byte[] bytes = generate(1024 * 6); + decompressor.setInput(bytes, 0, bytes.length); + decompressor.decompress(new byte[] {}, 0, -1); + fail("testDecompressorCompressAIOBException error !!!"); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception e) { + fail("testDecompressorCompressAIOBException ex error !!!"); + } + } + + // test Lz4Compressor compressor.compress() + @Test + public void testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize() { + int BYTES_SIZE = 1024 * 64 + 1; + try { + Lz4Compressor compressor = new Lz4Compressor(); + byte[] bytes = generate(BYTES_SIZE); + assertTrue("needsInput error !!!", compressor.needsInput()); + compressor.setInput(bytes, 0, bytes.length); + byte[] emptyBytes = new byte[BYTES_SIZE]; + int csize = compressor.compress(emptyBytes, 0, bytes.length); + assertTrue( + "testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize error !!!", + csize != 0); + } catch (Exception ex) { + fail("testSetInputWithBytesSizeMoreThenDefaultLz4CompressorByfferSize ex error !!!"); + } + } + + // test compress/decompress process + @Test + public void testCompressDecompress() { + int BYTE_SIZE = 1024 * 54; + byte[] bytes = generate(BYTE_SIZE); + Lz4Compressor compressor = new Lz4Compressor(); + try { + compressor.setInput(bytes, 0, bytes.length); + assertTrue("Lz4CompressDecompress getBytesRead error !!!", + compressor.getBytesRead() > 0); + assertTrue( + "Lz4CompressDecompress getBytesWritten before compress error !!!", + compressor.getBytesWritten() == 0); + + byte[] compressed = new byte[BYTE_SIZE]; + int cSize = compressor.compress(compressed, 0, compressed.length); + assertTrue( + "Lz4CompressDecompress getBytesWritten after compress error !!!", + compressor.getBytesWritten() > 0); + Lz4Decompressor decompressor = new Lz4Decompressor(); + // set as input for decompressor only compressed data indicated with cSize + decompressor.setInput(compressed, 0, cSize); + byte[] decompressed = new byte[BYTE_SIZE]; + decompressor.decompress(decompressed, 0, decompressed.length); + + assertTrue("testLz4CompressDecompress finished error !!!", decompressor.finished()); + assertArrayEquals(bytes, decompressed); + compressor.reset(); + decompressor.reset(); + assertTrue("decompressor getRemaining error 
!!!",decompressor.getRemaining() == 0); + } catch (Exception e) { + fail("testLz4CompressDecompress ex error!!!"); + } + } + + // test compress/decompress with empty stream + @Test + public void testCompressorDecompressorEmptyStreamLogic() { + ByteArrayInputStream bytesIn = null; + ByteArrayOutputStream bytesOut = null; + byte[] buf = null; + BlockDecompressorStream blockDecompressorStream = null; + try { + // compress empty stream + bytesOut = new ByteArrayOutputStream(); + BlockCompressorStream blockCompressorStream = new BlockCompressorStream( + bytesOut, new Lz4Compressor(), 1024, 0); + // close without write + blockCompressorStream.close(); + // check compressed output + buf = bytesOut.toByteArray(); + assertEquals("empty stream compressed output size != 4", 4, buf.length); + // use compressed output as input for decompression + bytesIn = new ByteArrayInputStream(buf); + // create decompression stream + blockDecompressorStream = new BlockDecompressorStream(bytesIn, + new Lz4Decompressor(), 1024); + // no byte is available because stream was closed + assertEquals("return value is not -1", -1, blockDecompressorStream.read()); + } catch (Exception e) { + fail("testCompressorDecompressorEmptyStreamLogic ex error !!!" + + e.getMessage()); + } finally { + if (blockDecompressorStream != null) + try { + bytesIn.close(); + bytesOut.close(); + blockDecompressorStream.close(); + } catch (IOException e) { + } + } + } + + // test compress/decompress process through CompressionOutputStream/CompressionInputStream api + @Test + public void testCompressorDecopressorLogicWithCompressionStreams() { + DataOutputStream deflateOut = null; + DataInputStream inflateIn = null; + int BYTE_SIZE = 1024 * 100; + byte[] bytes = generate(BYTE_SIZE); + int bufferSize = 262144; + int compressionOverhead = (bufferSize / 6) + 32; + try { + DataOutputBuffer compressedDataBuffer = new DataOutputBuffer(); + CompressionOutputStream deflateFilter = new BlockCompressorStream( + compressedDataBuffer, new Lz4Compressor(bufferSize), bufferSize, + compressionOverhead); + deflateOut = new DataOutputStream(new BufferedOutputStream(deflateFilter)); + deflateOut.write(bytes, 0, bytes.length); + deflateOut.flush(); + deflateFilter.finish(); + + DataInputBuffer deCompressedDataBuffer = new DataInputBuffer(); + deCompressedDataBuffer.reset(compressedDataBuffer.getData(), 0, + compressedDataBuffer.getLength()); + + CompressionInputStream inflateFilter = new BlockDecompressorStream( + deCompressedDataBuffer, new Lz4Decompressor(bufferSize), bufferSize); + + inflateIn = new DataInputStream(new BufferedInputStream(inflateFilter)); + + byte[] result = new byte[BYTE_SIZE]; + inflateIn.read(result); + + assertArrayEquals("original array not equals compress/decompressed array", result, + bytes); + } catch (IOException e) { + fail("testLz4CompressorDecopressorLogicWithCompressionStreams ex error !!!"); + } finally { + try { + if (deflateOut != null) + deflateOut.close(); + if (inflateIn != null) + inflateIn.close(); + } catch (Exception e) { + } + } + } + + public static byte[] generate(int size) { + byte[] array = new byte[size]; + for (int i = 0; i < size; i++) + array[i] = (byte)rnd.nextInt(16); + return array; + } +} From 21c14c0b026d08deecc9edf004277c55bf7bb716 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Thu, 11 Apr 2013 20:28:30 +0000 Subject: [PATCH 38/52] MAPREDUCE-4981. Add WordMean, WordMedian, WordStandardDeviation to ExamplesDriver. Contributed by Plamen Jeliazkov. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467076 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../main/java/org/apache/hadoop/examples/ExampleDriver.java | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 1ae4676dc1c..330a7db908b 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -185,6 +185,9 @@ Release 2.0.5-beta - UNRELEASED history, instaed of simulating state machine events. (Jason Lowe and Robert Parker via sseth) + MAPREDUCE-4981. Add WordMean, WordMedian, WordStandardDeviation + to ExamplesDriver. (Plamen Jeliazkov via shv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java index cddfea60862..2d9a500280b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/ExampleDriver.java @@ -38,6 +38,12 @@ public class ExampleDriver { try { pgd.addClass("wordcount", WordCount.class, "A map/reduce program that counts the words in the input files."); + pgd.addClass("wordmean", WordMean.class, + "A map/reduce program that counts the average length of the words in the input files."); + pgd.addClass("wordmedian", WordMedian.class, + "A map/reduce program that counts the median length of the words in the input files."); + pgd.addClass("wordstandarddeviation", WordStandardDeviation.class, + "A map/reduce program that counts the standard deviation of the length of the words in the input files."); pgd.addClass("aggregatewordcount", AggregateWordCount.class, "An Aggregate based map/reduce program that counts the words in the input files."); pgd.addClass("aggregatewordhist", AggregateWordHistogram.class, From 390deffbaf33fe50ac0adecebf47decf5d24ce5e Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Thu, 11 Apr 2013 21:17:56 +0000 Subject: [PATCH 39/52] HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit tests. Contributed by Vadim Bondarev git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467090 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 6 + .../io/compress/CompressDecompressTester.java | 524 ++++++++++++++++++ .../compress/TestCompressorDecompressor.java | 101 ++++ .../zlib/TestZlibCompressorDecompressor.java | 362 ++++++++++++ 4 files changed, 993 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 5a563d2d2f2..1951a646491 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -623,6 +623,9 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim Bondarev via jlowe) + HADOOP-9233. 
Cover package org.apache.hadoop.io.compress.zlib with unit + tests (Vadim Bondarev via jlowe) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -1627,6 +1630,9 @@ Release 0.23.8 - UNRELEASED HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim Bondarev via jlowe) + HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit + tests (Vadim Bondarev via jlowe) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java new file mode 100644 index 00000000000..35f84b950e4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -0,0 +1,524 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; +import org.apache.hadoop.io.compress.snappy.SnappyCompressor; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater; +import org.apache.hadoop.io.compress.zlib.ZlibCompressor; +import org.apache.hadoop.io.compress.zlib.ZlibFactory; +import org.apache.hadoop.util.NativeCodeLoader; +import org.apache.log4j.Logger; +import org.junit.Assert; + +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import static org.junit.Assert.*; + +public class CompressDecompressTester { + + private static final Logger logger = Logger + .getLogger(CompressDecompressTester.class); + + private final byte[] originalRawData; + + private ImmutableList> pairs = ImmutableList.of(); + private ImmutableList.Builder> builder = ImmutableList.builder(); + + private ImmutableSet stateges = ImmutableSet.of(); + + private PreAssertionTester assertionDelegate; + + public CompressDecompressTester(byte[] originalRawData) { + this.originalRawData = Arrays.copyOf(originalRawData, + originalRawData.length); + this.assertionDelegate = new PreAssertionTester() { + + @Override + public ImmutableList> filterOnAssumeWhat( + ImmutableList> pairs) { + ImmutableList.Builder> builder = ImmutableList + .builder(); + + for 
(TesterPair pair : pairs) { + if (isAvailable(pair)) + builder.add(pair); + } + return builder.build(); + } + }; + } + + private static boolean isNativeSnappyLoadable() { + boolean snappyAvailable = false; + boolean loaded = false; + try { + System.loadLibrary("snappy"); + logger.warn("Snappy native library is available"); + snappyAvailable = true; + boolean hadoopNativeAvailable = NativeCodeLoader.isNativeCodeLoaded(); + loaded = snappyAvailable && hadoopNativeAvailable; + if (loaded) { + logger.info("Snappy native library loaded"); + } else { + logger.warn("Snappy native library not loaded"); + } + } catch (Throwable t) { + logger.warn("Failed to load snappy: ", t); + return false; + } + return loaded; + } + + public static CompressDecompressTester of( + byte[] rawData) { + return new CompressDecompressTester(rawData); + } + + + public CompressDecompressTester withCompressDecompressPair( + T compressor, E decompressor) { + addPair( + compressor, + decompressor, + Joiner.on("_").join(compressor.getClass().getCanonicalName(), + decompressor.getClass().getCanonicalName())); + return this; + } + + public CompressDecompressTester withTestCases( + ImmutableSet stateges) { + this.stateges = ImmutableSet.copyOf(stateges); + return this; + } + + private void addPair(T compressor, E decompressor, String name) { + builder.add(new TesterPair(name, compressor, decompressor)); + } + + public void test() throws InstantiationException, IllegalAccessException { + pairs = builder.build(); + pairs = assertionDelegate.filterOnAssumeWhat(pairs); + + for (TesterPair pair : pairs) { + for (CompressionTestStrategy strategy : stateges) { + strategy.getTesterStrategy().assertCompression(pair.getName(), + pair.getCompressor(), pair.getDecompressor(), + Arrays.copyOf(originalRawData, originalRawData.length)); + } + } + endAll(pairs); + } + + private void endAll(ImmutableList> pairs) { + for (TesterPair pair : pairs) + pair.end(); + } + + interface PreAssertionTester { + ImmutableList> filterOnAssumeWhat( + ImmutableList> pairs); + } + + public enum CompressionTestStrategy { + + COMPRESS_DECOMPRESS_ERRORS(new TesterCompressionStrategy() { + private final Joiner joiner = Joiner.on("- "); + + @Override + public void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] rawData) { + assertTrue(checkSetInputNullPointerException(compressor)); + assertTrue(checkSetInputNullPointerException(decompressor)); + + assertTrue(checkCompressArrayIndexOutOfBoundsException(compressor, + rawData)); + assertTrue(checkCompressArrayIndexOutOfBoundsException(decompressor, + rawData)); + + assertTrue(checkCompressNullPointerException(compressor, rawData)); + assertTrue(checkCompressNullPointerException(decompressor, rawData)); + + assertTrue(checkSetInputArrayIndexOutOfBoundsException(compressor)); + assertTrue(checkSetInputArrayIndexOutOfBoundsException(decompressor)); + } + + private boolean checkSetInputNullPointerException(Compressor compressor) { + try { + compressor.setInput(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkSetInputNullPointerException error !!!")); + } + return false; + } + + private boolean checkCompressNullPointerException(Compressor compressor, + byte[] rawData) { + try { + compressor.setInput(rawData, 0, rawData.length); + compressor.compress(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + 
logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkCompressNullPointerException error !!!")); + } + return false; + } + + private boolean checkCompressNullPointerException( + Decompressor decompressor, byte[] rawData) { + try { + decompressor.setInput(rawData, 0, rawData.length); + decompressor.decompress(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkCompressNullPointerException error !!!")); + } + return false; + } + + private boolean checkSetInputNullPointerException( + Decompressor decompressor) { + try { + decompressor.setInput(null, 0, 1); + } catch (NullPointerException npe) { + return true; + } catch (Exception ex) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkSetInputNullPointerException error !!!")); + } + return false; + } + + private boolean checkSetInputArrayIndexOutOfBoundsException( + Compressor compressor) { + try { + compressor.setInput(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkSetInputArrayIndexOutOfBoundsException error !!!")); + } + return false; + } + + private boolean checkCompressArrayIndexOutOfBoundsException( + Compressor compressor, byte[] rawData) { + try { + compressor.setInput(rawData, 0, rawData.length); + compressor.compress(new byte[rawData.length], 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(compressor.getClass().getCanonicalName(), + "checkCompressArrayIndexOutOfBoundsException error !!!")); + } + return false; + } + + private boolean checkCompressArrayIndexOutOfBoundsException( + Decompressor decompressor, byte[] rawData) { + try { + decompressor.setInput(rawData, 0, rawData.length); + decompressor.decompress(new byte[rawData.length], 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkCompressArrayIndexOutOfBoundsException error !!!")); + } + return false; + } + + private boolean checkSetInputArrayIndexOutOfBoundsException( + Decompressor decompressor) { + try { + decompressor.setInput(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + logger.error(joiner.join(decompressor.getClass().getCanonicalName(), + "checkNullPointerException error !!!")); + } + return false; + } + + }), + + COMPRESS_DECOMPRESS_SINGLE_BLOCK(new TesterCompressionStrategy() { + final Joiner joiner = Joiner.on("- "); + + @Override + public void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] rawData) { + + int cSize = 0; + int decompressedSize = 0; + byte[] compressedResult = new byte[rawData.length]; + byte[] decompressedBytes = new byte[rawData.length]; + try { + assertTrue( + joiner.join(name, "compressor.needsInput before error !!!"), + compressor.needsInput()); + assertTrue( + joiner.join(name, "compressor.getBytesWritten before error !!!"), + compressor.getBytesWritten() == 0); + compressor.setInput(rawData, 0, rawData.length); + compressor.finish(); + while (!compressor.finished()) { + cSize += compressor.compress(compressedResult, 0, + compressedResult.length); + } + compressor.reset(); + + assertTrue( + joiner.join(name, 
"decompressor.needsInput() before error !!!"), + decompressor.needsInput()); + decompressor.setInput(compressedResult, 0, cSize); + assertFalse( + joiner.join(name, "decompressor.needsInput() after error !!!"), + decompressor.needsInput()); + while (!decompressor.finished()) { + decompressedSize = decompressor.decompress(decompressedBytes, 0, + decompressedBytes.length); + } + decompressor.reset(); + assertTrue(joiner.join(name, " byte size not equals error !!!"), + decompressedSize == rawData.length); + assertArrayEquals( + joiner.join(name, " byte arrays not equals error !!!"), rawData, + decompressedBytes); + } catch (Exception ex) { + fail(joiner.join(name, ex.getMessage())); + } + } + }), + + COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM(new TesterCompressionStrategy() { + final Joiner joiner = Joiner.on("- "); + final ImmutableMap, Integer> emptySize = ImmutableMap + .of(Lz4Compressor.class, 4, ZlibCompressor.class, 16, + SnappyCompressor.class, 4, BuiltInZlibDeflater.class, 16); + + @Override + void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] originalRawData) { + byte[] buf = null; + ByteArrayInputStream bytesIn = null; + BlockDecompressorStream blockDecompressorStream = null; + ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(); + // close without write + try { + compressor.reset(); + // decompressor.end(); + BlockCompressorStream blockCompressorStream = new BlockCompressorStream( + bytesOut, compressor, 1024, 0); + blockCompressorStream.close(); + // check compressed output + buf = bytesOut.toByteArray(); + int emSize = emptySize.get(compressor.getClass()); + Assert.assertEquals( + joiner.join(name, "empty stream compressed output size != " + + emSize), emSize, buf.length); + // use compressed output as input for decompression + bytesIn = new ByteArrayInputStream(buf); + // create decompression stream + blockDecompressorStream = new BlockDecompressorStream(bytesIn, + decompressor, 1024); + // no byte is available because stream was closed + assertEquals(joiner.join(name, " return value is not -1"), -1, + blockDecompressorStream.read()); + } catch (IOException e) { + fail(joiner.join(name, e.getMessage())); + } finally { + if (blockDecompressorStream != null) + try { + bytesOut.close(); + blockDecompressorStream.close(); + bytesIn.close(); + blockDecompressorStream.close(); + } catch (IOException e) { + } + } + } + + }), + + COMPRESS_DECOMPRESS_BLOCK(new TesterCompressionStrategy() { + private final Joiner joiner = Joiner.on("- "); + private static final int BLOCK_SIZE = 512; + private final byte[] operationBlock = new byte[BLOCK_SIZE]; + // Use default of 512 as bufferSize and compressionOverhead of + // (1% of bufferSize + 12 bytes) = 18 bytes (zlib algorithm). 
+ private static final int overheadSpace = BLOCK_SIZE / 100 + 12; + + @Override + public void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] originalRawData) { + int off = 0; + int len = originalRawData.length; + int maxSize = BLOCK_SIZE - overheadSpace; + int compresSize = 0; + List blockLabels = new ArrayList(); + ByteArrayOutputStream compressedOut = new ByteArrayOutputStream(); + ByteArrayOutputStream decompressOut = new ByteArrayOutputStream(); + try { + if (originalRawData.length > maxSize) { + do { + int bufLen = Math.min(len, maxSize); + compressor.setInput(originalRawData, off, bufLen); + compressor.finish(); + while (!compressor.finished()) { + compresSize = compressor.compress(operationBlock, 0, + operationBlock.length); + compressedOut.write(operationBlock, 0, compresSize); + blockLabels.add(compresSize); + } + compressor.reset(); + off += bufLen; + len -= bufLen; + } while (len > 0); + } + + off = 0; + // compressed bytes + byte[] compressedBytes = compressedOut.toByteArray(); + for (Integer step : blockLabels) { + decompressor.setInput(compressedBytes, off, step); + while (!decompressor.finished()) { + int dSize = decompressor.decompress(operationBlock, 0, + operationBlock.length); + decompressOut.write(operationBlock, 0, dSize); + } + decompressor.reset(); + off = off + step; + } + assertArrayEquals( + joiner.join(name, "byte arrays not equals error !!!"), + originalRawData, decompressOut.toByteArray()); + } catch (Exception ex) { + fail(joiner.join(name, ex.getMessage())); + } finally { + try { + compressedOut.close(); + } catch (IOException e) { + } + try { + decompressOut.close(); + } catch (IOException e) { + } + } + } + }); + + private final TesterCompressionStrategy testerStrategy; + + CompressionTestStrategy(TesterCompressionStrategy testStrategy) { + this.testerStrategy = testStrategy; + } + + public TesterCompressionStrategy getTesterStrategy() { + return testerStrategy; + } + } + + static final class TesterPair { + private final T compressor; + private final E decompressor; + private final String name; + + TesterPair(String name, T compressor, E decompressor) { + this.compressor = compressor; + this.decompressor = decompressor; + this.name = name; + } + + public void end() { + Configuration cfg = new Configuration(); + compressor.reinit(cfg); + compressor.end(); + decompressor.end(); + } + + public T getCompressor() { + return compressor; + } + + public E getDecompressor() { + return decompressor; + } + + public String getName() { + return name; + } + } + + /** + * Method for compressor availability check + */ + private static boolean isAvailable(TesterPair pair) { + Compressor compressor = pair.compressor; + + if (compressor.getClass().isAssignableFrom(Lz4Compressor.class) + && (NativeCodeLoader.isNativeCodeLoaded())) + return true; + + else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) + && NativeCodeLoader.isNativeCodeLoaded()) + return true; + + else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { + return ZlibFactory.isNativeZlibLoaded(new Configuration()); + } + else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class) + && isNativeSnappyLoadable()) + return true; + + return false; + } + + abstract static class TesterCompressionStrategy { + + protected final Logger logger = Logger.getLogger(getClass()); + + abstract void assertCompression(String name, Compressor compressor, + Decompressor decompressor, byte[] originalRawData); + } +} diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java new file mode 100644 index 00000000000..a8ac993c47b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io.compress; + +import static org.junit.Assert.fail; +import java.util.Random; +import org.apache.hadoop.io.compress.CompressDecompressTester.CompressionTestStrategy; +import org.apache.hadoop.io.compress.lz4.Lz4Compressor; +import org.apache.hadoop.io.compress.lz4.Lz4Decompressor; +import org.apache.hadoop.io.compress.snappy.SnappyCompressor; +import org.apache.hadoop.io.compress.snappy.SnappyDecompressor; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibDeflater; +import org.apache.hadoop.io.compress.zlib.BuiltInZlibInflater; +import org.junit.Test; +import com.google.common.collect.ImmutableSet; + +/** + * Test for pairs: + *
    + * SnappyCompressor/SnappyDecompressor
    + * Lz4Compressor/Lz4Decompressor
    + * BuiltInZlibDeflater/new BuiltInZlibInflater
    + *
    + *
    + * Note: we can't use ZlibCompressor/ZlibDecompressor here 
    + * because its constructor can throw an exception (if the native libraries are not found).
    + * The ZlibCompressor/ZlibDecompressor pair is covered by {@code TestZlibCompressorDecompressor} instead.
    + *
    + * 
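    + * Each listed pair is run through {@code CompressDecompressTester} with the
    + * single-block, multi-block, error-handling and empty-stream strategies
    + * exercised by the test cases below.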
    + * + */ +public class TestCompressorDecompressor { + + private static final Random rnd = new Random(12345L); + + @Test + public void testCompressorDecompressor() { + // no more for this data + int SIZE = 44 * 1024; + + byte[] rawData = generate(SIZE); + try { + CompressDecompressTester.of(rawData) + .withCompressDecompressPair(new SnappyCompressor(), new SnappyDecompressor()) + .withCompressDecompressPair(new Lz4Compressor(), new Lz4Decompressor()) + .withCompressDecompressPair(new BuiltInZlibDeflater(), new BuiltInZlibInflater()) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + + } catch (Exception ex) { + fail("testCompressorDecompressor error !!!" + ex); + } + } + + @Test + public void testCompressorDecompressorWithExeedBufferLimit() { + int BYTE_SIZE = 100 * 1024; + byte[] rawData = generate(BYTE_SIZE); + try { + CompressDecompressTester.of(rawData) + .withCompressDecompressPair( + new SnappyCompressor(BYTE_SIZE + BYTE_SIZE / 2), + new SnappyDecompressor(BYTE_SIZE + BYTE_SIZE / 2)) + .withCompressDecompressPair(new Lz4Compressor(BYTE_SIZE), + new Lz4Decompressor(BYTE_SIZE)) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + + } catch (Exception ex) { + fail("testCompressorDecompressorWithExeedBufferLimit error !!!" + ex); + } + } + + public static byte[] generate(int size) { + byte[] array = new byte[size]; + for (int i = 0; i < size; i++) + array[i] = (byte) rnd.nextInt(16); + return array; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java new file mode 100644 index 00000000000..6e792d1e4ea --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/zlib/TestZlibCompressorDecompressor.java @@ -0,0 +1,362 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.io.compress.zlib; + +import static org.junit.Assert.*; +import static org.junit.Assume.*; +import java.io.IOException; +import java.io.InputStream; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.compress.CompressDecompressTester; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; +import org.apache.hadoop.io.compress.DecompressorStream; +import org.apache.hadoop.io.compress.CompressDecompressTester.CompressionTestStrategy; +import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel; +import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy; +import org.junit.Before; +import org.junit.Test; +import com.google.common.collect.ImmutableSet; + +public class TestZlibCompressorDecompressor { + + private static final Random random = new Random(12345L); + + @Before + public void before() { + assumeTrue(ZlibFactory.isNativeZlibLoaded(new Configuration())); + } + + @Test + public void testZlibCompressorDecompressor() { + try { + int SIZE = 44 * 1024; + byte[] rawData = generate(SIZE); + + CompressDecompressTester.of(rawData) + .withCompressDecompressPair(new ZlibCompressor(), new ZlibDecompressor()) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + } catch (Exception ex) { + fail("testCompressorDecompressor error !!!" + ex); + } + } + + @Test + public void testCompressorDecompressorWithExeedBufferLimit() { + int BYTE_SIZE = 100 * 1024; + byte[] rawData = generate(BYTE_SIZE); + try { + CompressDecompressTester.of(rawData) + .withCompressDecompressPair( + new ZlibCompressor( + org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel.BEST_COMPRESSION, + CompressionStrategy.DEFAULT_STRATEGY, + org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionHeader.DEFAULT_HEADER, + BYTE_SIZE), + new ZlibDecompressor( + org.apache.hadoop.io.compress.zlib.ZlibDecompressor.CompressionHeader.DEFAULT_HEADER, + BYTE_SIZE)) + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, + CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) + .test(); + } catch (Exception ex) { + fail("testCompressorDecompressorWithExeedBufferLimit error !!!" 
+ ex); + } + } + + + @Test + public void testZlibCompressorDecompressorWithConfiguration() { + Configuration conf = new Configuration(); + conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true); + if (ZlibFactory.isNativeZlibLoaded(conf)) { + byte[] rawData; + int tryNumber = 5; + int BYTE_SIZE = 10 * 1024; + Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf); + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + rawData = generate(BYTE_SIZE); + try { + for (int i = 0; i < tryNumber; i++) + compressDecompressZlib(rawData, (ZlibCompressor) zlibCompressor, + (ZlibDecompressor) zlibDecompressor); + zlibCompressor.reinit(conf); + } catch (Exception ex) { + fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex); + } + } else { + assertTrue("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + } + } + + @Test + public void testZlibCompressDecompress() { + byte[] rawData = null; + int rawDataSize = 0; + rawDataSize = 1024 * 64; + rawData = generate(rawDataSize); + try { + ZlibCompressor compressor = new ZlibCompressor(); + ZlibDecompressor decompressor = new ZlibDecompressor(); + assertFalse("testZlibCompressDecompress finished error", + compressor.finished()); + compressor.setInput(rawData, 0, rawData.length); + assertTrue("testZlibCompressDecompress getBytesRead before error", + compressor.getBytesRead() == 0); + compressor.finish(); + + byte[] compressedResult = new byte[rawDataSize]; + int cSize = compressor.compress(compressedResult, 0, rawDataSize); + assertTrue("testZlibCompressDecompress getBytesRead after error", + compressor.getBytesRead() == rawDataSize); + assertTrue( + "testZlibCompressDecompress compressed size not less than original size", + cSize < rawDataSize); + decompressor.setInput(compressedResult, 0, cSize); + byte[] decompressedBytes = new byte[rawDataSize]; + decompressor.decompress(decompressedBytes, 0, decompressedBytes.length); + assertArrayEquals("testZlibCompressDecompress arrays not equal ", + rawData, decompressedBytes); + compressor.reset(); + decompressor.reset(); + } catch (IOException ex) { + fail("testZlibCompressDecompress ex !!!"
+ ex); + } + } + + @Test + public void testZlibCompressorDecompressorSetDictionary() { + Configuration conf = new Configuration(); + conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true); + if (ZlibFactory.isNativeZlibLoaded(conf)) { + Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf); + Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf); + + checkSetDictionaryNullPointerException(zlibCompressor); + checkSetDictionaryNullPointerException(zlibDecompressor); + + checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor); + checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor); + } else { + assertTrue("ZlibFactory is using native libs against request", + ZlibFactory.isNativeZlibLoaded(conf)); + } + } + + @Test + public void testZlibFactory() { + Configuration cfg = new Configuration(); + + assertTrue("testZlibFactory compression level error !!!", + CompressionLevel.DEFAULT_COMPRESSION == ZlibFactory + .getCompressionLevel(cfg)); + + assertTrue("testZlibFactory compression strategy error !!!", + CompressionStrategy.DEFAULT_STRATEGY == ZlibFactory + .getCompressionStrategy(cfg)); + + ZlibFactory.setCompressionLevel(cfg, CompressionLevel.BEST_COMPRESSION); + assertTrue("testZlibFactory compression strategy error !!!", + CompressionLevel.BEST_COMPRESSION == ZlibFactory + .getCompressionLevel(cfg)); + + ZlibFactory.setCompressionStrategy(cfg, CompressionStrategy.FILTERED); + assertTrue("testZlibFactory compression strategy error !!!", + CompressionStrategy.FILTERED == ZlibFactory.getCompressionStrategy(cfg)); + } + + + private boolean checkSetDictionaryNullPointerException( + Decompressor decompressor) { + try { + decompressor.setDictionary(null, 0, 1); + } catch (NullPointerException ex) { + return true; + } catch (Exception ex) { + } + return false; + } + + private boolean checkSetDictionaryNullPointerException(Compressor compressor) { + try { + compressor.setDictionary(null, 0, 1); + } catch (NullPointerException ex) { + return true; + } catch (Exception ex) { + } + return false; + } + + private boolean checkSetDictionaryArrayIndexOutOfBoundsException( + Compressor compressor) { + try { + compressor.setDictionary(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + } + return false; + } + + private boolean checkSetDictionaryArrayIndexOutOfBoundsException( + Decompressor decompressor) { + try { + decompressor.setDictionary(new byte[] { (byte) 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException e) { + return true; + } catch (Exception e) { + } + return false; + } + + private byte[] compressDecompressZlib(byte[] rawData, + ZlibCompressor zlibCompressor, ZlibDecompressor zlibDecompressor) + throws IOException { + int cSize = 0; + byte[] compressedByte = new byte[rawData.length]; + byte[] decompressedRawData = new byte[rawData.length]; + zlibCompressor.setInput(rawData, 0, rawData.length); + zlibCompressor.finish(); + while (!zlibCompressor.finished()) { + cSize = zlibCompressor.compress(compressedByte, 0, compressedByte.length); + } + zlibCompressor.reset(); + + assertTrue(zlibDecompressor.getBytesWritten() == 0); + assertTrue(zlibDecompressor.getBytesRead() == 0); + assertTrue(zlibDecompressor.needsInput()); + zlibDecompressor.setInput(compressedByte, 0, cSize); + assertFalse(zlibDecompressor.needsInput()); + while (!zlibDecompressor.finished()) { + zlibDecompressor.decompress(decompressedRawData, 0, + decompressedRawData.length); + } + 
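+ // After the decompress loop drains the stream, the decompressor should report
+ // the original (raw) length as bytes written and the compressed size as bytes read.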
assertTrue(zlibDecompressor.getBytesWritten() == rawData.length); + assertTrue(zlibDecompressor.getBytesRead() == cSize); + zlibDecompressor.reset(); + assertTrue(zlibDecompressor.getRemaining() == 0); + assertArrayEquals( + "testZlibCompressorDecompressorWithConfiguration array equals error", + rawData, decompressedRawData); + + return decompressedRawData; + } + + @Test + public void testBuiltInGzipDecompressorExceptions() { + BuiltInGzipDecompressor decompresser = new BuiltInGzipDecompressor(); + try { + decompresser.setInput(null, 0, 1); + } catch (NullPointerException ex) { + // expected + } catch (Exception ex) { + fail("testBuiltInGzipDecompressorExceptions npe error " + ex); + } + + try { + decompresser.setInput(new byte[] { 0 }, 0, -1); + } catch (ArrayIndexOutOfBoundsException ex) { + // expected + } catch (Exception ex) { + fail("testBuiltInGzipDecompressorExceptions aioob error" + ex); + } + + assertTrue("decompresser.getBytesRead error", + decompresser.getBytesRead() == 0); + assertTrue("decompresser.getRemaining error", + decompresser.getRemaining() == 0); + decompresser.reset(); + decompresser.end(); + + InputStream decompStream = null; + try { + // invalid 0 and 1 bytes , must be 31, -117 + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 0, 0, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 0 and 1 byte in gzip stream" + ex); + } + + // invalid 2 byte, must be 8 + try { + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 31, -117, 7, 1, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 2 byte in gzip stream" + ex); + } + + try { + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 31, -117, 8, -32, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 3 byte in gzip stream" + ex); + } + try { + int buffSize = 1 * 1024; + byte buffer[] = new byte[buffSize]; + Decompressor decompressor = new BuiltInGzipDecompressor(); + DataInputBuffer gzbuf = new DataInputBuffer(); + decompStream = new DecompressorStream(gzbuf, decompressor); + gzbuf.reset(new byte[] { 31, -117, 8, 4, 1, 1, 1, 11, 1, 1, 1, 1 }, 11); + decompStream.read(buffer); + } catch (IOException ioex) { + // expected + } catch (Exception ex) { + fail("invalid 3 byte make hasExtraField" + ex); + } + } + + public static byte[] generate(int size) { + byte[] data = new byte[size]; + for (int i = 0; i < size; i++) + data[i] = (byte)random.nextInt(16); + return data; + } +} From 5fd460e688a6df5d6093bae4b7add73f8d44162c Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 22:27:45 +0000 Subject: [PATCH 40/52] MAPREUDUCE-5059. 
Change average merge time on Job overview page to be the time delta between the end of the shuffle and the start of the reduce. Contributed by Omkar Vinit Joshi. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467120 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 ++ .../mapreduce/v2/hs/webapp/dao/JobInfo.java | 2 +- .../v2/hs/TestJobHistoryEntities.java | 14 ++-- .../v2/hs/webapp/dao/TestJobInfo.java | 69 +++++++++++++++++++ ...1329348468601-10-1-SUCCEEDED-default.jhist | 4 ++ 5 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 330a7db908b..5b0c65d1a05 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -188,6 +188,10 @@ Release 2.0.5-beta - UNRELEASED MAPREDUCE-4981. Add WordMean, WordMedian, WordStandardDeviation to ExamplesDriver. (Plamen Jeliazkov via shv) + MAPREUDUCE-5059. Change average merge time on Job overview page to be the + time delta between the end of the shuffle and the start of the reduce. + (Omkar Vinit Joshi via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java index e5358dcb3e1..9a4f137da77 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java @@ -287,7 +287,7 @@ public class JobInfo { avgShuffleTime += (attempt.getShuffleFinishTime() - attempt .getLaunchTime()); avgMergeTime += attempt.getSortFinishTime() - - attempt.getLaunchTime(); + - attempt.getShuffleFinishTime(); avgReduceTime += (attempt.getFinishTime() - attempt .getShuffleFinishTime()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java index 40c1b73322f..0c4f3d7779e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryEntities.java @@ -94,11 +94,11 @@ public class TestJobHistoryEntities { assertEquals(1, completedJob.getAMInfos().size()); assertEquals(10, completedJob.getCompletedMaps()); assertEquals(1, completedJob.getCompletedReduces()); - assertEquals(11, completedJob.getTasks().size()); + assertEquals(12, completedJob.getTasks().size()); //Verify tasks loaded at this point. 
assertEquals(true, completedJob.tasksLoaded.get()); assertEquals(10, completedJob.getTasks(TaskType.MAP).size()); - assertEquals(1, completedJob.getTasks(TaskType.REDUCE).size()); + assertEquals(2, completedJob.getTasks(TaskType.REDUCE).size()); assertEquals("user", completedJob.getUserName()); assertEquals(JobState.SUCCEEDED, completedJob.getState()); JobReport jobReport = completedJob.getReport(); @@ -119,7 +119,7 @@ public class TestJobHistoryEntities { Map mapTasks = completedJob.getTasks(TaskType.MAP); Map reduceTasks = completedJob.getTasks(TaskType.REDUCE); assertEquals(10, mapTasks.size()); - assertEquals(1, reduceTasks.size()); + assertEquals(2, reduceTasks.size()); Task mt1 = mapTasks.get(mt1Id); assertEquals(1, mt1.getAttempts().size()); @@ -134,7 +134,7 @@ public class TestJobHistoryEntities { assertEquals(TaskState.SUCCEEDED, rt1Report.getTaskState()); assertEquals(rt1Id, rt1Report.getTaskId()); } - + @Test (timeout=10000) public void testCompletedTaskAttempt() throws Exception { HistoryFileInfo info = mock(HistoryFileInfo.class); @@ -196,12 +196,12 @@ public class TestJobHistoryEntities { assertEquals("default",completedJob.getQueueName()); // progress assertEquals(1.0, completedJob.getProgress(),0.001); - // 11 rows in answer - assertEquals(11,completedJob.getTaskAttemptCompletionEvents(0,1000).length); + // 12 rows in answer + assertEquals(12,completedJob.getTaskAttemptCompletionEvents(0,1000).length); // select first 10 rows assertEquals(10,completedJob.getTaskAttemptCompletionEvents(0,10).length); // select 5-10 rows include 5th - assertEquals(6,completedJob.getTaskAttemptCompletionEvents(5,10).length); + assertEquals(7,completedJob.getTaskAttemptCompletionEvents(5,10).length); // without errors assertEquals(1,completedJob.getDiagnostics().size()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java new file mode 100644 index 00000000000..c59672fa719 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/TestJobInfo.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce.v2.hs.webapp.dao; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.io.IOException; + +import junit.framework.Assert; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.JobACLsManager; +import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo; +import org.apache.hadoop.mapreduce.v2.hs.CompletedJob; +import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEntities; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; +import org.junit.Test; + +public class TestJobInfo { + + @Test(timeout = 10000) + public void testAverageMergeTime() throws IOException { + String historyFileName = + "job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist"; + String confFileName = + "job_1329348432655_0001_conf.xml"; + Configuration conf = new Configuration(); + JobACLsManager jobAclsMgr = new JobACLsManager(conf); + Path fulleHistoryPath = + new Path(TestJobHistoryEntities.class.getClassLoader() + .getResource(historyFileName) + .getFile()); + Path fullConfPath = + new Path(TestJobHistoryEntities.class.getClassLoader() + .getResource(confFileName) + .getFile()); + + HistoryFileInfo info = mock(HistoryFileInfo.class); + when(info.getConfFile()).thenReturn(fullConfPath); + + JobId jobId = MRBuilderUtils.newJobId(1329348432655l, 1, 1); + CompletedJob completedJob = + new CompletedJob(conf, jobId, fulleHistoryPath, true, "user", + info, jobAclsMgr); + JobInfo jobInfo = new JobInfo(completedJob); + // There are 2 tasks with merge time of 45 and 55 respectively. So average + // merge time should be 50. 
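+ // (From the .jhist fixture added by this patch: both reduce attempts report
+ // shuffleFinishTime=1329348468462, with sortFinishTime=1329348468517 and
+ // 1329348468507, i.e. merge times of 55 ms and 45 ms, averaging 50 ms.)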
+ Assert.assertEquals(50L, jobInfo.getAvgMergeTime().longValue()); + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist index 484971898e0..017e52c53e2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/resources/job_1329348432655_0001-1329348443227-user-Sleep+job-1329348468601-10-1-SUCCEEDED-default.jhist @@ -15,6 +15,7 @@ Avro-Json {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_m_000008","taskType":"MAP","startTime":1329348448388,"splitLocations":""}}} {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_m_000009","taskType":"MAP","startTime":1329348448388,"splitLocations":""}}} {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_r_000000","taskType":"REDUCE","startTime":1329348448388,"splitLocations":""}}} + {"type":"TASK_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskStarted":{"taskid":"task_1329348432655_0001_r_000001","taskType":"REDUCE","startTime":1329348448388,"splitLocations":""}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000000","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000000_0","startTime":1329348450485,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000002"}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000002","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000002_0","startTime":1329348450537,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000004"}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000004","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000004_0","startTime":1329348450538,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000006"}}} @@ -40,6 +41,7 @@ Avro-Json {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000008","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000008_0","startTime":1329348462765,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000010"}}} {"type":"MAP_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_m_000009","taskType":"MAP","attemptId":"attempt_1329348432655_0001_m_000009_0","startTime":1329348462792,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000011"}}} 
{"type":"REDUCE_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_r_000000","taskType":"REDUCE","attemptId":"attempt_1329348432655_0001_r_000000_0","startTime":1329348464995,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000014"}}} + {"type":"REDUCE_ATTEMPT_STARTED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStarted":{"taskid":"task_1329348432655_0001_r_000001","taskType":"REDUCE","attemptId":"attempt_1329348432655_0001_r_000001_0","startTime":1329348464995,"trackerName":"localhost","httpPort":9999,"shufflePort":8080,"containerId":"container_1329348432655_0001_01_000014"}}} {"type":"MAP_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinished":{"taskid":"task_1329348432655_0001_m_000007","attemptId":"attempt_1329348432655_0001_m_000007_0","taskType":"MAP","taskStatus":"SUCCEEDED","mapFinishTime":1329348465534,"finishTime":1329348465965,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... (1) ms left","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":194},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":185327616},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":713089024},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes 
Read","value":0}]}]},"clockSplits":[3464,18,19,18,18,19,18,18,19,18,18,19],"cpuUsages":[26,27,27,26,27,27,26,27,27,26,27,27],"vMemKbytes":[29015,87046,145078,203109,261140,319171,377203,435234,493266,551297,609328,667360],"physMemKbytes":[7541,22623,37705,52786,67869,82950,98033,113114,128197,143279,158360,173443]}}} {"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_m_000007","taskType":"MAP","finishTime":1329348465965,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":194},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":185327616},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":713089024},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]}}}} {"type":"MAP_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinished":{"taskid":"task_1329348432655_0001_m_000009","attemptId":"attempt_1329348432655_0001_m_000009_0","taskType":"MAP","taskStatus":"SUCCEEDED","mapFinishTime":1329348465986,"finishTime":1329348466363,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... 
(1) ms left","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":23},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":330},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":182169600},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":705945600},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]},"clockSplits":[3223,21,21,21,21,21,20,21,21,21,21,21],"cpuUsages":[27,28,27,28,27,28,27,28,27,28,27,28],"vMemKbytes":[28725,86175,143625,201074,258525,315974,373425,430874,488325,545775,603224,660675],"physMemKbytes":[7412,22237,37062,51887,66712,81537,96362,111187,126012,140837,155662,170487]}}} @@ -47,5 +49,7 @@ Avro-Json {"type":"MAP_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.MapAttemptFinished":{"taskid":"task_1329348432655_0001_m_000008","attemptId":"attempt_1329348432655_0001_m_000008_0","taskType":"MAP","taskStatus":"SUCCEEDED","mapFinishTime":1329348467231,"finishTime":1329348467421,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... 
(1) ms left","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":12},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":181297152},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":705019904},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]},"clockSplits":[4483,15,16,15,16,15,15,16,15,16,15,16],"cpuUsages":[26,27,27,26,27,27,26,27,27,26,27,27],"vMemKbytes":[28686,86061,143436,200810,258185,315560,372935,430309,487684,545059,602433,659808],"physMemKbytes":[7377,22131,36885,51638,66393,81146,95901,110654,125409,140163,154916,169671]}}} {"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_m_000008","taskType":"MAP","finishTime":1329348467421,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":120},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48051},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write 
operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":48},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":1},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":1},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":1},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":4},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":12},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":48},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":1},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":12},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":320},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":181297152},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":705019904},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":165478400}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]}}}} {"type":"REDUCE_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.ReduceAttemptFinished":{"taskid":"task_1329348432655_0001_r_000000","attemptId":"attempt_1329348432655_0001_r_000000_0","taskType":"REDUCE","taskStatus":"SUCCEEDED","shuffleFinishTime":1329348468462,"sortFinishTime":1329348468517,"finishTime":1329348468600,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... 
(1) ms left > reduce","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]},"clockSplits":[3530,6,7,6,7,6,6,7,6,7,6,7],"cpuUsages":[89,89,89,89,89,90,89,89,89,89,89,90],"vMemKbytes":[29070,87211,145352,203493,261634,319775,377916,436057,494198,552339,610480,668621],"physMemKbytes":[3367,10104,16841,23577,30314,37051,43788,50524,57261,63998,70734,77471]}}} + 
{"type":"REDUCE_ATTEMPT_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.ReduceAttemptFinished":{"taskid":"task_1329348432655_0001_r_000001","attemptId":"attempt_1329348432655_0001_r_000001_0","taskType":"REDUCE","taskStatus":"SUCCEEDED","shuffleFinishTime":1329348468462,"sortFinishTime":1329348468507,"finishTime":1329348468600,"hostname":"localhost","port":45454,"rackname":"/default-rack","state":"Sleeping... (1) ms left > reduce","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes 
Written","value":0}]}]},"clockSplits":[3530,6,7,6,7,6,6,7,6,7,6,7],"cpuUsages":[89,89,89,89,89,90,89,89,89,89,89,90],"vMemKbytes":[29070,87211,145352,203493,261634,319775,377916,436057,494198,552339,610480,668621],"physMemKbytes":[3367,10104,16841,23577,30314,37051,43788,50524,57261,63998,70734,77471]}}} {"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_r_000000","taskType":"REDUCE","finishTime":1329348468600,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]}}}} + 
{"type":"TASK_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.TaskFinished":{"taskid":"task_1329348432655_0001_r_000001","taskType":"REDUCE","finishTime":1329348468600,"status":"SUCCEEDED","counters":{"name":"COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]}}}} {"type":"JOB_FINISHED","event":{"org.apache.hadoop.mapreduce.jobhistory.JobFinished":{"jobid":"job_1329348432655_0001","finishTime":1329348468601,"finishedMaps":10,"finishedReduces":1,"failedMaps":0,"failedReduces":0,"totalCounters":{"name":"TOTAL_COUNTERS","groups":[{"name":"Shuffle Errors","displayName":"Shuffle 
Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":1386},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":528584},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":480},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":10},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.JobCounter","displayName":"Job Counters ","counts":[{"name":"TOTAL_LAUNCHED_MAPS","displayName":"Launched map tasks","value":10},{"name":"TOTAL_LAUNCHED_REDUCES","displayName":"Launched reduce tasks","value":1},{"name":"OTHER_LOCAL_MAPS","displayName":"Other local map tasks","value":10},{"name":"SLOTS_MILLIS_MAPS","displayName":"Total time spent by all maps in occupied slots (ms)","value":0},{"name":"SLOTS_MILLIS_REDUCES","displayName":"Total time spent by all reduces in occupied slots (ms)","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":10},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":10},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":40},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":120},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":480},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":20},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":2256},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":4460},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":1923493888},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":7773462528},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total 
committed heap usage (bytes)","value":1778515968}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]},"mapCounters":{"name":"MAP_COUNTERS","groups":[{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes read","value":1200},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":480510},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":480},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":10},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"MAP_INPUT_RECORDS","displayName":"Map input records","value":10},{"name":"MAP_OUTPUT_RECORDS","displayName":"Map output records","value":10},{"name":"MAP_OUTPUT_BYTES","displayName":"Map output bytes","value":40},{"name":"MAP_OUTPUT_MATERIALIZED_BYTES","displayName":"Map output materialized bytes","value":120},{"name":"SPLIT_RAW_BYTES","displayName":"Input split bytes","value":480},{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":0},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":2242},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":3390},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":1840713728},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":7059025920},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":1717960704}]},{"name":"org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter","displayName":"File Input Format Counters ","counts":[{"name":"BYTES_READ","displayName":"Bytes Read","value":0}]}]},"reduceCounters":{"name":"REDUCE_COUNTERS","groups":[{"name":"Shuffle Errors","displayName":"Shuffle Errors","counts":[{"name":"BAD_ID","displayName":"BAD_ID","value":0},{"name":"CONNECTION","displayName":"CONNECTION","value":0},{"name":"IO_ERROR","displayName":"IO_ERROR","value":0},{"name":"WRONG_LENGTH","displayName":"WRONG_LENGTH","value":0},{"name":"WRONG_MAP","displayName":"WRONG_MAP","value":0},{"name":"WRONG_REDUCE","displayName":"WRONG_REDUCE","value":0}]},{"name":"org.apache.hadoop.mapreduce.FileSystemCounter","displayName":"File System Counters","counts":[{"name":"FILE_BYTES_READ","displayName":"FILE: Number of bytes 
read","value":186},{"name":"FILE_BYTES_WRITTEN","displayName":"FILE: Number of bytes written","value":48074},{"name":"FILE_READ_OPS","displayName":"FILE: Number of read operations","value":0},{"name":"FILE_LARGE_READ_OPS","displayName":"FILE: Number of large read operations","value":0},{"name":"FILE_WRITE_OPS","displayName":"FILE: Number of write operations","value":0},{"name":"HDFS_BYTES_READ","displayName":"HDFS: Number of bytes read","value":0},{"name":"HDFS_BYTES_WRITTEN","displayName":"HDFS: Number of bytes written","value":0},{"name":"HDFS_READ_OPS","displayName":"HDFS: Number of read operations","value":0},{"name":"HDFS_LARGE_READ_OPS","displayName":"HDFS: Number of large read operations","value":0},{"name":"HDFS_WRITE_OPS","displayName":"HDFS: Number of write operations","value":0}]},{"name":"org.apache.hadoop.mapreduce.TaskCounter","displayName":"Map-Reduce Framework","counts":[{"name":"COMBINE_INPUT_RECORDS","displayName":"Combine input records","value":0},{"name":"COMBINE_OUTPUT_RECORDS","displayName":"Combine output records","value":0},{"name":"REDUCE_INPUT_GROUPS","displayName":"Reduce input groups","value":1},{"name":"REDUCE_SHUFFLE_BYTES","displayName":"Reduce shuffle bytes","value":120},{"name":"REDUCE_INPUT_RECORDS","displayName":"Reduce input records","value":10},{"name":"REDUCE_OUTPUT_RECORDS","displayName":"Reduce output records","value":0},{"name":"SPILLED_RECORDS","displayName":"Spilled Records","value":10},{"name":"SHUFFLED_MAPS","displayName":"Shuffled Maps ","value":10},{"name":"FAILED_SHUFFLE","displayName":"Failed Shuffles","value":0},{"name":"MERGED_MAP_OUTPUTS","displayName":"Merged Map outputs","value":10},{"name":"GC_TIME_MILLIS","displayName":"GC time elapsed (ms)","value":14},{"name":"CPU_MILLISECONDS","displayName":"CPU time spent (ms)","value":1070},{"name":"PHYSICAL_MEMORY_BYTES","displayName":"Physical memory (bytes) snapshot","value":82780160},{"name":"VIRTUAL_MEMORY_BYTES","displayName":"Virtual memory (bytes) snapshot","value":714436608},{"name":"COMMITTED_HEAP_BYTES","displayName":"Total committed heap usage (bytes)","value":60555264}]},{"name":"org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter","displayName":"File Output Format Counters ","counts":[{"name":"BYTES_WRITTEN","displayName":"Bytes Written","value":0}]}]}}}} From 3c8bd60616588d270850f273f4101ab06c66af18 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 22:54:02 +0000 Subject: [PATCH 41/52] MAPREDUCE-5094. Disabled memory monitoring by default in MiniMRYarnCluster to avoid some downstream tests failing. Contributed by Siddharth Seth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467124 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../org/apache/hadoop/mapreduce/MRConfig.java | 13 ++++++++++++- .../hadoop/mapreduce/v2/MiniMRYarnCluster.java | 16 +++++++++++----- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 5b0c65d1a05..2669204e1f1 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -318,6 +318,9 @@ Release 2.0.4-alpha - UNRELEASED MAPREDUCE-5083. MiniMRCluster should use a random component when creating an actual cluster (Siddharth Seth via hitesh) + MAPREDUCE-5094. Disabled memory monitoring by default in MiniMRYarnCluster + to avoid some downstream tests failing. 
(Siddharth Seth via vinodkv) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java index 375391320be..bbac5fcab9d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java @@ -103,4 +103,15 @@ public interface MRConfig { "mapreduce.ifile.readahead.bytes"; public static final int DEFAULT_MAPRED_IFILE_READAHEAD_BYTES = - 4 * 1024 * 1024;} + 4 * 1024 * 1024; + + /** + * Whether users are explicitly trying to control resource monitoring + * configuration for the MiniMRCluster. Disabled by default. + */ + public static final String MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING + = "mapreduce.minicluster.control-resource-monitoring"; + public static final boolean + DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING = false; +} + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java index 8edf4f15d93..10d7a71e1b8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java @@ -20,16 +20,13 @@ package org.apache.hadoop.mapreduce.v2; import java.io.File; import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.LocalContainerLauncher; import org.apache.hadoop.mapred.ShuffleHandler; import org.apache.hadoop.mapreduce.MRConfig; @@ -76,6 +73,15 @@ public class MiniMRYarnCluster extends MiniYARNCluster { conf.set(MRJobConfig.MR_AM_STAGING_DIR, new File(getTestWorkDir(), "apps_staging_dir/").getAbsolutePath()); } + + // By default, VMEM monitoring disabled, PMEM monitoring enabled. + if (!conf.getBoolean( + MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING, + MRConfig.DEFAULT_MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING)) { + conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, false); + conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false); + } + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); try { From 529170de44d7685adf717558fda2e77da0533f33 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 23:09:43 +0000 Subject: [PATCH 42/52] Fixing CHANGES.txt entry for YARN-319. 
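A usage note on the MiniMRYarnCluster change in the patch above: unless a test sets the new mapreduce.minicluster.control-resource-monitoring key, the mini cluster now turns off both the NodeManager physical-memory and virtual-memory checks before starting. A test that wants those checks left on can opt back in along the lines of the following sketch; the cluster name, the chosen check values, and the main-method wrapper are illustrative assumptions, not part of the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class MiniClusterMonitoringSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Declaring that the caller controls resource monitoring makes the mini
    // cluster leave the NodeManager check flags untouched ...
    conf.setBoolean(
        MRConfig.MAPREDUCE_MINICLUSTER_CONTROL_RESOURCE_MONITORING, true);
    // ... so the desired values must then be set explicitly (assumed choice:
    // keep the physical-memory check, skip the virtual-memory check).
    conf.setBoolean(YarnConfiguration.NM_PMEM_CHECK_ENABLED, true);
    conf.setBoolean(YarnConfiguration.NM_VMEM_CHECK_ENABLED, false);

    MiniMRYarnCluster miniCluster = new MiniMRYarnCluster("ExampleMiniCluster");
    miniCluster.init(conf);
    miniCluster.start();
    // ... submit and verify the job under test here ...
    miniCluster.stop();
  }
}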
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467133 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 9709e053647..9646f4ae0a1 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -227,6 +227,10 @@ Release 2.0.5-beta - UNRELEASED YARN-539. Addressed memory leak of LocalResource objects NM when a resource localization fails. (Omkar Vinit Joshi via vinodkv) + YARN-319. Submitting a job to a fair scheduler queue for which the user + does not have permission causes the client to wait forever. + (shenhong via tomwhite) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -443,10 +447,6 @@ Release 2.0.3-alpha - 2013-02-06 YARN-302. Fair scheduler assignmultiple should default to false. (sandyr via tucu) - YARN-319. Submitting a job to a fair scheduler queue for which the user - does not have permission causes the client to wait forever. - (shenhong via tomwhite) - YARN-372. Move InlineDispatcher from hadoop-yarn-server-resourcemanager to hadoop-yarn-common (sseth via hitesh) From acb14d3dfe3ce49d320408e5e712b39ff00015ec Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 11 Apr 2013 23:26:32 +0000 Subject: [PATCH 43/52] HADOOP-9471. Merged into 2.0.4-alpha. Fixing CHANGES.txt git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467139 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 1951a646491..bc9b318107f 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -617,9 +617,6 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9429. TestConfiguration fails with IBM JAVA. (Amir Sanjar via suresh) - HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR, - breaking webhdfs. (tucu) - HADOOP-9222. Cover package with org.apache.hadoop.io.lz4 unit tests (Vadim Bondarev via jlowe) @@ -652,6 +649,9 @@ Release 2.0.4-alpha - UNRELEASED HADOOP-9444. Modify hadoop-policy.xml to replace unexpanded variables to a default value of '*'. (Roman Shaposhnik via vinodkv) + HADOOP-9471. hadoop-client wrongfully excludes jetty-util JAR, + breaking webhdfs. (tucu) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES From 8e838bd03c44d8cd19a4b0b815b5992c6935c2a0 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Thu, 11 Apr 2013 23:49:16 +0000 Subject: [PATCH 44/52] MAPREDUCE-4985. Add compression option to TestDFSIO usage. Contributed by Plamen Jeliazkov. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467145 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../src/test/java/org/apache/hadoop/fs/TestDFSIO.java | 1 + 2 files changed, 4 insertions(+) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 2669204e1f1..0c64a5b6ad4 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -192,6 +192,9 @@ Release 2.0.5-beta - UNRELEASED time delta between the end of the shuffle and the start of the reduce. (Omkar Vinit Joshi via vinodkv) + MAPREDUCE-4985. Add compression option to TestDFSIO usage. 
+ (Plamen Jeliazkov via shv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java index a13f8ed3bd5..a8c99e5be4b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/TestDFSIO.java @@ -99,6 +99,7 @@ public class TestDFSIO implements Tool { " [genericOptions]" + " -read [-random | -backward | -skip [-skipSize Size]] |" + " -write | -append | -clean" + + " [-compression codecClassName]" + " [-nrFiles N]" + " [-size Size[B|KB|MB|GB|TB]]" + " [-resFile resultFileName] [-bufferSize Bytes]"; From 41c4cd08a0feb2fa6b1125ab70504ab70fe59a09 Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Fri, 12 Apr 2013 03:00:29 +0000 Subject: [PATCH 45/52] MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris Nauroth via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467158 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ hadoop-tools/hadoop-streaming/pom.xml | 14 ++++++ .../apache/hadoop/streaming/StreamJob.java | 3 +- .../hadoop-streaming/src/test/bin/cat.cmd | 18 +++++++ .../src/test/bin/xargs_cat.cmd | 18 +++++++ .../apache/hadoop/streaming/TestFileArgs.java | 6 ++- .../streaming/TestMultipleArchiveFiles.java | 7 +-- .../streaming/TestMultipleCachefiles.java | 4 +- .../streaming/TestStreamXmlRecordReader.java | 2 +- .../hadoop/streaming/TestStreaming.java | 49 +++++++++++++++---- .../streaming/TestStreamingKeyValue.java | 2 +- .../TestStreamingOutputKeyValueTypes.java | 18 +++---- .../streaming/TestStreamingTaskLog.java | 17 ++++--- .../apache/hadoop/streaming/TestSymLink.java | 6 +-- 14 files changed, 129 insertions(+), 38 deletions(-) create mode 100644 hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd create mode 100644 hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 0c64a5b6ad4..93b5d63e767 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -131,6 +131,9 @@ Trunk (Unreleased) MAPREDUCE-5078. TestMRAppMaster fails on Windows due to mismatched path separators. (Chris Nauroth via sseth) + MAPREDUCE-4885. Streaming tests have multiple failures on Windows. (Chris + Nauroth via bikas) + BREAKDOWN OF HADOOP-8562 SUBTASKS MAPREDUCE-4739. Some MapReduce tests fail to find winutils. 
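For the TestDFSIO change above, the new -compression flag takes a codec class name and combines with the existing options listed in the usage string. Below is a hedged sketch of driving the write benchmark through ToolRunner; the file count, file size, result-file path, and the choice of GzipCodec are arbitrary illustrative values.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.TestDFSIO;
import org.apache.hadoop.util.ToolRunner;

public class DfsIoCompressionSketch {
  public static void main(String[] args) throws Exception {
    // Sketch: run the TestDFSIO write benchmark with compressed output files.
    int rc = ToolRunner.run(new Configuration(), new TestDFSIO(), new String[] {
        "-write",
        "-compression", "org.apache.hadoop.io.compress.GzipCodec",
        "-nrFiles", "4",
        "-size", "16MB",
        "-resFile", "/tmp/TestDFSIO_write.log"
    });
    System.exit(rc);
  }
}

Since TestDFSIO is a Tool in the jobclient test sources, the same argument list can equally be passed on the command line.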
diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index 99249693166..7265c0468d5 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -127,6 +127,20 @@ + + copy-test-bin + process-test-resources + + run + + + + + + + + + diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index 2acc5563bd7..d18a7654e09 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -294,8 +294,7 @@ public class StreamJob implements Tool { for (String file : values) { packageFiles_.add(file); try { - URI pathURI = new URI(file); - Path path = new Path(pathURI); + Path path = new Path(file); FileSystem localFs = FileSystem.getLocal(config_); String finalPath = path.makeQualified(localFs).toString(); if(fileList.length() > 0) { diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd new file mode 100644 index 00000000000..4b38e3e3b4b --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/test/bin/cat.cmd @@ -0,0 +1,18 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. + +@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @echo %%A +@rem lines have been copied from stdin to stdout diff --git a/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd new file mode 100644 index 00000000000..f398a8d65c3 --- /dev/null +++ b/hadoop-tools/hadoop-streaming/src/test/bin/xargs_cat.cmd @@ -0,0 +1,18 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+ +@for /F "usebackq tokens=* delims=" %%A in (`findstr .`) do @type %%A +@rem files named on stdin have been copied to stdout diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java index ac577e4c7ec..e864e9d8555 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestFileArgs.java @@ -19,6 +19,7 @@ package org.apache.hadoop.streaming; import java.io.DataOutputStream; +import java.io.File; import java.io.IOException; import java.util.Map; @@ -27,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.MiniMRCluster; +import org.apache.hadoop.util.Shell; import org.junit.After; import org.junit.Before; @@ -45,7 +47,8 @@ public class TestFileArgs extends TestStreaming private static final String EXPECTED_OUTPUT = "job.jar\t\nsidefile\t\n"; - private static final String LS_PATH = "/bin/ls"; + private static final String LS_PATH = Shell.WINDOWS ? "cmd /c dir /B" : + "/bin/ls"; public TestFileArgs() throws IOException { @@ -58,6 +61,7 @@ public class TestFileArgs extends TestStreaming map = LS_PATH; FileSystem.setDefaultUri(conf, "hdfs://" + namenode); + setTestDir(new File("/tmp/TestFileArgs")); } @Before diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java index c5136e6dc7d..47b70ef6fbd 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java @@ -70,8 +70,8 @@ public class TestMultipleArchiveFiles extends TestStreaming namenode = fileSys.getUri().getAuthority(); mr = new MiniMRCluster(1, namenode, 1); - map = "xargs cat"; - reduce = "cat"; + map = XARGS_CAT; + reduce = CAT; } @Override @@ -84,7 +84,8 @@ public class TestMultipleArchiveFiles extends TestStreaming { fileSys.delete(new Path(INPUT_DIR), true); DataOutputStream dos = fileSys.create(new Path(INPUT_FILE)); - String inputFileString = "symlink1/cacheArchive1\nsymlink2/cacheArchive2"; + String inputFileString = "symlink1" + File.separator + + "cacheArchive1\nsymlink2" + File.separator + "cacheArchive2"; dos.write(inputFileString.getBytes("UTF-8")); dos.close(); diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java index 357bfcfd0b3..ae8f57d231c 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleCachefiles.java @@ -49,8 +49,8 @@ public class TestMultipleCachefiles String CACHE_FILE = "/testing-streaming/cache.txt"; String CACHE_FILE_2 = "/testing-streaming/cache2.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink"; String mapString2 = "testlink2"; String 
cacheString = "This is just the cache string"; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java index da0bdae484c..53009dbbabc 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamXmlRecordReader.java @@ -33,7 +33,7 @@ public class TestStreamXmlRecordReader extends TestStreaming { INPUT_FILE = new File("target/input.xml"); input = "\t\nroses.are.red\t\nviolets.are.blue\t\n" + "bunnies.are.pink\t\n\t\n"; - map = "cat"; + map = CAT; reduce = "NONE"; outputExpect = input; } diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java index 98ed1a299ea..4f39120a162 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreaming.java @@ -33,7 +33,7 @@ import static org.junit.Assert.*; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; - +import org.apache.hadoop.util.Shell; /** * This class tests hadoopStreaming in MapReduce local mode. @@ -43,6 +43,22 @@ public class TestStreaming public static final String STREAMING_JAR = JarFinder.getJar(StreamJob.class); + /** + * cat command used for copying stdin to stdout as mapper or reducer function. + * On Windows, use a cmd script that approximates the functionality of cat. + */ + static final String CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/cat.cmd").getAbsolutePath() : "cat"; + + /** + * Command used for iterating through file names on stdin and copying each + * file's contents to stdout, used as mapper or reducer function. On Windows, + * use a cmd script that approximates the functionality of xargs cat. + */ + static final String XARGS_CAT = Shell.WINDOWS ? + "cmd /c " + new File("target/bin/xargs_cat.cmd").getAbsolutePath() : + "xargs cat"; + // "map" command: grep -E (red|green|blue) // reduce command: uniq protected File TEST_DIR; @@ -66,9 +82,22 @@ public class TestStreaming UtilTest utilTest = new UtilTest(getClass().getName()); utilTest.checkUserDir(); utilTest.redirectIfAntJunit(); - TEST_DIR = new File("target/TestStreaming").getAbsoluteFile(); - OUTPUT_DIR = new File(TEST_DIR, "out"); - INPUT_FILE = new File(TEST_DIR, "input.txt"); + setTestDir(new File("target/TestStreaming").getAbsoluteFile()); + } + + /** + * Sets root of test working directory and resets any other paths that must be + * children of the test working directory. Typical usage is for subclasses + * that use HDFS to override the test directory to the form "/tmp/" + * so that on Windows, tests won't attempt to use paths containing a ':' from + * the drive specifier. The ':' character is considered invalid by HDFS. 
+ * + * @param testDir File to set + */ + protected void setTestDir(File testDir) { + TEST_DIR = testDir; + OUTPUT_DIR = new File(testDir, "out"); + INPUT_FILE = new File(testDir, "input.txt"); } @Before @@ -89,19 +118,18 @@ public class TestStreaming protected void createInput() throws IOException { - DataOutputStream out = getFileSystem().create( - new Path(INPUT_FILE.getAbsolutePath())); + DataOutputStream out = getFileSystem().create(new Path( + INPUT_FILE.getPath())); out.write(getInputData().getBytes("UTF-8")); out.close(); } protected void setInputOutput() { - inputFile = INPUT_FILE.getAbsolutePath(); - outDir = OUTPUT_DIR.getAbsolutePath(); + inputFile = INPUT_FILE.getPath(); + outDir = OUTPUT_DIR.getPath(); } protected String[] genArgs() { - setInputOutput(); args.add("-input");args.add(inputFile); args.add("-output");args.add(outDir); args.add("-mapper");args.add(map); @@ -129,7 +157,7 @@ public class TestStreaming } protected void checkOutput() throws IOException { - Path outPath = new Path(OUTPUT_DIR.getAbsolutePath(), "part-00000"); + Path outPath = new Path(OUTPUT_DIR.getPath(), "part-00000"); FileSystem fs = getFileSystem(); String output = StreamUtil.slurpHadoop(outPath, fs); fs.delete(outPath, true); @@ -155,6 +183,7 @@ public class TestStreaming * @throws IOException */ protected int runStreamJob() throws IOException { + setInputOutput(); createInput(); boolean mayExit = false; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java index 444355f4fbb..c21cb159f4f 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingKeyValue.java @@ -76,7 +76,7 @@ public class TestStreamingKeyValue return new String[] { "-input", INPUT_FILE.getAbsolutePath(), "-output", OUTPUT_DIR.getAbsolutePath(), - "-mapper", "cat", + "-mapper", TestStreaming.CAT, "-jobconf", MRJobConfig.PRESERVE_FAILED_TASK_FILES + "=true", "-jobconf", "stream.non.zero.exit.is.failure=true", "-jobconf", "stream.tmpdir="+System.getProperty("test.build.data","/tmp"), diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java index f3158b26405..35eb752b23a 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingOutputKeyValueTypes.java @@ -120,7 +120,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testJavaMapperAndCommandReducer() throws Exception { map = "org.apache.hadoop.mapred.lib.IdentityMapper"; - reduce = "cat"; + reduce = CAT; super.testCommandLine(); } @@ -128,7 +128,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testJavaMapperAndCommandReducerAndZeroReduces() throws Exception { map = "org.apache.hadoop.mapred.lib.IdentityMapper"; - reduce = "cat"; + reduce = CAT; args.add("-numReduceTasks"); args.add("0"); super.testCommandLine(); @@ -137,7 +137,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Java Reducer @Test public void 
testCommandMapperAndJavaReducer() throws Exception { - map = "cat"; + map = CAT; reduce = MyReducer.class.getName(); super.testCommandLine(); } @@ -145,7 +145,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Java Reducer and -numReduceTasks 0 @Test public void testCommandMapperAndJavaReducerAndZeroReduces() throws Exception { - map = "cat"; + map = CAT; reduce = MyReducer.class.getName(); args.add("-numReduceTasks"); args.add("0"); @@ -155,7 +155,7 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Reducer = "NONE" @Test public void testCommandMapperWithReduceNone() throws Exception { - map = "cat"; + map = CAT; reduce = "NONE"; super.testCommandLine(); } @@ -163,8 +163,8 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { // Check with Command Mapper, Command Reducer @Test public void testCommandMapperAndCommandReducer() throws Exception { - map = "cat"; - reduce = "cat"; + map = CAT; + reduce = CAT; super.testCommandLine(); } @@ -172,8 +172,8 @@ public class TestStreamingOutputKeyValueTypes extends TestStreaming { @Test public void testCommandMapperAndCommandReducerAndZeroReduces() throws Exception { - map = "cat"; - reduce = "cat"; + map = CAT; + reduce = CAT; args.add("-numReduceTasks"); args.add("0"); super.testCommandLine(); diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java index 823433c4c04..11c3b4e9b04 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestStreamingTaskLog.java @@ -83,7 +83,7 @@ public class TestStreamingTaskLog { * (b) hadoop.tasklog.totalLogFileSize * for the children of java tasks in streaming jobs. */ - @Test (timeout = 30000) + @Test (timeout = 120000) public void testStreamingTaskLogWithHadoopCmd() { try { final int numSlaves = 1; @@ -95,13 +95,14 @@ public class TestStreamingTaskLog { fs.delete(testDir, true); } fs.mkdirs(testDir); - File scriptFile = createScript( - testDir.toString() + "/testTaskLog.sh"); + File scriptFile = createScript(testDir.toString() + + (Shell.WINDOWS ? "/testTaskLog.cmd" : "/testTaskLog.sh")); conf.setBoolean(JTConfig.JT_PERSIST_JOBSTATUS, false); mr = new MiniMRCluster(numSlaves, fs.getUri().toString(), 1, null, null, conf); writeInputFile(fs, inputPath); - map = scriptFile.getAbsolutePath(); + map = Shell.WINDOWS ? 
"cmd /c " + scriptFile.getAbsolutePath() : + scriptFile.getAbsolutePath(); runStreamJobAndValidateEnv(); @@ -120,8 +121,12 @@ public class TestStreamingTaskLog { File scriptFile = new File(script); UtilTest.recursiveDelete(scriptFile); FileOutputStream in = new FileOutputStream(scriptFile); - in.write(("cat > /dev/null 2>&1\n" + - "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + if (Shell.WINDOWS) { + in.write("@echo %HADOOP_ROOT_LOGGER% %HADOOP_CLIENT_OPTS%".getBytes()); + } else { + in.write(("cat > /dev/null 2>&1\n" + + "echo $HADOOP_ROOT_LOGGER $HADOOP_CLIENT_OPTS").getBytes()); + } in.close(); Shell.execCommand(Shell.getSetPermissionCommand("+x", false, diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java index dba676a32db..730429d6daf 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestSymLink.java @@ -47,13 +47,13 @@ public class TestSymLink String OUTPUT_DIR = "/testing-streaming/out"; String CACHE_FILE = "/testing-streaming/cache.txt"; String input = "check to see if we can read this none reduce"; - String map = "xargs cat "; - String reduce = "cat"; + String map = TestStreaming.XARGS_CAT; + String reduce = TestStreaming.CAT; String mapString = "testlink\n"; String cacheString = "This is just the cache string"; StreamJob job; - @Test (timeout = 60000) + @Test (timeout = 120000) public void testSymLink() throws Exception { boolean mayExit = false; From c2592021f36b9dd97b4c9faf7a7a66e021a49694 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Fri, 12 Apr 2013 12:12:08 +0000 Subject: [PATCH 46/52] YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather than its host:port during scheduling which caused incorrect locality for containers. Contributed by Roger Hoover. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467244 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 4 + .../scheduler/fifo/FifoScheduler.java | 2 +- .../server/resourcemanager/MockNodes.java | 3 +- .../scheduler/fifo/TestFifoScheduler.java | 94 ++++++++++++++++++- 4 files changed, 99 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 9646f4ae0a1..345167f7025 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -249,6 +249,10 @@ Release 2.0.4-alpha - UNRELEASED YARN-470. Support a way to disable resource monitoring on the NodeManager. (Siddharth Seth via hitesh) + YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather + than its host:port during scheduling which caused incorrect locality for + containers. 
(Roger Hoover via acmurthy) + Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 2dbb498f9ba..d5a542700f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -462,7 +462,7 @@ public class FifoScheduler implements ResourceScheduler, Configurable { FiCaSchedulerApp application, Priority priority) { int assignedContainers = 0; ResourceRequest request = - application.getResourceRequest(priority, node.getRMNode().getNodeAddress()); + application.getResourceRequest(priority, node.getHostName()); if (request != null) { // Don't allocate on this node if we don't need containers on this rack ResourceRequest rackRequest = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index c3fe72d99cb..ae6d5814a46 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -209,6 +209,7 @@ public class MockNodes { final String rackName = "rack"+ rack; final int nid = hostnum; final String hostName = "host"+ nid; + final String nodeAddr = hostName + ":" + nid; final int port = 123; final NodeId nodeID = newNodeID(hostName, port); final String httpAddress = httpAddr; @@ -218,7 +219,7 @@ public class MockNodes { nodeHealthStatus.setIsNodeHealthy(true); nodeHealthStatus.setHealthReport("HealthyMe"); } - return new MockRMNodeImpl(nodeID, hostName, httpAddress, perNode, rackName, + return new MockRMNodeImpl(nodeID, nodeAddr, httpAddress, perNode, rackName, nodeHealthStatus, nid, hostName, state); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 8f45f4535cb..85076baaef3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -19,6 +19,8 @@ package 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import junit.framework.Assert; @@ -28,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; @@ -35,15 +38,22 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.InlineDispatcher; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.resourcemanager.Application; +import org.apache.hadoop.yarn.server.resourcemanager.MockNodes; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.Task; import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.After; @@ -55,6 +65,9 @@ public class TestFifoScheduler { private ResourceManager resourceManager = null; + private static final RecordFactory recordFactory = + RecordFactoryProvider.getRecordFactory(null); + @Before public void setUp() throws Exception { resourceManager = new ResourceManager(); @@ -78,14 +91,38 @@ public class TestFifoScheduler { .getRMContext()); } - @Test + private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) { + ApplicationAttemptId attId = recordFactory + .newRecordInstance(ApplicationAttemptId.class); + ApplicationId appIdImpl = recordFactory + .newRecordInstance(ApplicationId.class); + appIdImpl.setId(appId); + attId.setAttemptId(attemptId); + attId.setApplicationId(appIdImpl); + return attId; + } + + private ResourceRequest createResourceRequest(int memory, String host, + int priority, int numContainers) { + ResourceRequest request = recordFactory + .newRecordInstance(ResourceRequest.class); + request.setCapability(Resources.createResource(memory)); + request.setHostName(host); + request.setNumContainers(numContainers); + Priority prio = recordFactory.newRecordInstance(Priority.class); + prio.setPriority(priority); + request.setPriority(prio); + return request; + } + + @Test(timeout=5000) public void testFifoSchedulerCapacityWhenNoNMs() { FifoScheduler scheduler = new FifoScheduler(); 
QueueInfo queueInfo = scheduler.getQueueInfo(null, false, false); Assert.assertEquals(0.0f, queueInfo.getCurrentCapacity()); } - @Test + @Test(timeout=5000) public void testAppAttemptMetrics() throws Exception { AsyncDispatcher dispatcher = new InlineDispatcher(); RMContext rmContext = new RMContextImpl(dispatcher, null, @@ -111,6 +148,59 @@ public class TestFifoScheduler { Assert.assertEquals(1, metrics.getAppsSubmitted()); } + @Test(timeout=2000) + public void testNodeLocalAssignment() throws Exception { + AsyncDispatcher dispatcher = new InlineDispatcher(); + RMContext rmContext = new RMContextImpl(dispatcher, null, null, null, null, + null, null, null); + + FifoScheduler scheduler = new FifoScheduler(); + scheduler.reinitialize(new Configuration(), rmContext); + + RMNode node0 = MockNodes.newNodeInfo(1, + Resources.createResource(1024 * 64), 1234); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node0); + scheduler.handle(nodeEvent1); + + int _appId = 1; + int _appAttemptId = 1; + ApplicationAttemptId appAttemptId = createAppAttemptId(_appId, + _appAttemptId); + AppAddedSchedulerEvent appEvent1 = new AppAddedSchedulerEvent(appAttemptId, + "queue1", "user1"); + scheduler.handle(appEvent1); + + int memory = 64; + int nConts = 3; + int priority = 20; + + List ask = new ArrayList(); + ResourceRequest nodeLocal = createResourceRequest(memory, + node0.getHostName(), priority, nConts); + ResourceRequest rackLocal = createResourceRequest(memory, + node0.getRackName(), priority, nConts); + ResourceRequest any = createResourceRequest(memory, ResourceRequest.ANY, priority, + nConts); + ask.add(nodeLocal); + ask.add(rackLocal); + ask.add(any); + scheduler.allocate(appAttemptId, ask, new ArrayList()); + + NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0); + + // Before the node update event, there are 3 local requests outstanding + Assert.assertEquals(3, nodeLocal.getNumContainers()); + + scheduler.handle(node0Update); + + // After the node update event, check that there are no more local requests + // outstanding + Assert.assertEquals(0, nodeLocal.getNumContainers()); + //Also check that the containers were scheduled + SchedulerAppReport info = scheduler.getSchedulerAppInfo(appAttemptId); + Assert.assertEquals(3, info.getLiveContainers().size()); + } + // @Test public void testFifoScheduler() throws Exception { From f863543206a67ba295454609d618e16f91309b84 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Fri, 12 Apr 2013 15:18:07 +0000 Subject: [PATCH 47/52] HDFS-4477. Secondary namenode may retain old tokens. Contributed by Daryn Sharp. 
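On the YARN-412 fix above: the scheduler keeps each application's outstanding asks keyed by the resource name the application requested, which is a bare hostname, a rack name, or ResourceRequest.ANY, while RMNode.getNodeAddress() returns a host:port string. The toy sketch below uses a plain map as a hypothetical stand-in for that request table to show why the host:port key could never match and node-local placement was lost before the fix; all names and values in it are illustrative, not scheduler API.

import java.util.HashMap;
import java.util.Map;

public class LocalityKeySketch {
  public static void main(String[] args) {
    // Hypothetical stand-in for the per-application ask table, keyed by the
    // resource name the AM asked for (hostname, rack, or "*" for any).
    Map<String, Integer> outstandingAsks = new HashMap<String, Integer>();
    outstandingAsks.put("host1", 3);   // node-local ask
    outstandingAsks.put("/rack1", 3);  // rack-local ask
    outstandingAsks.put("*", 3);       // off-switch ask

    String hostName = "host1";         // assumed RMNode hostname
    String nodeAddress = "host1:1234"; // assumed RMNode host:port address

    // Keying the lookup by host:port never matches, so node-local requests
    // were silently treated as rack-local or off-switch:
    System.out.println("by host:port -> " + outstandingAsks.get(nodeAddress)); // null
    // Keying it by the bare hostname, as the fix does, finds the ask:
    System.out.println("by hostname  -> " + outstandingAsks.get(hostName));    // 3
  }
}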
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467307 13f79535-47bb-0310-9956-ffa450edef68 --- .../AbstractDelegationTokenSecretManager.java | 28 ++++++-- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../DelegationTokenSecretManager.java | 17 +++++ .../hdfs/server/namenode/FSNamesystem.java | 15 ++++ .../namenode/TestSecurityTokenEditLog.java | 69 +++++++++++++++++++ 5 files changed, 125 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index aa4fa28ad1f..fde11e72cf4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -27,8 +27,10 @@ import java.io.DataInputStream; import java.io.IOException; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; +import java.util.Set; import javax.crypto.SecretKey; @@ -144,6 +146,10 @@ extends AbstractDelegationTokenIdentifier> return; } + protected void logExpireToken(TokenIdent ident) throws IOException { + return; + } + /** * Update the current master key * This is called once by startThreads before tokenRemoverThread is created, @@ -363,15 +369,25 @@ extends AbstractDelegationTokenIdentifier> } /** Remove expired delegation tokens from cache */ - private synchronized void removeExpiredToken() { + private void removeExpiredToken() throws IOException { long now = Time.now(); - Iterator i = currentTokens.values().iterator(); - while (i.hasNext()) { - long renewDate = i.next().getRenewDate(); - if (now > renewDate) { - i.remove(); + Set expiredTokens = new HashSet(); + synchronized (this) { + Iterator> i = + currentTokens.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry entry = i.next(); + long renewDate = entry.getValue().getRenewDate(); + if (renewDate < now) { + expiredTokens.add(entry.getKey()); + i.remove(); + } } } + // don't hold lock on 'this' to avoid edit log updates blocking token ops + for (TokenIdent ident : expiredTokens) { + logExpireToken(ident); + } } public void stopThreads() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 73f69954bd1..bde5d8285c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2488,6 +2488,8 @@ Release 0.23.8 - UNRELEASED BUG FIXES + HDFS-4477. 
Secondary namenode may retain old tokens (daryn via kihwal) + Release 0.23.7 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index d18ac9f456d..896c20c85b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -310,6 +310,23 @@ public class DelegationTokenSecretManager namesystem.logUpdateMasterKey(key); } } + + @Override //AbstractDelegationTokenManager + protected void logExpireToken(final DelegationTokenIdentifier dtId) + throws IOException { + synchronized (noInterruptsLock) { + // The edit logging code will fail catastrophically if it + // is interrupted during a logSync, since the interrupt + // closes the edit log files. Doing this inside the + // above lock and then checking interruption status + // prevents this bug. + if (Thread.interrupted()) { + throw new InterruptedIOException( + "Interrupted before expiring delegation token"); + } + namesystem.logExpireDelegationToken(dtId); + } + } /** A utility method for creating credentials. */ public static Credentials createCredentials(final NameNode namenode, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 95b9cf02f0f..07b87c7074c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -5358,6 +5358,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats, getEditLog().logSync(); } + /** + * Log the cancellation of expired tokens to edit logs + * + * @param id token identifier to cancel + */ + public void logExpireDelegationToken(DelegationTokenIdentifier id) { + assert !isInSafeMode() : + "this should never be called while in safemode, since we stop " + + "the DT manager before entering safemode!"; + // No need to hold FSN lock since we don't access any internal + // structures, and this is stopped before the FSN shuts itself + // down, etc. 
+ getEditLog().logCancelDelegationToken(id); + } + private void logReassignLease(String leaseHolder, String src, String newHolder) { assert hasWriteLock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java index dd679d1a9af..2df1459d4d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.assertEquals; import java.io.File; @@ -30,12 +31,14 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.junit.Test; +import static org.mockito.Mockito.*; /** * This class tests the creation and validation of a checkpoint. @@ -163,4 +166,70 @@ public class TestSecurityTokenEditLog { if(cluster != null) cluster.shutdown(); } } + + @Test(timeout=10000) + public void testEditsForCancelOnTokenExpire() throws IOException, + InterruptedException { + long renewInterval = 2000; + Configuration conf = new Configuration(); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + conf.setLong(DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, renewInterval); + conf.setLong(DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, renewInterval*2); + + Text renewer = new Text(UserGroupInformation.getCurrentUser().getUserName()); + FSImage fsImage = mock(FSImage.class); + FSEditLog log = mock(FSEditLog.class); + doReturn(log).when(fsImage).getEditLog(); + FSNamesystem fsn = new FSNamesystem(conf, fsImage); + + DelegationTokenSecretManager dtsm = fsn.getDelegationTokenSecretManager(); + try { + dtsm.startThreads(); + + // get two tokens + Token token1 = fsn.getDelegationToken(renewer); + Token token2 = fsn.getDelegationToken(renewer); + DelegationTokenIdentifier ident1 = + (DelegationTokenIdentifier)token1.decodeIdentifier(); + DelegationTokenIdentifier ident2 = + (DelegationTokenIdentifier)token2.decodeIdentifier(); + + // verify we got the tokens + verify(log, times(1)).logGetDelegationToken(eq(ident1), anyLong()); + verify(log, times(1)).logGetDelegationToken(eq(ident2), anyLong()); + + // this is a little tricky because DTSM doesn't let us set scan interval + // so need to periodically sleep, then stop/start threads to force scan + + // renew first token 1/2 to expire + Thread.sleep(renewInterval/2); + fsn.renewDelegationToken(token2); + verify(log, times(1)).logRenewDelegationToken(eq(ident2), anyLong()); + // force scan and give it a little time to complete + dtsm.stopThreads(); dtsm.startThreads(); + Thread.sleep(250); + // no token has expired yet + verify(log, times(0)).logCancelDelegationToken(eq(ident1)); + 
verify(log, times(0)).logCancelDelegationToken(eq(ident2)); + + // sleep past expiration of 1st non-renewed token + Thread.sleep(renewInterval/2); + dtsm.stopThreads(); dtsm.startThreads(); + Thread.sleep(250); + // non-renewed token should have implicitly been cancelled + verify(log, times(1)).logCancelDelegationToken(eq(ident1)); + verify(log, times(0)).logCancelDelegationToken(eq(ident2)); + + // sleep past expiration of 2nd renewed token + Thread.sleep(renewInterval/2); + dtsm.stopThreads(); dtsm.startThreads(); + Thread.sleep(250); + // both tokens should have been implicitly cancelled by now + verify(log, times(1)).logCancelDelegationToken(eq(ident1)); + verify(log, times(1)).logCancelDelegationToken(eq(ident2)); + } finally { + dtsm.stopThreads(); + } + } } From 2dfc42d61304f90ff8fc4fe10f9fd2ade3a64a72 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Fri, 12 Apr 2013 17:51:30 +0000 Subject: [PATCH 48/52] HADOOP-9211. Set default max heap size in HADOOP_CLIENT_OPTS to 512m in order to avoid OOME. Contributed by Plamen Jeliazkov. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467380 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../hadoop-common/src/main/conf/hadoop-env.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index bc9b318107f..224c7fbe192 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -623,6 +623,9 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9233. Cover package org.apache.hadoop.io.compress.zlib with unit tests (Vadim Bondarev via jlowe) + HADOOP-9211. Set default max heap size in HADOOP_CLIENT_OPTS to 512m + in order to avoid OOME. (Plamen Jeliazkov via shv) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh index 42a0d05aaad..41289a9acda 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh +++ b/hadoop-common-project/hadoop-common/src/main/conf/hadoop-env.sh @@ -62,7 +62,7 @@ export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANOD export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS" # The following applies to multiple commands (fs, dfs, fsck, distcp etc) -export HADOOP_CLIENT_OPTS="-Xmx128m $HADOOP_CLIENT_OPTS" +export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS" #HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS" # On secure datanodes, user to run the datanode as after dropping privileges From 5e94420456ddd756e80512753a6575506ebef05e Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Fri, 12 Apr 2013 21:19:39 +0000 Subject: [PATCH 49/52] YARN-412. Pushing to 2.0.5-beta only. 
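A general note on the locking shape of removeExpiredToken() above: expired identifiers are collected and removed from the map while holding the secret manager's monitor, and the logExpireToken() callbacks, which in the HDFS subclass end up writing edit-log transactions, run only after the lock is released so they cannot block other token operations. The following is a minimal, generic sketch of that collect-under-lock, notify-outside-lock pattern; the class, interface, and field names are illustrative and are not Hadoop APIs.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

interface ExpirySketchListener<K> {
  void onExpired(K key);
}

class ExpiringCacheSketch<K> {

  private final Map<K, Long> expiryTimeByKey = new HashMap<K, Long>();

  void removeExpired(long now, ExpirySketchListener<K> listener) {
    List<K> expired = new ArrayList<K>();
    synchronized (this) {                 // short critical section
      Iterator<Map.Entry<K, Long>> it = expiryTimeByKey.entrySet().iterator();
      while (it.hasNext()) {
        Map.Entry<K, Long> entry = it.next();
        if (entry.getValue() < now) {
          expired.add(entry.getKey());
          it.remove();
        }
      }
    }
    // Slow work (for example, logging a cancellation to an edit log) happens
    // without holding the lock, so concurrent operations are not blocked.
    for (K key : expired) {
      listener.onExpired(key);
    }
  }
}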
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467470 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 345167f7025..4aa9c364fcf 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -231,6 +231,10 @@ Release 2.0.5-beta - UNRELEASED does not have permission causes the client to wait forever. (shenhong via tomwhite) + YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather + than its host:port during scheduling which caused incorrect locality for + containers. (Roger Hoover via acmurthy) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES @@ -249,10 +253,6 @@ Release 2.0.4-alpha - UNRELEASED YARN-470. Support a way to disable resource monitoring on the NodeManager. (Siddharth Seth via hitesh) - YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather - than its host:port during scheduling which caused incorrect locality for - containers. (Roger Hoover via acmurthy) - Release 2.0.3-alpha - 2013-02-06 INCOMPATIBLE CHANGES From 242028a3fb887708dea5ef557c0ded22e014ac7d Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Sat, 13 Apr 2013 01:35:58 +0000 Subject: [PATCH 50/52] HDFS-4639. startFileInternal() should not increment generation stamp. Contributed by Plamen Jeliazkov. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467534 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../org/apache/hadoop/hdfs/server/namenode/FSDirectory.java | 3 +-- .../org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 3 +-- .../src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java | 2 +- .../org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index bde5d8285c1..32bcb996964 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -503,6 +503,9 @@ Release 2.0.5-beta - UNRELEASED HDFS-4643. Fix flakiness in TestQuorumJournalManager. (todd) + HDFS-4639. startFileInternal() should not increment generation stamp. 
+ (Plamen Jeliazkov via shv) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 07679b2eb7d..b5ef8ef2e97 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -235,8 +235,7 @@ public class FSDirectory implements Closeable { long preferredBlockSize, String clientName, String clientMachine, - DatanodeDescriptor clientNode, - long generationStamp) + DatanodeDescriptor clientNode) throws FileAlreadyExistsException, QuotaExceededException, UnresolvedLinkException { waitForReady(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 07b87c7074c..fbf97a80a79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -1907,9 +1907,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, checkFsObjectLimit(); // increment global generation stamp - long genstamp = nextGenerationStamp(); INodeFileUnderConstruction newNode = dir.addFile(src, permissions, - replication, blockSize, holder, clientMachine, clientNode, genstamp); + replication, blockSize, holder, clientMachine, clientNode); if (newNode == null) { throw new IOException("DIR* NameSystem.startFile: " + "Unable to add file to namespace."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java index b0879683637..2e7c8b47d36 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java @@ -53,7 +53,7 @@ import com.google.common.base.Joiner; */ public class TestDFSUpgrade { - private static final int EXPECTED_TXID = 49; + private static final int EXPECTED_TXID = 45; private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName()); private Configuration conf; private int testCounter = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index db0e9b8e058..8ee07cbbbfc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -1129,7 +1129,7 @@ public class TestCheckpoint { throw new IOException(e); } - final int EXPECTED_TXNS_FIRST_SEG = 12; + final int EXPECTED_TXNS_FIRST_SEG = 11; // the following steps should have happened: // edits_inprogress_1 -> edits_1-12 (finalized) From e408d9904b1105b7eebd952cba0d61efe8802b22 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Sat, 13 Apr 2013 20:22:02 +0000 Subject: [PATCH 51/52] HADOOP-9473. Typo in FileUtil copy() method. 
Contributed by Glen Mazza. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467694 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/main/java/org/apache/hadoop/fs/FileUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java index 2cc834852e4..b61477e9d7b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java @@ -284,7 +284,7 @@ public class FileUtil { // Check if dest is directory if (!dstFS.exists(dst)) { throw new IOException("`" + dst +"': specified destination directory " + - "doest not exist"); + "does not exist"); } else { FileStatus sdst = dstFS.getFileStatus(dst); if (!sdst.isDirectory()) From 108e0e0953493dd17a1d50314c9ac82e44280a00 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Sat, 13 Apr 2013 20:26:51 +0000 Subject: [PATCH 52/52] HADOOP-9473. Add change description missed in previous commit 1467694 git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1467696 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 224c7fbe192..3d1edc5eacb 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -626,6 +626,8 @@ Release 2.0.5-beta - UNRELEASED HADOOP-9211. Set default max heap size in HADOOP_CLIENT_OPTS to 512m in order to avoid OOME. (Plamen Jeliazkov via shv) + HADOOP-9473. Typo in FileUtil copy() method. (Glen Mazza via suresh) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES
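To close, a small sketch around the FileUtil.copy() path touched by HADOOP-9473 above: when several sources are copied and the destination directory is missing, the multi-source copy() overload rejects the call with the now correctly spelled "does not exist" message rather than creating the directory. The paths below and the use of the local file system are illustrative assumptions.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class CopyMessageSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    // Assumed example paths; the destination directory deliberately does not exist.
    Path[] srcs = { new Path("/tmp/a.txt"), new Path("/tmp/b.txt") };
    Path dst = new Path("/tmp/no-such-dir");
    try {
      FileUtil.copy(fs, srcs, fs, dst, false /* deleteSource */, true /* overwrite */, conf);
    } catch (IOException e) {
      // After the fix the message reads:
      // "`/tmp/no-such-dir': specified destination directory does not exist"
      System.err.println("copy rejected: " + e.getMessage());
    }
  }
}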