Merge r1234388 through r1236385 from 0.23.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23-PB@1236395 13f79535-47bb-0310-9956-ffa450edef68
Author: Tsz-wo Sze
Date: 2012-01-26 21:37:26 +00:00
Commit: 359c746ca7
131 changed files with 12719 additions and 394 deletions


@ -127,6 +127,17 @@
<unpack>false</unpack>
</binaries>
</moduleSet>
<moduleSet>
<includes>
<include>org.apache.hadoop:hadoop-mapreduce-client-jobclient</include>
</includes>
<binaries>
<attachmentClassifier>tests</attachmentClassifier>
<outputDirectory>share/hadoop/${hadoop.component}</outputDirectory>
<includeDependencies>false</includeDependencies>
<unpack>false</unpack>
</binaries>
</moduleSet>
</moduleSets>
<dependencySets>
<dependencySet>


@ -125,6 +125,21 @@ Release 0.23.1 - Unreleased
HADOOP-7975. Add LZ4 as an entry in the default codec list, missed by HADOOP-7657 (harsh)
HADOOP-7987. Support setting the run-as user in unsecure mode. (jitendra)
HADOOP-4515. Configuration#getBoolean must not be case sensitive. (Sho Shimauchi via harsh)
HADOOP-6490. Use StringUtils over String#replace in Path#normalizePath.
(Uma Maheswara Rao G via harsh)
HADOOP-7574. Improve FSShell -stat, add user/group elements.
(XieXianshan via harsh)
HADOOP-7736. Remove duplicate Path#normalizePath call. (harsh)
HADOOP-7919. Remove the unused hadoop.logfile.* properties from the
core-default.xml file. (harsh)
OPTIMIZATIONS
BUG FIXES
@ -207,6 +222,9 @@ Release 0.23.1 - Unreleased
HADOOP-7986. Adding config for MapReduce History Server protocol in
hadoop-policy.xml for service level authorization. (Mahadev Konar via vinodkv)
HADOOP-7981. Improve documentation for org.apache.hadoop.io.compress.
Decompressor.getRemaining (Jonathan Eagles via mahadev)
Release 0.23.0 - 2011-11-01
INCOMPATIBLE CHANGES


@ -753,11 +753,6 @@
<section>
<title> secondarynamenode </title>
<note>
The Secondary NameNode has been deprecated. Instead, consider using the
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Checkpoint+Node">Checkpoint Node</a> or
<a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Backup+Node">Backup Node</a>.
</note>
<p>
Runs the HDFS secondary
namenode. See <a href="http://hadoop.apache.org/hdfs/docs/current/hdfs_user_guide.html#Secondary+NameNode">Secondary NameNode</a>


@ -826,6 +826,12 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
*/
public boolean getBoolean(String name, boolean defaultValue) {
String valueString = getTrimmed(name);
if (null == valueString || "".equals(valueString)) {
return defaultValue;
}
valueString = valueString.toLowerCase();
if ("true".equals(valueString))
return true;
else if ("false".equals(valueString))


@ -18,10 +18,12 @@
package org.apache.hadoop.fs;
import java.net.*;
import java.io.*;
import org.apache.avro.reflect.Stringable;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.avro.reflect.Stringable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
@ -76,7 +78,7 @@ public class Path implements Comparable {
}
URI resolved = parentUri.resolve(child.uri);
initialize(resolved.getScheme(), resolved.getAuthority(),
normalizePath(resolved.getPath()), resolved.getFragment());
resolved.getPath(), resolved.getFragment());
}
private void checkPathArg( String path ) {
@ -158,8 +160,8 @@ public class Path implements Comparable {
private String normalizePath(String path) {
// remove double slashes & backslashes
path = path.replace("//", "/");
path = path.replace("\\", "/");
path = StringUtils.replace(path, "//", "/");
path = StringUtils.replace(path, "\\", "/");
// trim trailing slash from non-root path (ignoring windows drive)
int minLength = hasWindowsDrive(path, true) ? 4 : 1;


@ -32,9 +32,11 @@ import org.apache.hadoop.fs.FileStatus;
* Print statistics about path in specified format.
* Format sequences:
* %b: Size of file in blocks
* %g: Group name of owner
* %n: Filename
* %o: Block size
* %r: replication
* %u: User name of owner
* %y: UTC date as &quot;yyyy-MM-dd HH:mm:ss&quot;
* %Y: Milliseconds since January 1, 1970 UTC
*/
@ -50,8 +52,8 @@ class Stat extends FsCommand {
public static final String USAGE = "[format] <path> ...";
public static final String DESCRIPTION =
"Print statistics about the file/directory at <path>\n" +
"in the specified format. Format accepts filesize in blocks (%b), filename (%n),\n" +
"block size (%o), replication (%r), modification date (%y, %Y)\n";
"in the specified format. Format accepts filesize in blocks (%b), group name of owner(%g),\n" +
"filename (%n), block size (%o), replication (%r), user name of owner(%u), modification date (%y, %Y)\n";
protected static final SimpleDateFormat timeFmt;
static {
@ -92,6 +94,9 @@ class Stat extends FsCommand {
? "directory"
: (stat.isFile() ? "regular file" : "symlink"));
break;
case 'g':
buf.append(stat.getGroup());
break;
case 'n':
buf.append(item.path.getName());
break;
@ -101,6 +106,9 @@ class Stat extends FsCommand {
case 'r':
buf.append(stat.getReplication());
break;
case 'u':
buf.append(stat.getOwner());
break;
case 'y':
buf.append(timeFmt.format(new Date(stat.getModificationTime())));
break;
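Illustration (not part of the diff): the new %u and %g sequences let -stat report owner and group alongside the existing fields. A hypothetical invocation through FsShell (the path is made up; ToolRunner.run throws Exception):

int rc = ToolRunner.run(new Configuration(), new FsShell(), new String[] {
    "-stat", "%u %g %b %o %r %y %n", "/user/someuser/file.txt" });
// prints owner, group, size in blocks, block size, replication,
// modification time and file name for the given path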


@ -49,7 +49,7 @@ public interface Decompressor {
public void setInput(byte[] b, int off, int len);
/**
* Returns true if the input data buffer is empty and
* Returns <code>true</code> if the input data buffer is empty and
* {@link #setInput(byte[], int, int)} should be called to
* provide more input.
*
@ -76,8 +76,11 @@ public interface Decompressor {
public boolean needsDictionary();
/**
* Returns true if the end of the decompressed
* data output stream has been reached.
* Returns <code>true</code> if the end of the decompressed
* data output stream has been reached. Indicates a concatenated data stream
* when finished() returns <code>true</code> and {@link #getRemaining()}
* returns a positive value. finished() will be reset with the
* {@link #reset()} method.
* @return <code>true</code> if the end of the decompressed
* data output stream has been reached.
*/
@ -98,15 +101,23 @@ public interface Decompressor {
public int decompress(byte[] b, int off, int len) throws IOException;
/**
* Returns the number of bytes remaining in the compressed-data buffer;
* typically called after the decompressor has finished decompressing
* the current gzip stream (a.k.a. "member").
* Returns the number of bytes remaining in the compressed data buffer.
* Indicates a concatenated data stream if {@link #finished()} returns
* <code>true</code> and getRemaining() returns a positive value. If
* {@link #finished()} returns <code>true</code> and getRemaining() returns
* a zero value, indicates that the end of data stream has been reached and
* is not a concatenated data stream.
* @return The number of bytes remaining in the compressed data buffer.
*/
public int getRemaining();
/**
* Resets decompressor and input and output buffers so that a new set of
* input data can be processed.
* input data can be processed. If {@link #finished()}} returns
* <code>true</code> and {@link #getRemaining()} returns a positive value,
* reset() is called before processing of the next data stream in the
* concatenated data stream. {@link #finished()} will be reset and will
* return <code>false</code> when reset() is called.
*/
public void reset();
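Illustration (not part of the diff): a rough consumer loop honouring the contract spelled out above, assuming decompressor came from a codec that supports concatenated streams and compressed already holds the whole input (real callers feed input incrementally via needsInput()/setInput()):

byte[] out = new byte[64 * 1024];
decompressor.setInput(compressed, 0, compressed.length);
while (true) {
  int n = decompressor.decompress(out, 0, out.length); // may legitimately return 0
  consume(out, n);                                      // hypothetical sink for the output
  if (decompressor.finished()) {
    int remaining = decompressor.getRemaining();
    if (remaining > 0) {
      // finished() && getRemaining() > 0: a concatenated member follows
      decompressor.reset();                             // also clears finished()
      decompressor.setInput(compressed, compressed.length - remaining, remaining);
    } else {
      break;                                            // true end of the data stream
    }
  } else if (decompressor.needsInput()) {
    break;                                              // nothing left to feed in this sketch
  }
}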


@ -80,6 +80,7 @@ public class UserGroupInformation {
* Percentage of the ticket window to use before we renew ticket.
*/
private static final float TICKET_RENEW_WINDOW = 0.80f;
static final String HADOOP_USER_NAME = "HADOOP_USER_NAME";
/**
* UgiMetrics maintains UGI activity statistics
@ -137,7 +138,16 @@ public class UserGroupInformation {
LOG.debug("using kerberos user:"+user);
}
}
// if we don't have a kerberos user, use the OS user
//If we don't have a kerberos user and security is disabled, check
//if user is specified in the environment or properties
if (!isSecurityEnabled() && (user == null)) {
String envUser = System.getenv(HADOOP_USER_NAME);
if (envUser == null) {
envUser = System.getProperty(HADOOP_USER_NAME);
}
user = envUser == null ? null : new User(envUser);
}
// use the OS user
if (user == null) {
user = getCanonicalUser(OS_PRINCIPAL_CLASS);
if (LOG.isDebugEnabled()) {
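Illustration (not part of the diff), mirroring the TestUserFromEnv test added later in this commit: with security disabled, the login user can now be forced through HADOOP_USER_NAME, the environment variable being consulted before the JVM system property.

System.setProperty("HADOOP_USER_NAME", "randomUser");
UserGroupInformation ugi = UserGroupInformation.getLoginUser(); // throws IOException
String name = ugi.getUserName();                                // "randomUser"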


@ -134,20 +134,6 @@
</description>
</property>
<!--- logging properties -->
<property>
<name>hadoop.logfile.size</name>
<value>10000000</value>
<description>The max size of each log file</description>
</property>
<property>
<name>hadoop.logfile.count</name>
<value>10</value>
<description>The max number of log files</description>
</property>
<!-- i/o properties -->
<property>
<name>io.file.buffer.size</name>


@ -451,6 +451,9 @@ public class TestConfiguration extends TestCase {
appendProperty("test.bool3", " true ");
appendProperty("test.bool4", " false ");
appendProperty("test.bool5", "foo");
appendProperty("test.bool6", "TRUE");
appendProperty("test.bool7", "FALSE");
appendProperty("test.bool8", "");
endConfig();
Path fileResource = new Path(CONFIG);
conf.addResource(fileResource);
@ -459,6 +462,9 @@ public class TestConfiguration extends TestCase {
assertEquals(true, conf.getBoolean("test.bool3", false));
assertEquals(false, conf.getBoolean("test.bool4", true));
assertEquals(true, conf.getBoolean("test.bool5", true));
assertEquals(true, conf.getBoolean("test.bool6", false));
assertEquals(false, conf.getBoolean("test.bool7", true));
assertEquals(false, conf.getBoolean("test.bool8", false));
}
public void testFloatValues() throws IOException {


@ -0,0 +1,32 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.security;
import java.io.IOException;
import org.junit.Assert;
import org.junit.Test;
public class TestUserFromEnv {
@Test
public void testUserFromEnvironment() throws IOException {
System.setProperty(UserGroupInformation.HADOOP_USER_NAME, "randomUser");
Assert.assertEquals("randomUser", UserGroupInformation.getLoginUser()
.getUserName());
}
}


@ -610,11 +610,11 @@
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*in the specified format. Format accepts filesize in blocks \(%b\), filename \(%n\),( )*</expected-output>
<expected-output>^( |\t)*in the specified format. Format accepts filesize in blocks \(%b\), group name of owner\(%g\),( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*block size \(%o\), replication \(%r\), modification date \(%y, %Y\)( )*</expected-output>
<expected-output>^( |\t)*filename \(%n\), block size \(%o\), replication \(%r\), user name of owner\(%u\), modification date \(%y, %Y\)( )*</expected-output>
</comparator>
</comparators>
</test>


@ -18,4 +18,4 @@
OK_RELEASEAUDIT_WARNINGS=0
OK_FINDBUGS_WARNINGS=0
OK_JAVADOC_WARNINGS=2
OK_JAVADOC_WARNINGS=0


@ -53,6 +53,11 @@
<artifactId>mockito-all</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>


@ -219,7 +219,7 @@ public class HttpFSServer {
* operation is @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.GetOpValues#LISTSTATUS}
* @param doAs user being impersonated, defualt value is none. It can be used
* only if the current user is a HttpFSServer proxyuser.
* @param override, default is true. Used only for
* @param override default is true. Used only for
* @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
* @param blockSize block size to set, used only by
* @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
@ -419,7 +419,7 @@ public class HttpFSServer {
* @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#SETOWNER} operations.
* @param group group to set, used only for
* @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#SETOWNER} operations.
* @param override, default is true. Used only for
* @param override default is true. Used only for
* @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.
* @param blockSize block size to set, used only by
* @link org.apache.hadoop.fs.http.client.HttpFSFileSystem.PutOpValues#CREATE} operations.


@ -201,6 +201,10 @@ Release 0.23.1 - UNRELEASED
HDFS-2817. Combine the two TestSafeMode test suites. (todd)
HDFS-2818. Fix a missing space issue in HDFS webapps' title tags. (Devaraj K via harsh)
HDFS-2397. Undeprecate SecondaryNameNode (eli)
OPTIMIZATIONS
HDFS-2130. Switch default checksum to CRC32C. (todd)
@ -215,6 +219,12 @@ Release 0.23.1 - UNRELEASED
for a client on the same node as the block file. (Andrew Purtell,
Suresh Srinivas and Jitendra Nath Pandey via szetszwo)
HDFS-2825. Add test hook to turn off the writer preferring its local
DN. (todd)
HDFS-2826. Add test case for HDFS-1476 (safemode can initialize
replication queues before exiting) (todd)
BUG FIXES
HDFS-2541. For a sufficiently large value of blocks, the DN Scanner
@ -276,6 +286,15 @@ Release 0.23.1 - UNRELEASED
HDFS-2816. Fix missing license header in httpfs findbugsExcludeFile.xml.
(hitesh via tucu)
HDFS-2822. processMisReplicatedBlock incorrectly identifies
under-construction blocks as under-replicated. (todd)
HDFS-442. dfsthroughput in test jar throws NPE (harsh)
HDFS-2836. HttpFSServer still has 2 javadoc warnings in trunk (revans2 via tucu)
HDFS-2837. mvn javadoc:javadoc not seeing LimitedPrivate class (revans2 via tucu)
Release 0.23.0 - 2011-11-01
INCOMPATIBLE CHANGES


@ -112,17 +112,18 @@
problems.
</li>
<li>
Secondary NameNode (deprecated): performs periodic checkpoints of the
Secondary NameNode: performs periodic checkpoints of the
namespace and helps keep the size of file containing log of HDFS
modifications within certain limits at the NameNode.
Replaced by Checkpoint node.
</li>
<li>
Checkpoint node: performs periodic checkpoints of the namespace and
helps minimize the size of the log stored at the NameNode
containing changes to the HDFS.
Replaces the role previously filled by the Secondary NameNode.
NameNode allows multiple Checkpoint nodes simultaneously,
Replaces the role previously filled by the Secondary NameNode,
though is not yet battle hardened.
The NameNode allows multiple Checkpoint nodes simultaneously,
as long as there are no Backup nodes registered with the system.
</li>
<li>
@ -132,6 +133,7 @@
which is always in sync with the active NameNode namespace state.
Only one Backup node may be registered with the NameNode at once.
</li>
</ul>
</li>
</ul>
@ -234,12 +236,6 @@
</section>
<section> <title>Secondary NameNode</title>
<note>
The Secondary NameNode has been deprecated.
Instead, consider using the
<a href="hdfs_user_guide.html#Checkpoint+node">Checkpoint Node</a> or
<a href="hdfs_user_guide.html#Backup+node">Backup Node</a>.
</note>
<p>
The NameNode stores modifications to the file system as a log
appended to a native file system file, <code>edits</code>.
@ -287,7 +283,9 @@
<a href="http://hadoop.apache.org/common/docs/current/commands_manual.html#secondarynamenode">secondarynamenode</a>.
</p>
</section><section> <title> Checkpoint Node </title>
</section>
<section> <title> Checkpoint Node </title>
<p>NameNode persists its namespace using two files: <code>fsimage</code>,
which is the latest checkpoint of the namespace and <code>edits</code>,
a journal (log) of changes to the namespace since the checkpoint.


@ -1793,7 +1793,8 @@ public class BlockManager {
public void processMisReplicatedBlocks() {
assert namesystem.hasWriteLock();
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0,
nrUnderConstruction = 0;
neededReplications.clear();
for (BlockInfo block : blocksMap.getBlocks()) {
INodeFile fileINode = block.getINode();
@ -1803,6 +1804,12 @@ public class BlockManager {
addToInvalidates(block);
continue;
}
if (!block.isComplete()) {
// Incomplete blocks are never considered mis-replicated --
// they'll be reached when they are completed or recovered.
nrUnderConstruction++;
continue;
}
// calculate current replication
short expectedReplication = fileINode.getReplication();
NumberReplicas num = countNodes(block);
@ -1826,6 +1833,7 @@ public class BlockManager {
LOG.info("Number of invalid blocks = " + nrInvalid);
LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);
LOG.info("Number of over-replicated blocks = " + nrOverReplicated);
LOG.info("Number of blocks being written = " + nrUnderConstruction);
}
/** Set replication for the blocks. */


@ -38,6 +38,8 @@ import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.net.NodeBase;
import com.google.common.annotations.VisibleForTesting;
/** The class is responsible for choosing the desired number of targets
* for placing block replicas.
* The replica placement strategy is that if the writer is on a datanode,
@ -49,6 +51,7 @@ import org.apache.hadoop.net.NodeBase;
@InterfaceAudience.Private
public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
private boolean considerLoad;
private boolean preferLocalNode = true;
private NetworkTopology clusterMap;
private FSClusterStats stats;
static final String enableDebugLogging = "For more information, please enable"
@ -223,7 +226,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
if (localMachine == null)
return chooseRandom(NodeBase.ROOT, excludedNodes,
blocksize, maxNodesPerRack, results);
if (preferLocalNode) {
// otherwise try local machine first
Node oldNode = excludedNodes.put(localMachine, localMachine);
if (oldNode == null) { // was not in the excluded list
@ -233,7 +236,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
return localMachine;
}
}
}
// try a node on local rack
return chooseLocalRack(localMachine, excludedNodes,
blocksize, maxNodesPerRack, results);
@ -568,5 +571,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
}
return cur;
}
@VisibleForTesting
void setPreferLocalNode(boolean prefer) {
this.preferLocalNode = prefer;
}
}


@ -172,6 +172,7 @@ import org.apache.hadoop.util.VersionInfo;
import org.mortbay.util.ajax.JSON;
import com.google.common.base.Preconditions;
import com.google.common.annotations.VisibleForTesting;
/***************************************************
* FSNamesystem does the actual bookkeeping work for the
@ -2842,7 +2843,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
/** Total number of blocks. */
int blockTotal;
/** Number of safe blocks. */
private int blockSafe;
int blockSafe;
/** Number of blocks needed to satisfy safe mode threshold condition */
private int blockThreshold;
/** Number of blocks needed before populating replication queues */
@ -2850,7 +2851,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
/** time of the last status printout */
private long lastStatusReport = 0;
/** flag indicating whether replication queues have been initialized */
private boolean initializedReplQueues = false;
boolean initializedReplQueues = false;
/** Was safemode entered automatically because available resources were low. */
private boolean resourcesLow = false;
@ -2980,9 +2981,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
*/
private synchronized void initializeReplQueues() {
LOG.info("initializing replication queues");
if (isPopulatingReplQueues()) {
LOG.warn("Replication queues already initialized.");
}
assert !isPopulatingReplQueues() : "Already initialized repl queues";
long startTimeMisReplicatedScan = now();
blockManager.processMisReplicatedBlocks();
initializedReplQueues = true;
@ -4412,4 +4411,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
byte[] password) throws InvalidToken {
getDelegationTokenSecretManager().verifyToken(identifier, password);
}
@VisibleForTesting
public SafeModeInfo getSafeModeInfoForTests() {
return safeMode;
}
}


@ -87,7 +87,6 @@ import com.google.common.collect.ImmutableList;
* primary NameNode.
*
**********************************************************/
@Deprecated // use BackupNode with -checkpoint argument instead.
@InterfaceAudience.Private
public class SecondaryNameNode implements Runnable {


@ -41,7 +41,7 @@
<!DOCTYPE html>
<html>
<link rel="stylesheet" type="text/css" href="/static/hadoop.css">
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
<title>Hadoop <%=namenodeRole%>&nbsp;<%=namenodeLabel%></title>
<body>
<h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>
<%=NamenodeJspHelper.getVersionTable(fsn)%>


@ -37,7 +37,7 @@
<html>
<link rel="stylesheet" type="text/css" href="/static/hadoop.css">
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
<title>Hadoop <%=namenodeRole%>&nbsp;<%=namenodeLabel%></title>
<body>
<h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>


@ -37,7 +37,7 @@ String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameN
<html>
<link rel="stylesheet" type="text/css" href="/static/hadoop.css">
<title>Hadoop <%=namenodeRole%> <%=namenodeLabel%></title>
<title>Hadoop <%=namenodeRole%>&nbsp;<%=namenodeLabel%></title>
<body>
<h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>


@ -193,6 +193,10 @@ public class BenchmarkThroughput extends Configured implements Tool {
BUFFER_SIZE = conf.getInt("dfsthroughput.buffer.size", 4 * 1024);
String localDir = conf.get("mapred.temp.dir");
if (localDir == null) {
localDir = conf.get("hadoop.tmp.dir");
conf.set("mapred.temp.dir", localDir);
}
dir = new LocalDirAllocator("mapred.temp.dir");
System.setProperty("test.build.data", localDir);


@ -19,22 +19,37 @@
package org.apache.hadoop.hdfs;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.test.GenericTestUtils;
import static org.junit.Assert.*;
import org.junit.Before;
import org.junit.After;
import org.junit.Test;
import com.google.common.base.Supplier;
import com.google.common.collect.Lists;
/**
* Tests to verify safe mode correctness.
*/
public class TestSafeMode {
private static final Path TEST_PATH = new Path("/test");
private static final int BLOCK_SIZE = 1024;
Configuration conf;
MiniDFSCluster cluster;
FileSystem fs;
@ -43,6 +58,7 @@ public class TestSafeMode {
@Before
public void startUp() throws IOException {
conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
@ -83,7 +99,7 @@ public class TestSafeMode {
// create two files with one block each.
DFSTestUtil.createFile(fs, file1, 1000, (short)1, 0);
DFSTestUtil.createFile(fs, file2, 2000, (short)1, 0);
DFSTestUtil.createFile(fs, file2, 1000, (short)1, 0);
fs.close();
cluster.shutdown();
@ -128,6 +144,106 @@ public class TestSafeMode {
assertEquals("", status);
}
/**
* Test that the NN initializes its under-replicated blocks queue
* before it is ready to exit safemode (HDFS-1476)
*/
@Test(timeout=45000)
public void testInitializeReplQueuesEarly() throws Exception {
// Spray the blocks around the cluster when we add DNs instead of
// concentrating all blocks on the first node.
BlockManagerTestUtil.setWritingPrefersLocalNode(
cluster.getNamesystem().getBlockManager(), false);
cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
cluster.waitActive();
DFSTestUtil.createFile(fs, TEST_PATH, 15*BLOCK_SIZE, (short)1, 1L);
List<DataNodeProperties> dnprops = Lists.newLinkedList();
dnprops.add(cluster.stopDataNode(0));
dnprops.add(cluster.stopDataNode(0));
dnprops.add(cluster.stopDataNode(0));
cluster.getConfiguration(0).setFloat(
DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f/15f);
cluster.restartNameNode();
final NameNode nn = cluster.getNameNode();
String status = nn.getNamesystem().getSafemode();
assertEquals("Safe mode is ON.The reported blocks 0 needs additional " +
"15 blocks to reach the threshold 0.9990 of total blocks 15. " +
"Safe mode will be turned off automatically.", status);
assertFalse("Mis-replicated block queues should not be initialized " +
"until threshold is crossed",
NameNodeAdapter.safeModeInitializedReplQueues(nn));
cluster.restartDataNode(dnprops.remove(0));
// Wait for the block report from the restarted DN to come in.
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
return NameNodeAdapter.getSafeModeSafeBlocks(nn) > 0;
}
}, 10, 10000);
// SafeMode is fine-grain synchronized, so the processMisReplicatedBlocks
// call is still going on at this point - wait until it's done by grabbing
// the lock.
nn.getNamesystem().writeLock();
nn.getNamesystem().writeUnlock();
int safe = NameNodeAdapter.getSafeModeSafeBlocks(nn);
assertTrue("Expected first block report to make some but not all blocks " +
"safe. Got: " + safe, safe >= 1 && safe < 15);
BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nn));
assertEquals(15 - safe, nn.getNamesystem().getUnderReplicatedBlocks());
cluster.restartDataNodes();
}
/**
* Test that, when under-replicated blocks are processed at the end of
* safe-mode, blocks currently under construction are not considered
* under-construction or missing. Regression test for HDFS-2822.
*/
@Test
public void testRbwBlocksNotConsideredUnderReplicated() throws IOException {
List<FSDataOutputStream> stms = Lists.newArrayList();
try {
// Create some junk blocks so that the NN doesn't just immediately
// exit safemode on restart.
DFSTestUtil.createFile(fs, new Path("/junk-blocks"),
BLOCK_SIZE*4, (short)1, 1L);
// Create several files which are left open. It's important to
// create several here, because otherwise the first iteration of the
// replication monitor will pull them off the replication queue and
// hide this bug from the test!
for (int i = 0; i < 10; i++) {
FSDataOutputStream stm = fs.create(
new Path("/append-" + i), true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
stms.add(stm);
stm.write(1);
stm.hflush();
}
cluster.restartNameNode();
FSNamesystem ns = cluster.getNameNode(0).getNamesystem();
BlockManagerTestUtil.updateState(ns.getBlockManager());
assertEquals(0, ns.getPendingReplicationBlocks());
assertEquals(0, ns.getCorruptReplicaBlocks());
assertEquals(0, ns.getMissingBlocksCount());
} finally {
for (FSDataOutputStream stm : stms) {
IOUtils.closeStream(stm);
}
cluster.shutdown();
}
}
public interface FSRun {
public abstract void run(FileSystem fs) throws IOException;
}


@ -27,6 +27,8 @@ import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.util.Daemon;
import com.google.common.base.Preconditions;
public class BlockManagerTestUtil {
public static void setNodeReplicationLimit(final BlockManager blockManager,
final int limit) {
@ -122,4 +124,17 @@ public class BlockManagerTestUtil {
return blockManager.computeDatanodeWork();
}
/**
* Change whether the block placement policy will prefer the writer's
* local Datanode or not.
* @param prefer
*/
public static void setWritingPrefersLocalNode(
BlockManager bm, boolean prefer) {
BlockPlacementPolicy bpp = bm.getBlockPlacementPolicy();
Preconditions.checkState(bpp instanceof BlockPlacementPolicyDefault,
"Must use default policy, got %s", bpp.getClass());
((BlockPlacementPolicyDefault)bpp).setPreferLocalNode(prefer);
}
}


@ -24,6 +24,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.ipc.Server;
@ -97,4 +98,28 @@ public class NameNodeAdapter {
ns.readUnlock();
}
}
/**
* @return the number of blocks marked safe by safemode, or -1
* if safemode is not running.
*/
public static int getSafeModeSafeBlocks(NameNode nn) {
SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
if (smi == null) {
return -1;
}
return smi.blockSafe;
}
/**
* @return true if safemode is not running, or if safemode has already
* initialized the replication queues
*/
public static boolean safeModeInitializedReplQueues(NameNode nn) {
SafeModeInfo smi = nn.getNamesystem().getSafeModeInfoForTests();
if (smi == null) {
return true;
}
return smi.initializedReplQueues;
}
}


@ -203,7 +203,6 @@ public class TestCheckpoint extends TestCase {
/*
* Simulate namenode crashing after rolling edit log.
*/
@SuppressWarnings("deprecation")
public void testSecondaryNamenodeError1()
throws IOException {
LOG.info("Starting testSecondaryNamenodeError1");
@ -265,7 +264,6 @@ public class TestCheckpoint extends TestCase {
/*
* Simulate a namenode crash after uploading new image
*/
@SuppressWarnings("deprecation")
public void testSecondaryNamenodeError2() throws IOException {
LOG.info("Starting testSecondaryNamenodeError2");
Configuration conf = new HdfsConfiguration();
@ -324,7 +322,6 @@ public class TestCheckpoint extends TestCase {
/*
* Simulate a secondary namenode crash after rolling the edit log.
*/
@SuppressWarnings("deprecation")
public void testSecondaryNamenodeError3() throws IOException {
LOG.info("Starting testSecondaryNamenodeError3");
Configuration conf = new HdfsConfiguration();
@ -394,7 +391,6 @@ public class TestCheckpoint extends TestCase {
* back to the name-node.
* Used to truncate primary fsimage file.
*/
@SuppressWarnings("deprecation")
public void testSecondaryFailsToReturnImage() throws IOException {
LOG.info("Starting testSecondaryFailsToReturnImage");
Configuration conf = new HdfsConfiguration();
@ -471,7 +467,6 @@ public class TestCheckpoint extends TestCase {
* @param errorType the ErrorSimulator type to trigger
* @param exceptionSubstring an expected substring of the triggered exception
*/
@SuppressWarnings("deprecation")
private void doSendFailTest(int errorType, String exceptionSubstring)
throws IOException {
Configuration conf = new HdfsConfiguration();
@ -586,7 +581,6 @@ public class TestCheckpoint extends TestCase {
/**
* Test that the SecondaryNameNode properly locks its storage directories.
*/
@SuppressWarnings("deprecation")
public void testSecondaryNameNodeLocking() throws Exception {
// Start a primary NN so that the secondary will start successfully
Configuration conf = new HdfsConfiguration();
@ -679,7 +673,6 @@ public class TestCheckpoint extends TestCase {
* 2. if the NN does not contain an image, importing a checkpoint
* succeeds and re-saves the image
*/
@SuppressWarnings("deprecation")
public void testImportCheckpoint() throws Exception {
Configuration conf = new HdfsConfiguration();
Path testPath = new Path("/testfile");
@ -760,16 +753,12 @@ public class TestCheckpoint extends TestCase {
throw new IOException("Cannot create directory " + dir);
}
// This deprecation suppress warning does not work due to known Java bug:
// http://bugs.sun.com/view_bug.do?bug_id=6460147
@SuppressWarnings("deprecation")
SecondaryNameNode startSecondaryNameNode(Configuration conf
) throws IOException {
conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
return new SecondaryNameNode(conf);
}
@SuppressWarnings("deprecation")
SecondaryNameNode startSecondaryNameNode(Configuration conf, int index)
throws IOException {
Configuration snnConf = new Configuration(conf);
@ -782,7 +771,6 @@ public class TestCheckpoint extends TestCase {
/**
* Tests checkpoint in HDFS.
*/
@SuppressWarnings("deprecation")
public void testCheckpoint() throws IOException {
Path file1 = new Path("checkpoint.dat");
Path file2 = new Path("checkpoint2.dat");
@ -1009,7 +997,6 @@ public class TestCheckpoint extends TestCase {
* - it then fails again for the same reason
* - it then tries to checkpoint a third time
*/
@SuppressWarnings("deprecation")
public void testCheckpointAfterTwoFailedUploads() throws IOException {
MiniDFSCluster cluster = null;
SecondaryNameNode secondary = null;
@ -1064,7 +1051,6 @@ public class TestCheckpoint extends TestCase {
*
* @throws IOException
*/
@SuppressWarnings("deprecation")
public void testMultipleSecondaryNamenodes() throws IOException {
Configuration conf = new HdfsConfiguration();
String nameserviceId1 = "ns1";
@ -1114,7 +1100,6 @@ public class TestCheckpoint extends TestCase {
* Test that the secondary doesn't have to re-download image
* if it hasn't changed.
*/
@SuppressWarnings("deprecation")
public void testSecondaryImageDownload() throws IOException {
LOG.info("Starting testSecondaryImageDownload");
Configuration conf = new HdfsConfiguration();
@ -1197,7 +1182,6 @@ public class TestCheckpoint extends TestCase {
* It verifies that this works even though the earlier-txid checkpoint gets
* uploaded after the later-txid checkpoint.
*/
@SuppressWarnings("deprecation")
public void testMultipleSecondaryNNsAgainstSameNN() throws Exception {
Configuration conf = new HdfsConfiguration();
@ -1283,7 +1267,6 @@ public class TestCheckpoint extends TestCase {
* It verifies that one of the two gets an error that it's uploading a
* duplicate checkpoint, and the other one succeeds.
*/
@SuppressWarnings("deprecation")
public void testMultipleSecondaryNNsAgainstSameNN2() throws Exception {
Configuration conf = new HdfsConfiguration();
@ -1382,7 +1365,6 @@ public class TestCheckpoint extends TestCase {
* is running. The secondary should shut itself down if if talks to a NN
* with the wrong namespace.
*/
@SuppressWarnings("deprecation")
public void testReformatNNBetweenCheckpoints() throws IOException {
MiniDFSCluster cluster = null;
SecondaryNameNode secondary = null;
@ -1637,7 +1619,6 @@ public class TestCheckpoint extends TestCase {
/**
* Test that the 2NN triggers a checkpoint after the configurable interval
*/
@SuppressWarnings("deprecation")
public void testCheckpointTriggerOnTxnCount() throws Exception {
MiniDFSCluster cluster = null;
SecondaryNameNode secondary = null;
@ -1691,7 +1672,6 @@ public class TestCheckpoint extends TestCase {
* logs that connect the 2NN's old checkpoint to the current txid
* get archived. Then, the 2NN tries to checkpoint again.
*/
@SuppressWarnings("deprecation")
public void testSecondaryHasVeryOutOfDateImage() throws IOException {
MiniDFSCluster cluster = null;
SecondaryNameNode secondary = null;
@ -1729,7 +1709,6 @@ public class TestCheckpoint extends TestCase {
}
}
@SuppressWarnings("deprecation")
public void testCommandLineParsing() throws ParseException {
SecondaryNameNode.CommandLineOpts opts =
new SecondaryNameNode.CommandLineOpts();
@ -1764,7 +1743,6 @@ public class TestCheckpoint extends TestCase {
} catch (ParseException e) {}
}
@SuppressWarnings("deprecation")
private void cleanup(SecondaryNameNode snn) {
if (snn != null) {
try {
@ -1780,7 +1758,6 @@ public class TestCheckpoint extends TestCase {
* Assert that if any two files have the same name across the 2NNs
* and NN, they should have the same content too.
*/
@SuppressWarnings("deprecation")
private void assertParallelFilesInvariant(MiniDFSCluster cluster,
ImmutableList<SecondaryNameNode> secondaries) throws Exception {
List<File> allCurrentDirs = Lists.newArrayList();
@ -1792,7 +1769,6 @@ public class TestCheckpoint extends TestCase {
ImmutableSet.of("VERSION"));
}
@SuppressWarnings("deprecation")
private List<File> getCheckpointCurrentDirs(SecondaryNameNode secondary) {
List<File> ret = Lists.newArrayList();
for (URI u : secondary.getCheckpointDirs()) {
@ -1802,7 +1778,6 @@ public class TestCheckpoint extends TestCase {
return ret;
}
@SuppressWarnings("deprecation")
private CheckpointStorage spyOnSecondaryImage(SecondaryNameNode secondary1) {
CheckpointStorage spy = Mockito.spy((CheckpointStorage)secondary1.getFSImage());;
secondary1.setFSImage(spy);
@ -1812,7 +1787,6 @@ public class TestCheckpoint extends TestCase {
/**
* A utility class to perform a checkpoint in a different thread.
*/
@SuppressWarnings("deprecation")
private static class DoCheckpointThread extends Thread {
private final SecondaryNameNode snn;
private volatile Throwable thrown = null;


@ -106,9 +106,6 @@ public class TestNameEditsConfigs extends TestCase {
assertTrue(!fileSys.exists(name));
}
// This deprecation suppress warning does not work due to known Java bug:
// http://bugs.sun.com/view_bug.do?bug_id=6460147
@SuppressWarnings("deprecation")
SecondaryNameNode startSecondaryNameNode(Configuration conf
) throws IOException {
conf.set(DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, "0.0.0.0:0");
@ -128,7 +125,6 @@ public class TestNameEditsConfigs extends TestCase {
* sure we are reading proper edits and image.
* @throws Exception
*/
@SuppressWarnings("deprecation")
public void testNameEditsConfigs() throws Exception {
Path file1 = new Path("TestNameEditsConfigs1");
Path file2 = new Path("TestNameEditsConfigs2");


@ -30,7 +30,6 @@ import org.junit.Test;
public class TestSecondaryWebUi {
@SuppressWarnings("deprecation")
@Test
public void testSecondaryWebUi() throws IOException {
Configuration conf = new Configuration();


@ -120,7 +120,6 @@ public class TestStartup extends TestCase {
* start MiniDFScluster, create a file (to create edits) and do a checkpoint
* @throws IOException
*/
@SuppressWarnings("deprecation")
public void createCheckPoint() throws IOException {
LOG.info("--starting mini cluster");
// manage dirs parameter set to false
@ -300,7 +299,6 @@ public class TestStartup extends TestCase {
* secondary node copies fsimage and edits into correct separate directories.
* @throws IOException
*/
@SuppressWarnings("deprecation")
public void testSNNStartup() throws IOException{
//setUpConfig();
LOG.info("--starting SecondNN startup test");


@ -153,7 +153,6 @@ public class TestStorageRestore {
* 7. run doCheckpoint
* 8. verify that all the image and edits files are the same.
*/
@SuppressWarnings("deprecation")
@Test
public void testStorageRestore() throws Exception {
int numDatanodes = 0;
@ -310,7 +309,6 @@ public class TestStorageRestore {
* then try to perform a checkpoint. The NN should not serve up the image or
* edits from the restored (empty) dir.
*/
@SuppressWarnings("deprecation")
@Test
public void testMultipleSecondaryCheckpoint() throws IOException {


@ -142,6 +142,14 @@ Release 0.23.1 - Unreleased
MAPREDUCE-3692. yarn-resourcemanager out and log files can get big. (eli)
MAPREDUCE-3710. Improved FileInputFormat to return better locality for the
last split. (Siddarth Seth via vinodkv)
MAPREDUCE-2765. DistCp Rewrite. (Mithun Radhakrishnan via mahadev)
MAPREDUCE-3737. The Web Application Proxy's is not documented very well.
(Robert Evans via mahadev)
OPTIMIZATIONS
MAPREDUCE-3567. Extraneous JobConf objects in AM heap. (Vinod Kumar
@ -165,7 +173,13 @@ Release 0.23.1 - Unreleased
MAPREDUCE-3512. Batching JobHistory flushing to DFS so that we don't flush
for every event slowing down AM. (Siddarth Seth via vinodkv)
MAPREDUCE-3718. Change default AM heartbeat interval to 1 second. (Hitesh
Shah via sseth)
BUG FIXES
MAPREDUCE-3194. "mapred mradmin" command is broken in mrv2
(Jason Lowe via bobby)
MAPREDUCE-3462. Fix Gridmix JUnit testcase failures.
(Ravi Prakash and Ravi Gummadi via amarrk)
@ -499,6 +513,48 @@ Release 0.23.1 - Unreleased
MAPREDUCE-3705. ant build fails on 0.23 branch. (Thomas Graves via
mahadev)
MAPREDUCE-3691. webservices add support to compress response.
(Thomas Graves via mahadev)
MAPREDUCE-3702. internal server error trying access application master
via proxy with filter enabled (Thomas Graves via mahadev)
MAPREDUCE-3646. Remove redundant URL info from "mapred job" output.
(Jonathan Eagles via mahadev)
MAPREDUCE-3681. Fixed computation of queue's usedCapacity. (acmurthy)
MAPREDUCE-3505. yarn APPLICATION_CLASSPATH needs to be overridable.
(ahmed via tucu)
MAPREDUCE-3714. Fixed EventFetcher and Fetcher threads to shut-down properly
so that reducers don't hang in corner cases. (vinodkv)
MAPREDUCE-3712. The mapreduce tar does not contain the hadoop-mapreduce-client-
jobclient-tests.jar. (mahadev)
MAPREDUCE-3717. JobClient test jar has missing files to run all the test programs.
(mahadev)
MAPREDUCE-3630. Fixes a NullPointer exception while running TeraGen - if a
map is asked to generate 0 records. (Mahadev Konar via sseth)
MAPREDUCE-3683. Fixed maxCapacity of queues to be product of parent
maxCapacities. (acmurthy)
MAPREDUCE-3713. Fixed the way head-room is allocated to applications by
CapacityScheduler so that it deducts current-usage per user and not
per-application. (Arun C Murthy via vinodkv)
MAPREDUCE-3721. Fixed a race in shuffle which caused reduces to hang.
(sseth via acmurthy)
MAPREDUCE-3733. Add Apache License Header to hadoop-distcp/pom.xml.
(mahadev)
MAPREDUCE-3735. Add distcp jar to the distribution (tar).
(mahadev)
Release 0.23.0 - 2011-11-01
INCOMPATIBLE CHANGES


@ -30,9 +30,6 @@ fi
function print_usage(){
echo "Usage: mapred [--config confdir] COMMAND"
echo " where COMMAND is one of:"
echo " mradmin run a Map-Reduce admin client"
echo " jobtracker run the MapReduce job Tracker node"
echo " tasktracker run a MapReduce task Tracker node"
echo " pipes run a Pipes job"
echo " job manipulate MapReduce jobs"
echo " queue get information regarding JobQueues"
@ -51,16 +48,7 @@ fi
COMMAND=$1
shift
if [ "$COMMAND" = "mradmin" ] ; then
CLASS=org.apache.hadoop.mapred.tools.MRAdmin
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "jobtracker" ] ; then
CLASS=org.apache.hadoop.mapred.JobTracker
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_JOBTRACKER_OPTS"
elif [ "$COMMAND" = "tasktracker" ] ; then
CLASS=org.apache.hadoop.mapred.TaskTracker
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_TASKTRACKER_OPTS"
elif [ "$COMMAND" = "job" ] ; then
if [ "$COMMAND" = "job" ] ; then
CLASS=org.apache.hadoop.mapred.JobClient
elif [ "$COMMAND" = "queue" ] ; then
CLASS=org.apache.hadoop.mapred.JobQueueClient
@ -75,6 +63,13 @@ elif [ "$COMMAND" = "classpath" ] ; then
elif [ "$COMMAND" = "groups" ] ; then
CLASS=org.apache.hadoop.mapred.tools.GetGroups
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "mradmin" ] \
|| [ "$COMMAND" = "jobtracker" ] \
|| [ "$COMMAND" = "tasktracker" ] ; then
echo "Sorry, the $COMMAND command is no longer supported."
echo "You may find similar functionality with the \"yarn\" shell command."
print_usage
exit
else
echo $COMMAND - invalid command
print_usage


@ -522,13 +522,13 @@ public abstract class TaskAttemptImpl implements
* a parent CLC and use it for all the containers, so this should go away
* once the mr-generated-classpath stuff is gone.
*/
private static String getInitialClasspath() throws IOException {
private static String getInitialClasspath(Configuration conf) throws IOException {
synchronized (classpathLock) {
if (initialClasspathFlag.get()) {
return initialClasspath;
}
Map<String, String> env = new HashMap<String, String>();
MRApps.setClasspath(env);
MRApps.setClasspath(env, conf);
initialClasspath = env.get(Environment.CLASSPATH.name());
initialClasspathFlag.set(true);
return initialClasspath;
@ -631,7 +631,7 @@ public abstract class TaskAttemptImpl implements
Apps.addToEnvironment(
environment,
Environment.CLASSPATH.name(),
getInitialClasspath());
getInitialClasspath(conf));
} catch (IOException e) {
throw new YarnException(e);
}


@ -38,6 +38,10 @@
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-common</artifactId>
</dependency>
</dependencies>
<build>


@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.BuilderUtils;
@ -171,7 +172,7 @@ public class MRApps extends Apps {
}
private static void setMRFrameworkClasspath(
Map<String, String> environment) throws IOException {
Map<String, String> environment, Configuration conf) throws IOException {
InputStream classpathFileStream = null;
BufferedReader reader = null;
try {
@ -208,8 +209,10 @@ public class MRApps extends Apps {
}
// Add standard Hadoop classes
for (String c : ApplicationConstants.APPLICATION_CLASSPATH) {
Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c);
for (String c : conf.get(YarnConfiguration.YARN_APPLICATION_CLASSPATH)
.split(",")) {
Apps.addToEnvironment(environment, Environment.CLASSPATH.name(), c
.trim());
}
} finally {
if (classpathFileStream != null) {
@ -222,8 +225,8 @@ public class MRApps extends Apps {
// TODO: Remove duplicates.
}
public static void setClasspath(Map<String, String> environment)
throws IOException {
public static void setClasspath(Map<String, String> environment,
Configuration conf) throws IOException {
Apps.addToEnvironment(
environment,
Environment.CLASSPATH.name(),
@ -232,7 +235,7 @@ public class MRApps extends Apps {
environment,
Environment.CLASSPATH.name(),
Environment.PWD.$() + Path.SEPARATOR + "*");
MRApps.setMRFrameworkClasspath(environment);
MRApps.setMRFrameworkClasspath(environment, conf);
}
private static final String STAGING_CONSTANT = ".staging";


@ -18,7 +18,12 @@
package org.apache.hadoop.mapreduce.v2.util;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
@ -121,4 +126,17 @@ public class TestMRApps {
"/my/path/to/staging/dummy-user/.staging/job_dummy-job_12345/job.xml", jobFile);
}
@Test public void testSetClasspath() throws IOException {
Job job = Job.getInstance();
Map<String, String> environment = new HashMap<String, String>();
MRApps.setClasspath(environment, job.getConfiguration());
assertEquals("job.jar:$PWD/*:$HADOOP_CONF_DIR:" +
"$HADOOP_COMMON_HOME/share/hadoop/common/*:" +
"$HADOOP_COMMON_HOME/share/hadoop/common/lib/*:" +
"$HADOOP_HDFS_HOME/share/hadoop/hdfs/*:" +
"$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*:" +
"$YARN_HOME/share/hadoop/mapreduce/*:" +
"$YARN_HOME/share/hadoop/mapreduce/lib/*",
environment.get("CLASSPATH"));
}
}


@ -289,8 +289,10 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
}
if (bytesRemaining != 0) {
splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining,
blkLocations[blkLocations.length-1].getHosts()));
String[] splitHosts = getSplitHosts(blkLocations, length
- bytesRemaining, bytesRemaining, clusterMap);
splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
splitHosts));
}
} else if (length != 0) {
String[] splitHosts = getSplitHosts(blkLocations,0,length,clusterMap);


@ -1216,6 +1216,7 @@ public class Job extends JobContextImpl implements JobContext {
}
});
state = JobState.RUNNING;
LOG.info("The url to track the job: " + getTrackingURL());
}
/**


@ -417,7 +417,7 @@ public interface MRJobConfig {
/** How often the AM should send heartbeats to the RM.*/
public static final String MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS =
MR_AM_PREFIX + "scheduler.heartbeat.interval-ms";
public static final int DEFAULT_MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS = 2000;
public static final int DEFAULT_MR_AM_TO_RM_HEARTBEAT_INTERVAL_MS = 1000;
/**
* If contact with RM is lost, the AM will wait MR_AM_TO_RM_WAIT_INTERVAL_MS


@ -286,8 +286,9 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
}
if (bytesRemaining != 0) {
int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
splits.add(makeSplit(path, length-bytesRemaining, bytesRemaining,
blkLocations[blkLocations.length-1].getHosts()));
blkLocations[blkIndex].getHosts()));
}
} else { // not splitable
splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts()));


@ -27,6 +27,7 @@ import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapreduce.TaskAttemptID;
@SuppressWarnings("deprecation")
class EventFetcher<K,V> extends Thread {
private static final long SLEEP_TIME = 1000;
private static final int MAX_EVENTS_TO_FETCH = 10000;
@ -42,6 +43,8 @@ class EventFetcher<K,V> extends Thread {
private int maxMapRuntime = 0;
private volatile boolean stopped = false;
public EventFetcher(TaskAttemptID reduce,
TaskUmbilicalProtocol umbilical,
ShuffleScheduler<K,V> scheduler,
@ -60,7 +63,7 @@ class EventFetcher<K,V> extends Thread {
LOG.info(reduce + " Thread started: " + getName());
try {
while (true && !Thread.currentThread().isInterrupted()) {
while (!stopped && !Thread.currentThread().isInterrupted()) {
try {
int numNewMaps = getMapCompletionEvents();
failures = 0;
@ -71,6 +74,9 @@ class EventFetcher<K,V> extends Thread {
if (!Thread.currentThread().isInterrupted()) {
Thread.sleep(SLEEP_TIME);
}
} catch (InterruptedException e) {
LOG.info("EventFetcher is interrupted.. Returning");
return;
} catch (IOException ie) {
LOG.info("Exception in getting events", ie);
// check to see whether to abort
@ -91,6 +97,16 @@ class EventFetcher<K,V> extends Thread {
}
}
public void shutDown() {
this.stopped = true;
interrupt();
try {
join(5000);
} catch(InterruptedException ie) {
LOG.warn("Got interrupted while joining " + getName(), ie);
}
}
/**
* Queries the {@link TaskTracker} for a set of map-completion events
* from a given event ID.


@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.task.reduce.MapOutput.Type;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
@SuppressWarnings({"deprecation"})
class Fetcher<K,V> extends Thread {
private static final Log LOG = LogFactory.getLog(Fetcher.class);
@ -88,6 +89,8 @@ class Fetcher<K,V> extends Thread {
private final Decompressor decompressor;
private final SecretKey jobTokenSecret;
private volatile boolean stopped = false;
public Fetcher(JobConf job, TaskAttemptID reduceId,
ShuffleScheduler<K,V> scheduler, MergeManager<K,V> merger,
Reporter reporter, ShuffleClientMetrics metrics,
@ -135,7 +138,7 @@ class Fetcher<K,V> extends Thread {
public void run() {
try {
while (true && !Thread.currentThread().isInterrupted()) {
while (!stopped && !Thread.currentThread().isInterrupted()) {
MapHost host = null;
try {
// If merge is on, block
@ -161,6 +164,16 @@ class Fetcher<K,V> extends Thread {
}
}
public void shutDown() throws InterruptedException {
this.stopped = true;
interrupt();
try {
join(5000);
} catch (InterruptedException ie) {
LOG.warn("Got interrupt while joining " + getName(), ie);
}
}
/**
* The crux of the matter...
*


@ -92,6 +92,7 @@ public class MergeManager<K, V> {
private final long memoryLimit;
private long usedMemory;
private long commitMemory;
private final long maxSingleShuffleLimit;
private final int memToMemMergeOutputsThreshold;
@ -181,6 +182,13 @@ public class MergeManager<K, V> {
"ioSortFactor=" + ioSortFactor + ", " +
"memToMemMergeOutputsThreshold=" + memToMemMergeOutputsThreshold);
if (this.maxSingleShuffleLimit >= this.mergeThreshold) {
throw new RuntimeException("Invlaid configuration: "
+ "maxSingleShuffleLimit should be less than mergeThreshold"
+ "maxSingleShuffleLimit: " + this.maxSingleShuffleLimit
+ "mergeThreshold: " + this.mergeThreshold);
}
boolean allowMemToMemMerge =
jobConf.getBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, false);
if (allowMemToMemMerge) {
@ -245,16 +253,16 @@ public class MergeManager<K, V> {
// all the stalled threads
if (usedMemory > memoryLimit) {
LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory +
") is greater than memoryLimit (" + memoryLimit + ")");
LOG.debug(mapId + ": Stalling shuffle since usedMemory (" + usedMemory
+ ") is greater than memoryLimit (" + memoryLimit + ")." +
" CommitMemory is (" + commitMemory + ")");
return stallShuffle;
}
// Allow the in-memory shuffle to progress
LOG.debug(mapId + ": Proceeding with shuffle since usedMemory (" +
usedMemory +
") is lesser than memoryLimit (" + memoryLimit + ")");
LOG.debug(mapId + ": Proceeding with shuffle since usedMemory ("
+ usedMemory + ") is lesser than memoryLimit (" + memoryLimit + ")."
+ "CommitMemory is (" + commitMemory + ")");
return unconditionalReserve(mapId, requestedSize, true);
}
@ -270,18 +278,24 @@ public class MergeManager<K, V> {
}
synchronized void unreserve(long size) {
commitMemory -= size;
usedMemory -= size;
}
public synchronized void closeInMemoryFile(MapOutput<K,V> mapOutput) {
inMemoryMapOutputs.add(mapOutput);
LOG.info("closeInMemoryFile -> map-output of size: " + mapOutput.getSize()
+ ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size());
+ ", inMemoryMapOutputs.size() -> " + inMemoryMapOutputs.size()
+ ", commitMemory -> " + commitMemory + ", usedMemory ->" + usedMemory);
commitMemory+= mapOutput.getSize();
synchronized (inMemoryMerger) {
if (!inMemoryMerger.isInProgress() && usedMemory >= mergeThreshold) {
LOG.info("Starting inMemoryMerger's merge since usedMemory=" +
usedMemory + " > mergeThreshold=" + mergeThreshold);
// Can hang if mergeThreshold is really low.
if (!inMemoryMerger.isInProgress() && commitMemory >= mergeThreshold) {
LOG.info("Starting inMemoryMerger's merge since commitMemory=" +
commitMemory + " > mergeThreshold=" + mergeThreshold +
". Current usedMemory=" + usedMemory);
inMemoryMapOutputs.addAll(inMemoryMergedMapOutputs);
inMemoryMergedMapOutputs.clear();
inMemoryMerger.startMerge(inMemoryMapOutputs);


@ -19,8 +19,6 @@ package org.apache.hadoop.mapreduce.task.reduce;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
@ -33,17 +31,17 @@ import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.Progress;
@InterfaceAudience.Private
@InterfaceStability.Unstable
@SuppressWarnings({"deprecation", "unchecked", "rawtypes"})
public class Shuffle<K, V> implements ExceptionReporter {
private static final Log LOG = LogFactory.getLog(Shuffle.class);
private static final int PROGRESS_FREQUENCY = 2000;
private final TaskAttemptID reduceId;
@ -100,7 +98,6 @@ public class Shuffle<K, V> implements ExceptionReporter {
this, mergePhase, mapOutputFile);
}
@SuppressWarnings("unchecked")
public RawKeyValueIterator run() throws IOException, InterruptedException {
// Start the map-completion events fetcher thread
final EventFetcher<K,V> eventFetcher =
@ -130,19 +127,11 @@ public class Shuffle<K, V> implements ExceptionReporter {
}
// Stop the event-fetcher thread
eventFetcher.interrupt();
try {
eventFetcher.join();
} catch(Throwable t) {
LOG.info("Failed to stop " + eventFetcher.getName(), t);
}
eventFetcher.shutDown();
// Stop the map-output fetcher threads
for (Fetcher<K,V> fetcher : fetchers) {
fetcher.interrupt();
}
for (Fetcher<K,V> fetcher : fetchers) {
fetcher.join();
fetcher.shutDown();
}
fetchers = null;
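The interrupt/join boilerplate above is replaced by a shutDown() call on each fetcher. The Fetcher and EventFetcher bodies are not part of this diff, so the following is only a hedged sketch of what such a helper on a fetcher thread would typically wrap; the timeout, LOG field, and log message are assumptions.
// Hypothetical shape of a fetcher thread's shutDown() helper (assumed, not from this diff).
public void shutDown() {
  this.interrupt();              // ask the thread to stop fetching
  try {
    this.join(5000);             // bounded wait for it to exit
  } catch (InterruptedException ie) {
    LOG.info("Interrupted while stopping " + getName(), ie);
    Thread.currentThread().interrupt();
  }
}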

View File

@ -102,6 +102,13 @@
<phase>test-compile</phase>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<mainClass>org.apache.hadoop.test.MapredTestDriver</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>

View File

@ -175,7 +175,6 @@ public class ClientServiceDelegate {
+ ":" + addr.getPort()));
newUgi.addToken(clientToken);
}
LOG.info("The url to track the job: " + application.getTrackingUrl());
LOG.debug("Connecting to " + serviceAddr);
final String tempStr = serviceAddr;
realProxy = newUgi.doAs(new PrivilegedExceptionAction<MRClientProtocol>() {

View File

@ -406,7 +406,7 @@ public class YARNRunner implements ClientProtocol {
// Setup the CLASSPATH in environment
// i.e. add { job jar, CWD, Hadoop jars} to classpath.
Map<String, String> environment = new HashMap<String, String>();
MRApps.setClasspath(environment);
MRApps.setClasspath(environment, conf);
// Parse distributed cache
MRApps.setupDistributedCache(jobConf, localResources);

View File

@ -29,7 +29,6 @@ import java.util.Stack;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.examples.RandomTextWriter;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -40,6 +39,7 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.mapreduce.RandomTextWriter;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;

View File

@ -17,6 +17,10 @@
*/
package org.apache.hadoop.mapred;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.DataOutputStream;
import java.io.IOException;
@ -32,6 +36,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.io.Text;
@SuppressWarnings("deprecation")
public class TestFileInputFormat extends TestCase {
Configuration conf = new Configuration();
@ -186,6 +191,102 @@ public class TestFileInputFormat extends TestCase {
assertEquals(splits.length, 2);
}
@SuppressWarnings("rawtypes")
public void testLastInputSplitAtSplitBoundary() throws Exception {
FileInputFormat fif = new FileInputFormatForTest(1024l * 1024 * 1024,
128l * 1024 * 1024);
JobConf job = new JobConf();
InputSplit[] splits = fif.getSplits(job, 8);
assertEquals(8, splits.length);
for (int i = 0; i < splits.length; i++) {
InputSplit split = splits[i];
assertEquals(("host" + i), split.getLocations()[0]);
}
}
@SuppressWarnings("rawtypes")
public void testLastInputSplitExceedingSplitBoundary() throws Exception {
FileInputFormat fif = new FileInputFormatForTest(1027l * 1024 * 1024,
128l * 1024 * 1024);
JobConf job = new JobConf();
InputSplit[] splits = fif.getSplits(job, 8);
assertEquals(8, splits.length);
for (int i = 0; i < splits.length; i++) {
InputSplit split = splits[i];
assertEquals(("host" + i), split.getLocations()[0]);
}
}
@SuppressWarnings("rawtypes")
public void testLastInputSplitSingleSplit() throws Exception {
FileInputFormat fif = new FileInputFormatForTest(100l * 1024 * 1024,
128l * 1024 * 1024);
JobConf job = new JobConf();
InputSplit[] splits = fif.getSplits(job, 1);
assertEquals(1, splits.length);
for (int i = 0; i < splits.length; i++) {
InputSplit split = splits[i];
assertEquals(("host" + i), split.getLocations()[0]);
}
}
private class FileInputFormatForTest<K, V> extends FileInputFormat<K, V> {
long splitSize;
long length;
FileInputFormatForTest(long length, long splitSize) {
this.length = length;
this.splitSize = splitSize;
}
@Override
public RecordReader<K, V> getRecordReader(InputSplit split, JobConf job,
Reporter reporter) throws IOException {
return null;
}
@Override
protected FileStatus[] listStatus(JobConf job) throws IOException {
FileStatus mockFileStatus = mock(FileStatus.class);
when(mockFileStatus.getBlockSize()).thenReturn(splitSize);
when(mockFileStatus.isDirectory()).thenReturn(false);
Path mockPath = mock(Path.class);
FileSystem mockFs = mock(FileSystem.class);
BlockLocation[] blockLocations = mockBlockLocations(length, splitSize);
when(mockFs.getFileBlockLocations(mockFileStatus, 0, length)).thenReturn(
blockLocations);
when(mockPath.getFileSystem(any(Configuration.class))).thenReturn(mockFs);
when(mockFileStatus.getPath()).thenReturn(mockPath);
when(mockFileStatus.getLen()).thenReturn(length);
FileStatus[] fs = new FileStatus[1];
fs[0] = mockFileStatus;
return fs;
}
@Override
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
return splitSize;
}
private BlockLocation[] mockBlockLocations(long size, long splitSize) {
int numLocations = (int) (size / splitSize);
if (size % splitSize != 0)
numLocations++;
BlockLocation[] blockLocations = new BlockLocation[numLocations];
for (int i = 0; i < numLocations; i++) {
String[] names = new String[] { "b" + i };
String[] hosts = new String[] { "host" + i };
blockLocations[i] = new BlockLocation(names, hosts, i * splitSize,
Math.min(splitSize, size - (splitSize * i)));
}
return blockLocations;
}
}
static void writeFile(Configuration conf, Path name,
short replication, int numBlocks) throws IOException {
FileSystem fileSys = FileSystem.get(conf);
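Worked numbers for the 1027 MB case above, to make the expected split count and host assignment concrete. The 1.1 slop factor is FileInputFormat's usual tolerance for the tail split; treat the exact constant as an assumption here.
// Illustrative arithmetic for testLastInputSplitExceedingSplitBoundary.
long length    = 1027L * 1024 * 1024;   // file size
long splitSize =  128L * 1024 * 1024;   // forced by the computeSplitSize() override
// 7 full splits cover 896 MB; the remaining 131 MB is within 1.1 * splitSize,
// so it becomes a single 8th split rather than splits 8 and 9.
long tail = length - 7 * splitSize;                       // 131 MB
boolean mergedTail = (double) tail / splitSize <= 1.1;    // 1.023 <= 1.1 -> true
// mockBlockLocations() still creates 9 blocks (the 9th is 3 MB on "host8"),
// but the 8th split's location comes from the block holding its start offset: "host7".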

View File

@ -25,7 +25,6 @@ import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.examples.RandomWriter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;

View File

@ -29,7 +29,6 @@ import java.util.Stack;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.examples.RandomTextWriter;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

View File

@ -0,0 +1,757 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* This program uses map/reduce to just run a distributed job where there is
* no interaction between the tasks and each task writes a large unsorted
* random sequence of words.
* In order for this program to generate data for terasort with 5-10 words
* per key and 20-100 words per value, have the following config:
* <xmp>
* <?xml version="1.0"?>
* <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
* <configuration>
* <property>
* <name>mapreduce.randomtextwriter.minwordskey</name>
* <value>5</value>
* </property>
* <property>
* <name>mapreduce.randomtextwriter.maxwordskey</name>
* <value>10</value>
* </property>
* <property>
* <name>mapreduce.randomtextwriter.minwordsvalue</name>
* <value>20</value>
* </property>
* <property>
* <name>mapreduce.randomtextwriter.maxwordsvalue</name>
* <value>100</value>
* </property>
* <property>
* <name>mapreduce.randomtextwriter.totalbytes</name>
* <value>1099511627776</value>
* </property>
* </configuration></xmp>
*
* Equivalently, {@link RandomTextWriter} also supports all the above options
* and ones supported by {@link Tool} via the command-line.
*
* To run: bin/hadoop jar hadoop-${version}-examples.jar randomtextwriter
* [-outFormat <i>output format class</i>] <i>output</i>
*/
public class RandomTextWriter extends Configured implements Tool {
public static final String TOTAL_BYTES =
"mapreduce.randomtextwriter.totalbytes";
public static final String BYTES_PER_MAP =
"mapreduce.randomtextwriter.bytespermap";
public static final String MAPS_PER_HOST =
"mapreduce.randomtextwriter.mapsperhost";
public static final String MAX_VALUE = "mapreduce.randomtextwriter.maxwordsvalue";
public static final String MIN_VALUE = "mapreduce.randomtextwriter.minwordsvalue";
public static final String MIN_KEY = "mapreduce.randomtextwriter.minwordskey";
public static final String MAX_KEY = "mapreduce.randomtextwriter.maxwordskey";
static int printUsage() {
System.out.println("randomtextwriter " +
"[-outFormat <output format class>] " +
"<output>");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
/**
* User counters
*/
static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
static class RandomTextMapper extends Mapper<Text, Text, Text, Text> {
private long numBytesToWrite;
private int minWordsInKey;
private int wordsInKeyRange;
private int minWordsInValue;
private int wordsInValueRange;
private Random random = new Random();
/**
* Save the configuration values that we need to write the data.
*/
public void setup(Context context) {
Configuration conf = context.getConfiguration();
numBytesToWrite = conf.getLong(BYTES_PER_MAP,
1*1024*1024*1024);
minWordsInKey = conf.getInt(MIN_KEY, 5);
wordsInKeyRange = (conf.getInt(MAX_KEY, 10) - minWordsInKey);
minWordsInValue = conf.getInt(MIN_VALUE, 10);
wordsInValueRange = (conf.getInt(MAX_VALUE, 100) - minWordsInValue);
}
/**
* Given an output filename, write a bunch of random records to it.
*/
public void map(Text key, Text value,
Context context) throws IOException,InterruptedException {
int itemCount = 0;
while (numBytesToWrite > 0) {
// Generate the key/value
int noWordsKey = minWordsInKey +
(wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
int noWordsValue = minWordsInValue +
(wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
Text keyWords = generateSentence(noWordsKey);
Text valueWords = generateSentence(noWordsValue);
// Write the sentence
context.write(keyWords, valueWords);
numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());
// Update counters, progress etc.
context.getCounter(Counters.BYTES_WRITTEN).increment(
keyWords.getLength() + valueWords.getLength());
context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
if (++itemCount % 200 == 0) {
context.setStatus("wrote record " + itemCount + ". " +
numBytesToWrite + " bytes left.");
}
}
context.setStatus("done with " + itemCount + " records.");
}
private Text generateSentence(int noWords) {
StringBuffer sentence = new StringBuffer();
String space = " ";
for (int i=0; i < noWords; ++i) {
sentence.append(words[random.nextInt(words.length)]);
sentence.append(space);
}
return new Text(sentence.toString());
}
}
/**
* This is the main routine for launching a distributed random write job.
* It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
* The reduce doesn't do anything.
*
* @throws IOException
*/
public int run(String[] args) throws Exception {
if (args.length == 0) {
return printUsage();
}
Configuration conf = getConf();
JobClient client = new JobClient(conf);
ClusterStatus cluster = client.getClusterStatus();
int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
1*1024*1024*1024);
if (numBytesToWritePerMap == 0) {
System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
return -2;
}
long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
if (numMaps == 0 && totalBytesToWrite > 0) {
numMaps = 1;
conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
}
conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
Job job = new Job(conf);
job.setJarByClass(RandomTextWriter.class);
job.setJobName("random-text-writer");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
job.setMapperClass(RandomTextMapper.class);
Class<? extends OutputFormat> outputFormatClass =
SequenceFileOutputFormat.class;
List<String> otherArgs = new ArrayList<String>();
for(int i=0; i < args.length; ++i) {
try {
if ("-outFormat".equals(args[i])) {
outputFormatClass =
Class.forName(args[++i]).asSubclass(OutputFormat.class);
} else {
otherArgs.add(args[i]);
}
} catch (ArrayIndexOutOfBoundsException except) {
System.out.println("ERROR: Required parameter missing from " +
args[i-1]);
return printUsage(); // exits
}
}
job.setOutputFormatClass(outputFormatClass);
FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
System.out.println("Running " + numMaps + " maps.");
// reducer NONE
job.setNumReduceTasks(0);
Date startTime = new Date();
System.out.println("Job started: " + startTime);
int ret = job.waitForCompletion(true) ? 0 : 1;
Date endTime = new Date();
System.out.println("Job ended: " + endTime);
System.out.println("The job took " +
(endTime.getTime() - startTime.getTime()) /1000 +
" seconds.");
return ret;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new RandomTextWriter(), args);
System.exit(res);
}
/**
* A random list of 100 words from /usr/share/dict/words
*/
private static String[] words = {
"diurnalness", "Homoiousian",
"spiranthic", "tetragynian",
"silverhead", "ungreat",
"lithograph", "exploiter",
"physiologian", "by",
"hellbender", "Filipendula",
"undeterring", "antiscolic",
"pentagamist", "hypoid",
"cacuminal", "sertularian",
"schoolmasterism", "nonuple",
"gallybeggar", "phytonic",
"swearingly", "nebular",
"Confervales", "thermochemically",
"characinoid", "cocksuredom",
"fallacious", "feasibleness",
"debromination", "playfellowship",
"tramplike", "testa",
"participatingly", "unaccessible",
"bromate", "experientialist",
"roughcast", "docimastical",
"choralcelo", "blightbird",
"peptonate", "sombreroed",
"unschematized", "antiabolitionist",
"besagne", "mastication",
"bromic", "sviatonosite",
"cattimandoo", "metaphrastical",
"endotheliomyoma", "hysterolysis",
"unfulminated", "Hester",
"oblongly", "blurredness",
"authorling", "chasmy",
"Scorpaenidae", "toxihaemia",
"Dictograph", "Quakerishly",
"deaf", "timbermonger",
"strammel", "Thraupidae",
"seditious", "plerome",
"Arneb", "eristically",
"serpentinic", "glaumrie",
"socioromantic", "apocalypst",
"tartrous", "Bassaris",
"angiolymphoma", "horsefly",
"kenno", "astronomize",
"euphemious", "arsenide",
"untongued", "parabolicness",
"uvanite", "helpless",
"gemmeous", "stormy",
"templar", "erythrodextrin",
"comism", "interfraternal",
"preparative", "parastas",
"frontoorbital", "Ophiosaurus",
"diopside", "serosanguineous",
"ununiformly", "karyological",
"collegian", "allotropic",
"depravity", "amylogenesis",
"reformatory", "epidymides",
"pleurotropous", "trillium",
"dastardliness", "coadvice",
"embryotic", "benthonic",
"pomiferous", "figureheadship",
"Megaluridae", "Harpa",
"frenal", "commotion",
"abthainry", "cobeliever",
"manilla", "spiciferous",
"nativeness", "obispo",
"monilioid", "biopsic",
"valvula", "enterostomy",
"planosubulate", "pterostigma",
"lifter", "triradiated",
"venialness", "tum",
"archistome", "tautness",
"unswanlike", "antivenin",
"Lentibulariaceae", "Triphora",
"angiopathy", "anta",
"Dawsonia", "becomma",
"Yannigan", "winterproof",
"antalgol", "harr",
"underogating", "ineunt",
"cornberry", "flippantness",
"scyphostoma", "approbation",
"Ghent", "Macraucheniidae",
"scabbiness", "unanatomized",
"photoelasticity", "eurythermal",
"enation", "prepavement",
"flushgate", "subsequentially",
"Edo", "antihero",
"Isokontae", "unforkedness",
"porriginous", "daytime",
"nonexecutive", "trisilicic",
"morphiomania", "paranephros",
"botchedly", "impugnation",
"Dodecatheon", "obolus",
"unburnt", "provedore",
"Aktistetae", "superindifference",
"Alethea", "Joachimite",
"cyanophilous", "chorograph",
"brooky", "figured",
"periclitation", "quintette",
"hondo", "ornithodelphous",
"unefficient", "pondside",
"bogydom", "laurinoxylon",
"Shiah", "unharmed",
"cartful", "noncrystallized",
"abusiveness", "cromlech",
"japanned", "rizzomed",
"underskin", "adscendent",
"allectory", "gelatinousness",
"volcano", "uncompromisingly",
"cubit", "idiotize",
"unfurbelowed", "undinted",
"magnetooptics", "Savitar",
"diwata", "ramosopalmate",
"Pishquow", "tomorn",
"apopenptic", "Haversian",
"Hysterocarpus", "ten",
"outhue", "Bertat",
"mechanist", "asparaginic",
"velaric", "tonsure",
"bubble", "Pyrales",
"regardful", "glyphography",
"calabazilla", "shellworker",
"stradametrical", "havoc",
"theologicopolitical", "sawdust",
"diatomaceous", "jajman",
"temporomastoid", "Serrifera",
"Ochnaceae", "aspersor",
"trailmaking", "Bishareen",
"digitule", "octogynous",
"epididymitis", "smokefarthings",
"bacillite", "overcrown",
"mangonism", "sirrah",
"undecorated", "psychofugal",
"bismuthiferous", "rechar",
"Lemuridae", "frameable",
"thiodiazole", "Scanic",
"sportswomanship", "interruptedness",
"admissory", "osteopaedion",
"tingly", "tomorrowness",
"ethnocracy", "trabecular",
"vitally", "fossilism",
"adz", "metopon",
"prefatorial", "expiscate",
"diathermacy", "chronist",
"nigh", "generalizable",
"hysterogen", "aurothiosulphuric",
"whitlowwort", "downthrust",
"Protestantize", "monander",
"Itea", "chronographic",
"silicize", "Dunlop",
"eer", "componental",
"spot", "pamphlet",
"antineuritic", "paradisean",
"interruptor", "debellator",
"overcultured", "Florissant",
"hyocholic", "pneumatotherapy",
"tailoress", "rave",
"unpeople", "Sebastian",
"thermanesthesia", "Coniferae",
"swacking", "posterishness",
"ethmopalatal", "whittle",
"analgize", "scabbardless",
"naught", "symbiogenetically",
"trip", "parodist",
"columniform", "trunnel",
"yawler", "goodwill",
"pseudohalogen", "swangy",
"cervisial", "mediateness",
"genii", "imprescribable",
"pony", "consumptional",
"carposporangial", "poleax",
"bestill", "subfebrile",
"sapphiric", "arrowworm",
"qualminess", "ultraobscure",
"thorite", "Fouquieria",
"Bermudian", "prescriber",
"elemicin", "warlike",
"semiangle", "rotular",
"misthread", "returnability",
"seraphism", "precostal",
"quarried", "Babylonism",
"sangaree", "seelful",
"placatory", "pachydermous",
"bozal", "galbulus",
"spermaphyte", "cumbrousness",
"pope", "signifier",
"Endomycetaceae", "shallowish",
"sequacity", "periarthritis",
"bathysphere", "pentosuria",
"Dadaism", "spookdom",
"Consolamentum", "afterpressure",
"mutter", "louse",
"ovoviviparous", "corbel",
"metastoma", "biventer",
"Hydrangea", "hogmace",
"seizing", "nonsuppressed",
"oratorize", "uncarefully",
"benzothiofuran", "penult",
"balanocele", "macropterous",
"dishpan", "marten",
"absvolt", "jirble",
"parmelioid", "airfreighter",
"acocotl", "archesporial",
"hypoplastral", "preoral",
"quailberry", "cinque",
"terrestrially", "stroking",
"limpet", "moodishness",
"canicule", "archididascalian",
"pompiloid", "overstaid",
"introducer", "Italical",
"Christianopaganism", "prescriptible",
"subofficer", "danseuse",
"cloy", "saguran",
"frictionlessly", "deindividualization",
"Bulanda", "ventricous",
"subfoliar", "basto",
"scapuloradial", "suspend",
"stiffish", "Sphenodontidae",
"eternal", "verbid",
"mammonish", "upcushion",
"barkometer", "concretion",
"preagitate", "incomprehensible",
"tristich", "visceral",
"hemimelus", "patroller",
"stentorophonic", "pinulus",
"kerykeion", "brutism",
"monstership", "merciful",
"overinstruct", "defensibly",
"bettermost", "splenauxe",
"Mormyrus", "unreprimanded",
"taver", "ell",
"proacquittal", "infestation",
"overwoven", "Lincolnlike",
"chacona", "Tamil",
"classificational", "lebensraum",
"reeveland", "intuition",
"Whilkut", "focaloid",
"Eleusinian", "micromembrane",
"byroad", "nonrepetition",
"bacterioblast", "brag",
"ribaldrous", "phytoma",
"counteralliance", "pelvimetry",
"pelf", "relaster",
"thermoresistant", "aneurism",
"molossic", "euphonym",
"upswell", "ladhood",
"phallaceous", "inertly",
"gunshop", "stereotypography",
"laryngic", "refasten",
"twinling", "oflete",
"hepatorrhaphy", "electrotechnics",
"cockal", "guitarist",
"topsail", "Cimmerianism",
"larklike", "Llandovery",
"pyrocatechol", "immatchable",
"chooser", "metrocratic",
"craglike", "quadrennial",
"nonpoisonous", "undercolored",
"knob", "ultratense",
"balladmonger", "slait",
"sialadenitis", "bucketer",
"magnificently", "unstipulated",
"unscourged", "unsupercilious",
"packsack", "pansophism",
"soorkee", "percent",
"subirrigate", "champer",
"metapolitics", "spherulitic",
"involatile", "metaphonical",
"stachyuraceous", "speckedness",
"bespin", "proboscidiform",
"gul", "squit",
"yeelaman", "peristeropode",
"opacousness", "shibuichi",
"retinize", "yote",
"misexposition", "devilwise",
"pumpkinification", "vinny",
"bonze", "glossing",
"decardinalize", "transcortical",
"serphoid", "deepmost",
"guanajuatite", "wemless",
"arval", "lammy",
"Effie", "Saponaria",
"tetrahedral", "prolificy",
"excerpt", "dunkadoo",
"Spencerism", "insatiately",
"Gilaki", "oratorship",
"arduousness", "unbashfulness",
"Pithecolobium", "unisexuality",
"veterinarian", "detractive",
"liquidity", "acidophile",
"proauction", "sural",
"totaquina", "Vichyite",
"uninhabitedness", "allegedly",
"Gothish", "manny",
"Inger", "flutist",
"ticktick", "Ludgatian",
"homotransplant", "orthopedical",
"diminutively", "monogoneutic",
"Kenipsim", "sarcologist",
"drome", "stronghearted",
"Fameuse", "Swaziland",
"alen", "chilblain",
"beatable", "agglomeratic",
"constitutor", "tendomucoid",
"porencephalous", "arteriasis",
"boser", "tantivy",
"rede", "lineamental",
"uncontradictableness", "homeotypical",
"masa", "folious",
"dosseret", "neurodegenerative",
"subtransverse", "Chiasmodontidae",
"palaeotheriodont", "unstressedly",
"chalcites", "piquantness",
"lampyrine", "Aplacentalia",
"projecting", "elastivity",
"isopelletierin", "bladderwort",
"strander", "almud",
"iniquitously", "theologal",
"bugre", "chargeably",
"imperceptivity", "meriquinoidal",
"mesophyte", "divinator",
"perfunctory", "counterappellant",
"synovial", "charioteer",
"crystallographical", "comprovincial",
"infrastapedial", "pleasurehood",
"inventurous", "ultrasystematic",
"subangulated", "supraoesophageal",
"Vaishnavism", "transude",
"chrysochrous", "ungrave",
"reconciliable", "uninterpleaded",
"erlking", "wherefrom",
"aprosopia", "antiadiaphorist",
"metoxazine", "incalculable",
"umbellic", "predebit",
"foursquare", "unimmortal",
"nonmanufacture", "slangy",
"predisputant", "familist",
"preaffiliate", "friarhood",
"corelysis", "zoonitic",
"halloo", "paunchy",
"neuromimesis", "aconitine",
"hackneyed", "unfeeble",
"cubby", "autoschediastical",
"naprapath", "lyrebird",
"inexistency", "leucophoenicite",
"ferrogoslarite", "reperuse",
"uncombable", "tambo",
"propodiale", "diplomatize",
"Russifier", "clanned",
"corona", "michigan",
"nonutilitarian", "transcorporeal",
"bought", "Cercosporella",
"stapedius", "glandularly",
"pictorially", "weism",
"disilane", "rainproof",
"Caphtor", "scrubbed",
"oinomancy", "pseudoxanthine",
"nonlustrous", "redesertion",
"Oryzorictinae", "gala",
"Mycogone", "reappreciate",
"cyanoguanidine", "seeingness",
"breadwinner", "noreast",
"furacious", "epauliere",
"omniscribent", "Passiflorales",
"uninductive", "inductivity",
"Orbitolina", "Semecarpus",
"migrainoid", "steprelationship",
"phlogisticate", "mesymnion",
"sloped", "edificator",
"beneficent", "culm",
"paleornithology", "unurban",
"throbless", "amplexifoliate",
"sesquiquintile", "sapience",
"astucious", "dithery",
"boor", "ambitus",
"scotching", "uloid",
"uncompromisingness", "hoove",
"waird", "marshiness",
"Jerusalem", "mericarp",
"unevoked", "benzoperoxide",
"outguess", "pyxie",
"hymnic", "euphemize",
"mendacity", "erythremia",
"rosaniline", "unchatteled",
"lienteria", "Bushongo",
"dialoguer", "unrepealably",
"rivethead", "antideflation",
"vinegarish", "manganosiderite",
"doubtingness", "ovopyriform",
"Cephalodiscus", "Muscicapa",
"Animalivora", "angina",
"planispheric", "ipomoein",
"cuproiodargyrite", "sandbox",
"scrat", "Munnopsidae",
"shola", "pentafid",
"overstudiousness", "times",
"nonprofession", "appetible",
"valvulotomy", "goladar",
"uniarticular", "oxyterpene",
"unlapsing", "omega",
"trophonema", "seminonflammable",
"circumzenithal", "starer",
"depthwise", "liberatress",
"unleavened", "unrevolting",
"groundneedle", "topline",
"wandoo", "umangite",
"ordinant", "unachievable",
"oversand", "snare",
"avengeful", "unexplicit",
"mustafina", "sonable",
"rehabilitative", "eulogization",
"papery", "technopsychology",
"impressor", "cresylite",
"entame", "transudatory",
"scotale", "pachydermatoid",
"imaginary", "yeat",
"slipped", "stewardship",
"adatom", "cockstone",
"skyshine", "heavenful",
"comparability", "exprobratory",
"dermorhynchous", "parquet",
"cretaceous", "vesperal",
"raphis", "undangered",
"Glecoma", "engrain",
"counteractively", "Zuludom",
"orchiocatabasis", "Auriculariales",
"warriorwise", "extraorganismal",
"overbuilt", "alveolite",
"tetchy", "terrificness",
"widdle", "unpremonished",
"rebilling", "sequestrum",
"equiconvex", "heliocentricism",
"catabaptist", "okonite",
"propheticism", "helminthagogic",
"calycular", "giantly",
"wingable", "golem",
"unprovided", "commandingness",
"greave", "haply",
"doina", "depressingly",
"subdentate", "impairment",
"decidable", "neurotrophic",
"unpredict", "bicorporeal",
"pendulant", "flatman",
"intrabred", "toplike",
"Prosobranchiata", "farrantly",
"toxoplasmosis", "gorilloid",
"dipsomaniacal", "aquiline",
"atlantite", "ascitic",
"perculsive", "prospectiveness",
"saponaceous", "centrifugalization",
"dinical", "infravaginal",
"beadroll", "affaite",
"Helvidian", "tickleproof",
"abstractionism", "enhedge",
"outwealth", "overcontribute",
"coldfinch", "gymnastic",
"Pincian", "Munychian",
"codisjunct", "quad",
"coracomandibular", "phoenicochroite",
"amender", "selectivity",
"putative", "semantician",
"lophotrichic", "Spatangoidea",
"saccharogenic", "inferent",
"Triconodonta", "arrendation",
"sheepskin", "taurocolla",
"bunghole", "Machiavel",
"triakistetrahedral", "dehairer",
"prezygapophysial", "cylindric",
"pneumonalgia", "sleigher",
"emir", "Socraticism",
"licitness", "massedly",
"instructiveness", "sturdied",
"redecrease", "starosta",
"evictor", "orgiastic",
"squdge", "meloplasty",
"Tsonecan", "repealableness",
"swoony", "myesthesia",
"molecule", "autobiographist",
"reciprocation", "refective",
"unobservantness", "tricae",
"ungouged", "floatability",
"Mesua", "fetlocked",
"chordacentrum", "sedentariness",
"various", "laubanite",
"nectopod", "zenick",
"sequentially", "analgic",
"biodynamics", "posttraumatic",
"nummi", "pyroacetic",
"bot", "redescend",
"dispermy", "undiffusive",
"circular", "trillion",
"Uraniidae", "ploration",
"discipular", "potentness",
"sud", "Hu",
"Eryon", "plugger",
"subdrainage", "jharal",
"abscission", "supermarket",
"countergabion", "glacierist",
"lithotresis", "minniebush",
"zanyism", "eucalypteol",
"sterilely", "unrealize",
"unpatched", "hypochondriacism",
"critically", "cheesecutter",
};
}
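A worked example of the map-count arithmetic in run() above; the numbers are illustrative and only mirror the defaults visible in the code.
// Illustrative sizing for random-text-writer (values are made up).
long bytesPerMap  = 1L * 1024 * 1024 * 1024;   // BYTES_PER_MAP default: 1 GB
int  mapsPerHost  = 10;                        // MAPS_PER_HOST default
int  taskTrackers = 20;                        // from ClusterStatus
long totalBytes   = mapsPerHost * bytesPerMap * taskTrackers;  // TOTAL_BYTES default
int  numMaps      = (int) (totalBytes / bytesPerMap);          // 200 maps, ~1 GB each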

View File

@ -0,0 +1,298 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* This program uses map/reduce to just run a distributed job where there is
* no interaction between the tasks and each task writes a large unsorted
* random binary sequence file of BytesWritable.
* In order for this program to generate data for terasort with 10-byte keys
* and 90-byte values, have the following config:
* <xmp>
* <?xml version="1.0"?>
* <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
* <configuration>
* <property>
* <name>mapreduce.randomwriter.minkey</name>
* <value>10</value>
* </property>
* <property>
* <name>mapreduce.randomwriter.maxkey</name>
* <value>10</value>
* </property>
* <property>
* <name>mapreduce.randomwriter.minvalue</name>
* <value>90</value>
* </property>
* <property>
* <name>mapreduce.randomwriter.maxvalue</name>
* <value>90</value>
* </property>
* <property>
* <name>mapreduce.randomwriter.totalbytes</name>
* <value>1099511627776</value>
* </property>
* </configuration></xmp>
*
* Equivalently, {@link RandomWriter} also supports all the above options
* and ones supported by {@link GenericOptionsParser} via the command-line.
*/
public class RandomWriter extends Configured implements Tool {
public static final String TOTAL_BYTES = "mapreduce.randomwriter.totalbytes";
public static final String BYTES_PER_MAP =
"mapreduce.randomwriter.bytespermap";
public static final String MAPS_PER_HOST =
"mapreduce.randomwriter.mapsperhost";
public static final String MAX_VALUE = "mapreduce.randomwriter.maxvalue";
public static final String MIN_VALUE = "mapreduce.randomwriter.minvalue";
public static final String MIN_KEY = "mapreduce.randomwriter.minkey";
public static final String MAX_KEY = "mapreduce.randomwriter.maxkey";
/**
* User counters
*/
static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
/**
* A custom input format that creates virtual inputs of a single string
* for each map.
*/
static class RandomInputFormat extends InputFormat<Text, Text> {
/**
* Generate the requested number of file splits, with the filename
* set to the filename of the output file.
*/
public List<InputSplit> getSplits(JobContext job) throws IOException {
List<InputSplit> result = new ArrayList<InputSplit>();
Path outDir = FileOutputFormat.getOutputPath(job);
int numSplits =
job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
for(int i=0; i < numSplits; ++i) {
result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1,
(String[])null));
}
return result;
}
/**
* Return a single record (filename, "") where the filename is taken from
* the file split.
*/
static class RandomRecordReader extends RecordReader<Text, Text> {
Path name;
Text key = null;
Text value = new Text();
public RandomRecordReader(Path p) {
name = p;
}
public void initialize(InputSplit split,
TaskAttemptContext context)
throws IOException, InterruptedException {
}
public boolean nextKeyValue() {
if (name != null) {
key = new Text();
key.set(name.getName());
name = null;
return true;
}
return false;
}
public Text getCurrentKey() {
return key;
}
public Text getCurrentValue() {
return value;
}
public void close() {}
public float getProgress() {
return 0.0f;
}
}
public RecordReader<Text, Text> createRecordReader(InputSplit split,
TaskAttemptContext context) throws IOException, InterruptedException {
return new RandomRecordReader(((FileSplit) split).getPath());
}
}
static class RandomMapper extends Mapper<WritableComparable, Writable,
BytesWritable, BytesWritable> {
private long numBytesToWrite;
private int minKeySize;
private int keySizeRange;
private int minValueSize;
private int valueSizeRange;
private Random random = new Random();
private BytesWritable randomKey = new BytesWritable();
private BytesWritable randomValue = new BytesWritable();
private void randomizeBytes(byte[] data, int offset, int length) {
for(int i=offset + length - 1; i >= offset; --i) {
data[i] = (byte) random.nextInt(256);
}
}
/**
* Given an output filename, write a bunch of random records to it.
*/
public void map(WritableComparable key,
Writable value,
Context context) throws IOException,InterruptedException {
int itemCount = 0;
while (numBytesToWrite > 0) {
int keyLength = minKeySize +
(keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
randomKey.setSize(keyLength);
randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
int valueLength = minValueSize +
(valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
randomValue.setSize(valueLength);
randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
context.write(randomKey, randomValue);
numBytesToWrite -= keyLength + valueLength;
context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength);
context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
if (++itemCount % 200 == 0) {
context.setStatus("wrote record " + itemCount + ". " +
numBytesToWrite + " bytes left.");
}
}
context.setStatus("done with " + itemCount + " records.");
}
/**
* Save the values out of the configuration that we need to write
* the data.
*/
@Override
public void setup(Context context) {
Configuration conf = context.getConfiguration();
numBytesToWrite = conf.getLong(BYTES_PER_MAP,
1*1024*1024*1024);
minKeySize = conf.getInt(MIN_KEY, 10);
keySizeRange =
conf.getInt(MAX_KEY, 1000) - minKeySize;
minValueSize = conf.getInt(MIN_VALUE, 0);
valueSizeRange =
conf.getInt(MAX_VALUE, 20000) - minValueSize;
}
}
/**
* This is the main routine for launching a distributed random write job.
* It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
* The reduce doesn't do anything.
*
* @throws IOException
*/
public int run(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: writer <out-dir>");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
Path outDir = new Path(args[0]);
Configuration conf = getConf();
JobClient client = new JobClient(conf);
ClusterStatus cluster = client.getClusterStatus();
int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
1*1024*1024*1024);
if (numBytesToWritePerMap == 0) {
System.err.println("Cannot have" + BYTES_PER_MAP + " set to 0");
return -2;
}
long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
if (numMaps == 0 && totalBytesToWrite > 0) {
numMaps = 1;
conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
}
conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
Job job = new Job(conf);
job.setJarByClass(RandomWriter.class);
job.setJobName("random-writer");
FileOutputFormat.setOutputPath(job, outDir);
job.setOutputKeyClass(BytesWritable.class);
job.setOutputValueClass(BytesWritable.class);
job.setInputFormatClass(RandomInputFormat.class);
job.setMapperClass(RandomMapper.class);
job.setReducerClass(Reducer.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
System.out.println("Running " + numMaps + " maps.");
// reducer NONE
job.setNumReduceTasks(0);
Date startTime = new Date();
System.out.println("Job started: " + startTime);
int ret = job.waitForCompletion(true) ? 0 : 1;
Date endTime = new Date();
System.out.println("Job ended: " + endTime);
System.out.println("The job took " +
(endTime.getTime() - startTime.getTime()) /1000 +
" seconds.");
return ret;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new RandomWriter(), args);
System.exit(res);
}
}
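One point worth spelling out: RandomInputFormat hands each map task a dummy split whose record reader yields exactly one (filename, "") pair, so the framework invokes map() once per split and the while-loop inside map() does all of the writing. A small hedged sketch of that single-record behaviour follows; the split name is illustrative and the caller is assumed to live in the same package, since the nested classes are package-private.
// Sketch only; not part of this diff.
RandomWriter.RandomInputFormat.RandomRecordReader rr =
    new RandomWriter.RandomInputFormat.RandomRecordReader(new Path("dummy-split-0"));
boolean first  = rr.nextKeyValue();   // true:  the one synthetic record
boolean second = rr.nextKeyValue();   // false: map() will not be called again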

View File

@ -19,7 +19,9 @@
package org.apache.hadoop.mapreduce.lib.input;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import static org.junit.Assert.*;
@ -28,10 +30,15 @@ import static org.mockito.Mockito.*;
import static org.apache.hadoop.test.MockitoMaker.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
public class TestFileInputFormat {
@ -80,4 +87,108 @@ public class TestFileInputFormat {
ispy.getSplits(job);
verify(conf).setLong(FileInputFormat.NUM_INPUT_FILES, 1);
}
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testLastInputSplitAtSplitBoundary() throws Exception {
FileInputFormat fif = new FileInputFormatForTest(1024l * 1024 * 1024,
128l * 1024 * 1024);
Configuration conf = new Configuration();
JobContext jobContext = mock(JobContext.class);
when(jobContext.getConfiguration()).thenReturn(conf);
List<InputSplit> splits = fif.getSplits(jobContext);
assertEquals(8, splits.size());
for (int i = 0 ; i < splits.size() ; i++) {
InputSplit split = splits.get(i);
assertEquals(("host" + i), split.getLocations()[0]);
}
}
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testLastInputSplitExceedingSplitBoundary() throws Exception {
FileInputFormat fif = new FileInputFormatForTest(1027l * 1024 * 1024,
128l * 1024 * 1024);
Configuration conf = new Configuration();
JobContext jobContext = mock(JobContext.class);
when(jobContext.getConfiguration()).thenReturn(conf);
List<InputSplit> splits = fif.getSplits(jobContext);
assertEquals(8, splits.size());
for (int i = 0; i < splits.size(); i++) {
InputSplit split = splits.get(i);
assertEquals(("host" + i), split.getLocations()[0]);
}
}
@Test
@SuppressWarnings({ "rawtypes", "unchecked" })
public void testLastInputSplitSingleSplit() throws Exception {
FileInputFormat fif = new FileInputFormatForTest(100l * 1024 * 1024,
128l * 1024 * 1024);
Configuration conf = new Configuration();
JobContext jobContext = mock(JobContext.class);
when(jobContext.getConfiguration()).thenReturn(conf);
List<InputSplit> splits = fif.getSplits(jobContext);
assertEquals(1, splits.size());
for (int i = 0; i < splits.size(); i++) {
InputSplit split = splits.get(i);
assertEquals(("host" + i), split.getLocations()[0]);
}
}
private class FileInputFormatForTest<K, V> extends FileInputFormat<K, V> {
long splitSize;
long length;
FileInputFormatForTest(long length, long splitSize) {
this.length = length;
this.splitSize = splitSize;
}
@Override
public RecordReader<K, V> createRecordReader(InputSplit split,
TaskAttemptContext context) throws IOException, InterruptedException {
return null;
}
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
FileStatus mockFileStatus = mock(FileStatus.class);
when(mockFileStatus.getBlockSize()).thenReturn(splitSize);
Path mockPath = mock(Path.class);
FileSystem mockFs = mock(FileSystem.class);
BlockLocation[] blockLocations = mockBlockLocations(length, splitSize);
when(mockFs.getFileBlockLocations(mockFileStatus, 0, length)).thenReturn(
blockLocations);
when(mockPath.getFileSystem(any(Configuration.class))).thenReturn(mockFs);
when(mockFileStatus.getPath()).thenReturn(mockPath);
when(mockFileStatus.getLen()).thenReturn(length);
List<FileStatus> list = new ArrayList<FileStatus>();
list.add(mockFileStatus);
return list;
}
@Override
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
return splitSize;
}
private BlockLocation[] mockBlockLocations(long size, long splitSize) {
int numLocations = (int) (size / splitSize);
if (size % splitSize != 0)
numLocations++;
BlockLocation[] blockLocations = new BlockLocation[numLocations];
for (int i = 0; i < numLocations; i++) {
String[] names = new String[] { "b" + i };
String[] hosts = new String[] { "host" + i };
blockLocations[i] = new BlockLocation(names, hosts, i * splitSize,
Math.min(splitSize, size - (splitSize * i)));
}
return blockLocations;
}
}
}

View File

@ -238,9 +238,11 @@ public class TeraGen extends Configured implements Tool {
@Override
public void cleanup(Context context) {
if (checksumCounter != null) {
checksumCounter.increment(total.getLow8());
}
}
}
private static void usage() throws IOException {
System.err.println("teragen <num rows> <output dir>");
@ -307,5 +309,4 @@ public class TeraGen extends Configured implements Tool {
int res = ToolRunner.run(new Configuration(), new TeraGen(), args);
System.exit(res);
}
}

View File

@ -85,20 +85,6 @@ public interface ApplicationConstants {
public static final String STDOUT = "stdout";
/**
* Classpath for typical applications.
*/
public static final String[] APPLICATION_CLASSPATH =
new String[] {
"$HADOOP_CONF_DIR",
"$HADOOP_COMMON_HOME/share/hadoop/common/*",
"$HADOOP_COMMON_HOME/share/hadoop/common/lib/*",
"$HADOOP_HDFS_HOME/share/hadoop/hdfs/*",
"$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*",
"$YARN_HOME/share/hadoop/mapreduce/*",
"$YARN_HOME/share/hadoop/mapreduce/lib/*"
};
/**
* Environment for Applications.
*

View File

@ -508,6 +508,10 @@ public class YarnConfiguration extends Configuration {
public static final long DEFAULT_NM_PROCESS_KILL_WAIT_MS =
2000;
/** Standard Hadoop classes */
public static final String YARN_APPLICATION_CLASSPATH = YARN_PREFIX
+ "application.classpath";
public YarnConfiguration() {
super();
}

View File

@ -36,6 +36,7 @@ import com.google.common.collect.Lists;
import com.google.inject.Provides;
import com.google.inject.servlet.GuiceFilter;
import com.google.inject.servlet.ServletModule;
import com.sun.jersey.api.container.filter.GZIPContentEncodingFilter;
import com.sun.jersey.api.core.ResourceConfig;
import com.sun.jersey.core.util.FeaturesAndProperties;
import com.sun.jersey.guice.spi.container.servlet.GuiceContainer;
@ -160,6 +161,8 @@ public abstract class WebApp extends ServletModule {
params.put(ResourceConfig.FEATURE_IMPLICIT_VIEWABLES, "true");
params.put(ServletContainer.FEATURE_FILTER_FORWARD_ON_404, "true");
params.put(FeaturesAndProperties.FEATURE_XMLROOTELEMENT_PROCESSING, "true");
params.put(ResourceConfig.PROPERTY_CONTAINER_REQUEST_FILTERS, GZIPContentEncodingFilter.class.getName());
params.put(ResourceConfig.PROPERTY_CONTAINER_RESPONSE_FILTERS, GZIPContentEncodingFilter.class.getName());
filter("/*").through(GuiceContainer.class, params);
}
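Registering GZIPContentEncodingFilter on both the request and response sides means Jersey will transparently gunzip compressed request bodies and gzip responses for clients that advertise support. A hedged client-side sketch follows; the host, port, and resource path are illustrative, not taken from this diff.
// Minimal client check that the response is gzipped only when requested (sketch).
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.zip.GZIPInputStream;
public class GzipWsClient {
  public static void main(String[] args) throws Exception {
    HttpURLConnection conn = (HttpURLConnection)
        new URL("http://rmhost:8088/ws/v1/cluster/info").openConnection();
    conn.setRequestProperty("Accept-Encoding", "gzip");
    InputStream in = "gzip".equals(conn.getContentEncoding())
        ? new GZIPInputStream(conn.getInputStream())
        : conn.getInputStream();
    System.out.println("bytes available: " + in.available());
    in.close();
  }
}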

View File

@ -482,4 +482,18 @@
<name>yarn.web-proxy.address</name>
<value/>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
$YARN_HOME/share/hadoop/mapreduce/*,
$YARN_HOME/share/hadoop/mapreduce/lib/*
</value>
</property>
</configuration>
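A hedged sketch of how these comma-separated entries are presumably consumed when a container environment is assembled (the real logic lives in MRApps.setClasspath(environment, conf), which is not shown in full in this diff).
// Sketch, assuming the standard Configuration/YarnConfiguration APIs.
Configuration conf = new YarnConfiguration();
Map<String, String> environment = new HashMap<String, String>();
String[] entries = conf.getStrings(YarnConfiguration.YARN_APPLICATION_CLASSPATH);
if (entries != null) {
  for (String entry : entries) {
    String cur = environment.get("CLASSPATH");
    environment.put("CLASSPATH",
        cur == null ? entry.trim() : cur + ":" + entry.trim());
  }
}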

View File

@ -295,10 +295,6 @@ public class SchedulerApp {
}
}
public synchronized void setAvailableResourceLimit(Resource globalLimit) {
this.resourceLimit = globalLimit;
}
public synchronized RMContainer getRMContainer(ContainerId id) {
return liveContainers.get(id);
}
@ -446,20 +442,21 @@ public class SchedulerApp {
return reservedContainers;
}
public synchronized void setHeadroom(Resource globalLimit) {
this.resourceLimit = globalLimit;
}
/**
* Get available headroom in terms of resources for the application's user.
* @return available resource headroom
*/
public synchronized Resource getHeadroom() {
Resource limit = Resources.subtract(resourceLimit, currentConsumption);
Resources.subtractFrom(limit, currentReservation);
// Corner case to deal with applications being slightly over-limit
if (limit.getMemory() < 0) {
limit.setMemory(0);
if (resourceLimit.getMemory() < 0) {
resourceLimit.setMemory(0);
}
return limit;
return resourceLimit;
}
public Queue getQueue() {

View File

@ -17,12 +17,19 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
import org.apache.hadoop.yarn.api.records.Resource;
class CSQueueUtils {
public static void checkMaxCapacity(String queueName,
float capacity, float maximumCapacity) {
if (Math.round(100 * maximumCapacity) != CapacitySchedulerConfiguration.UNDEFINED &&
if (maximumCapacity < 0.0f || maximumCapacity > 1.0f ||
maximumCapacity < capacity) {
throw new IllegalArgumentException(
"Illegal value of maximumCapacity " + maximumCapacity +
" used in call to setMaxCapacity for queue " + queueName);
}
if (maximumCapacity < capacity) {
throw new IllegalArgumentException(
"Illegal call to setMaxCapacity. " +
"Queue '" + queueName + "' has " +
@ -31,4 +38,25 @@ class CSQueueUtils {
}
}
public static float computeAbsoluteMaximumCapacity(
float maximumCapacity, CSQueue parent) {
float parentAbsMaxCapacity =
(parent == null) ? 1.0f : parent.getAbsoluteMaximumCapacity();
return (parentAbsMaxCapacity * maximumCapacity);
}
public static int computeMaxActiveApplications(Resource clusterResource,
float maxAMResourcePercent, float absoluteCapacity) {
return
Math.max(
(int)((clusterResource.getMemory() / (float)LeafQueue.DEFAULT_AM_RESOURCE) *
maxAMResourcePercent * absoluteCapacity),
1);
}
public static int computeMaxActiveApplicationsPerUser(
int maxActiveApplications, int userLimit, float userLimitFactor) {
return (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);
}
}
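Worked numbers for the two helpers above. The per-AM resource is assumed here to be 2048 MB (LeafQueue.DEFAULT_AM_RESOURCE); the rest of the values are illustrative.
// Illustrative limits for a queue owning half of a 100 GB cluster.
int   clusterMemory        = 100 * 1024;  // MB
float maxAMResourcePercent = 0.1f;        // 10% of the queue may be AM containers
float absoluteCapacity     = 0.5f;
int maxActive = Math.max(
    (int) ((clusterMemory / 2048f) * maxAMResourcePercent * absoluteCapacity), 1);  // 2
int   userLimit       = 100;
float userLimitFactor = 1.0f;
int maxActivePerUser =
    (int) (maxActive * (userLimit / 100.0f) * userLimitFactor);                      // 2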

View File

@ -149,7 +149,7 @@ public class CapacitySchedulerConfiguration extends Configuration {
throw new IllegalArgumentException("Illegal " +
"capacity of " + capacity + " for queue " + queue);
}
LOG.debug("CSConf - setCapacity: queuePrefix=" + getQueuePrefix(queue) +
LOG.debug("CSConf - getCapacity: queuePrefix=" + getQueuePrefix(queue) +
", capacity=" + capacity);
return capacity;
}
@ -162,11 +162,15 @@ public class CapacitySchedulerConfiguration extends Configuration {
public int getMaximumCapacity(String queue) {
int maxCapacity =
getInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, UNDEFINED);
getInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, MAXIMUM_CAPACITY_VALUE);
return maxCapacity;
}
public void setMaximumCapacity(String queue, int maxCapacity) {
if (maxCapacity > MAXIMUM_CAPACITY_VALUE) {
throw new IllegalArgumentException("Illegal " +
"maximum-capacity of " + maxCapacity + " for queue " + queue);
}
setInt(getQueuePrefix(queue) + MAXIMUM_CAPACITY, maxCapacity);
LOG.debug("CSConf - setMaxCapacity: queuePrefix=" + getQueuePrefix(queue) +
", maxCapacity=" + maxCapacity);

View File

@ -144,10 +144,10 @@ public class LeafQueue implements CSQueue {
(float)cs.getConfiguration().getCapacity(getQueuePath()) / 100;
float absoluteCapacity = parent.getAbsoluteCapacity() * capacity;
float maximumCapacity = (float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
float maximumCapacity =
(float)cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
float absoluteMaxCapacity =
(Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ?
Float.MAX_VALUE : (parent.getAbsoluteCapacity() * maximumCapacity);
CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
int userLimit = cs.getConfiguration().getUserLimit(getQueuePath());
float userLimitFactor =
@ -161,10 +161,10 @@ public class LeafQueue implements CSQueue {
this.maxAMResourcePercent =
cs.getConfiguration().getMaximumApplicationMasterResourcePercent();
int maxActiveApplications =
computeMaxActiveApplications(cs.getClusterResources(),
CSQueueUtils.computeMaxActiveApplications(cs.getClusterResources(),
maxAMResourcePercent, absoluteCapacity);
int maxActiveApplicationsPerUser =
computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit,
CSQueueUtils.computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit,
userLimitFactor);
this.queueInfo = recordFactory.newRecordInstance(QueueInfo.class);
@ -193,20 +193,6 @@ public class LeafQueue implements CSQueue {
this.activeApplications = new TreeSet<SchedulerApp>(applicationComparator);
}
private int computeMaxActiveApplications(Resource clusterResource,
float maxAMResourcePercent, float absoluteCapacity) {
return
Math.max(
(int)((clusterResource.getMemory() / (float)DEFAULT_AM_RESOURCE) *
maxAMResourcePercent * absoluteCapacity),
1);
}
private int computeMaxActiveApplicationsPerUser(int maxActiveApplications,
int userLimit, float userLimitFactor) {
return (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor);
}
private synchronized void setupQueueConfigs(
float capacity, float absoluteCapacity,
float maximumCapacity, float absoluteMaxCapacity,
@ -254,8 +240,8 @@ public class LeafQueue implements CSQueue {
"maxCapacity = " + maximumCapacity +
" [= configuredMaxCapacity ]" + "\n" +
"absoluteMaxCapacity = " + absoluteMaxCapacity +
" [= Float.MAX_VALUE if maximumCapacity undefined, " +
"(parentAbsoluteCapacity * maximumCapacity) / 100 otherwise ]" + "\n" +
" [= 1.0 maximumCapacity undefined, " +
"(parentAbsoluteMaxCapacity * maximumCapacity) / 100 otherwise ]" + "\n" +
"userLimit = " + userLimit +
" [= configuredUserLimit ]" + "\n" +
"userLimitFactor = " + userLimitFactor +
@ -272,9 +258,9 @@ public class LeafQueue implements CSQueue {
"maxActiveApplicationsPerUser = " + maxActiveApplicationsPerUser +
" [= (int)(maxActiveApplications * (userLimit / 100.0f) * userLimitFactor) ]" + "\n" +
"utilization = " + utilization +
" [= usedResourcesMemory / queueLimit ]" + "\n" +
" [= usedResourcesMemory / (clusterResourceMemory * absoluteCapacity)]" + "\n" +
"usedCapacity = " + usedCapacity +
" [= usedResourcesMemory / (clusterResourceMemory * capacity) ]" + "\n" +
" [= usedResourcesMemory / (clusterResourceMemory * parent.absoluteCapacity)]" + "\n" +
"maxAMResourcePercent = " + maxAMResourcePercent +
" [= configuredMaximumAMResourcePercent ]" + "\n" +
"minimumAllocationFactor = " + minimumAllocationFactor +
@ -400,9 +386,7 @@ public class LeafQueue implements CSQueue {
this.maximumCapacity = maximumCapacity;
this.absoluteMaxCapacity =
(Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ?
Float.MAX_VALUE :
(parent.getAbsoluteCapacity() * maximumCapacity);
CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
}
/**
@ -502,9 +486,14 @@ public class LeafQueue implements CSQueue {
}
public String toString() {
return queueName + ":" + capacity + ":" + absoluteCapacity + ":" +
getUsedCapacity() + ":" + getUtilization() + ":" +
getNumApplications() + ":" + getNumContainers();
return queueName + ": " +
"capacity=" + capacity + ", " +
"absoluteCapacity=" + absoluteCapacity + ", " +
"usedResources=" + usedResources.getMemory() + "MB, " +
"usedCapacity=" + getUsedCapacity() + ", " +
"utilization=" + getUtilization() + ", " +
"numApps=" + getNumApplications() + ", " +
"numContainers=" + getNumContainers();
}
private synchronized User getUser(String userName) {
@ -731,12 +720,11 @@ public class LeafQueue implements CSQueue {
if(LOG.isDebugEnabled()) {
LOG.debug("pre-assignContainers for application "
+ application.getApplicationId());
}
application.showRequests();
}
synchronized (application) {
computeAndSetUserResourceLimit(application, clusterResource);
// Schedule in priority order
for (Priority priority : application.getPriorities()) {
// Required resource
Resource required =
@ -747,15 +735,21 @@ public class LeafQueue implements CSQueue {
continue;
}
// Are we going over limits by allocating to this application?
// Maximum Capacity of the queue
// Compute & set headroom
// Note: We set the headroom with the highest priority request
// as the target.
// This works since we never assign lower priority requests
// before all higher priority ones are serviced.
Resource userLimit =
computeAndSetUserResourceLimit(application, clusterResource,
required);
// Check queue max-capacity limit
if (!assignToQueue(clusterResource, required)) {
return NULL_ASSIGNMENT;
}
// User limits
Resource userLimit =
computeUserLimit(application, clusterResource, required);
// Check user limit
if (!assignToUser(application.getUser(), userLimit)) {
break;
}
@ -830,25 +824,28 @@ public class LeafQueue implements CSQueue {
float potentialNewCapacity =
(float)(usedResources.getMemory() + required.getMemory()) /
clusterResource.getMemory();
if (potentialNewCapacity > absoluteMaxCapacity) {
LOG.info(getQueueName() +
" usedResources: " + usedResources.getMemory() +
" clusterResources: " + clusterResource.getMemory() +
" currentCapacity " + ((float)usedResources.getMemory())/clusterResource.getMemory() +
" required " + required.getMemory() +
" potentialNewCapacity: " + potentialNewCapacity + " ( " +
" max-capacity: " + absoluteMaxCapacity + ")");
if (potentialNewCapacity > absoluteMaxCapacity) {
return false;
}
return true;
}
private void computeAndSetUserResourceLimit(SchedulerApp application,
Resource clusterResource) {
Resource userLimit =
computeUserLimit(application, clusterResource, Resources.none());
application.setAvailableResourceLimit(userLimit);
metrics.setAvailableResourcesToUser(application.getUser(),
application.getHeadroom());
private Resource computeAndSetUserResourceLimit(SchedulerApp application,
Resource clusterResource, Resource required) {
String user = application.getUser();
Resource limit = computeUserLimit(application, clusterResource, required);
Resource headroom =
Resources.subtract(limit, getUser(user).getConsumedResources());
application.setHeadroom(headroom);
metrics.setAvailableResourcesToUser(user, headroom);
return limit;
}
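Illustrative numbers for the headroom handed to the application above: the queue now computes the user limit minus that user's consumed resources and pushes it in via setHeadroom(), so SchedulerApp.getHeadroom() only has to clamp a negative value to zero. Values below are made up; the Resources helpers are the scheduler's own utilities.
Resource limit    = Resources.createResource(40 * 1024);  // computed user limit, 40 GB
Resource consumed = Resources.createResource(25 * 1024);  // user's current consumption
Resource headroom = Resources.subtract(limit, consumed);  // 15 GB
application.setHeadroom(headroom);                        // what this hunk now does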
private int roundUp(int memory) {
@ -919,7 +916,7 @@ public class LeafQueue implements CSQueue {
User user = getUser(userName);
// Note: We aren't considering the current request since there is a fixed
// overhead of the AM, but it's a >= check, so...
// overhead of the AM, but it's a > check, not a >= check, so...
if ((user.getConsumedResources().getMemory()) > limit.getMemory()) {
if (LOG.isDebugEnabled()) {
LOG.debug("User " + userName + " in queue " + getQueueName() +
@ -1237,8 +1234,8 @@ public class LeafQueue implements CSQueue {
// happen under scheduler's lock...
// So, this is, in effect, a transaction across application & node
if (rmContainer.getState() == RMContainerState.RESERVED) {
application.unreserve(node, rmContainer.getReservedPriority());
node.unreserveResource(application);
unreserve(application, rmContainer.getReservedPriority(),
node, rmContainer);
} else {
application.containerCompleted(rmContainer, containerStatus, event);
node.releaseContainer(container);
@ -1303,23 +1300,24 @@ public class LeafQueue implements CSQueue {
public synchronized void updateClusterResource(Resource clusterResource) {
// Update queue properties
maxActiveApplications =
computeMaxActiveApplications(clusterResource, maxAMResourcePercent,
CSQueueUtils.computeMaxActiveApplications(clusterResource, maxAMResourcePercent,
absoluteCapacity);
maxActiveApplicationsPerUser =
computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit,
CSQueueUtils.computeMaxActiveApplicationsPerUser(maxActiveApplications, userLimit,
userLimitFactor);
// Update application properties
for (SchedulerApp application : activeApplications) {
computeAndSetUserResourceLimit(application, clusterResource);
computeAndSetUserResourceLimit(
application, clusterResource, Resources.none());
}
}
private synchronized void updateResource(Resource clusterResource) {
float queueLimit = clusterResource.getMemory() * absoluteCapacity;
setUtilization(usedResources.getMemory() / queueLimit);
setUsedCapacity(
usedResources.getMemory() / (clusterResource.getMemory() * capacity));
setUsedCapacity(usedResources.getMemory()
/ (clusterResource.getMemory() * parent.getAbsoluteCapacity()));
Resource resourceLimit =
Resources.createResource(roundUp((int)queueLimit));

View File

@ -118,16 +118,14 @@ public class ParentQueue implements CSQueue {
}
float capacity = (float) rawCapacity / 100;
float parentAbsoluteCapacity =
(parent == null) ? 1.0f : parent.getAbsoluteCapacity();
(rootQueue) ? 1.0f : parent.getAbsoluteCapacity();
float absoluteCapacity = parentAbsoluteCapacity * capacity;
float maximumCapacity =
(float) cs.getConfiguration().getMaximumCapacity(getQueuePath()) / 100;
float absoluteMaxCapacity =
(Math.round(maximumCapacity * 100) == CapacitySchedulerConfiguration.UNDEFINED) ?
Float.MAX_VALUE : (parentAbsoluteCapacity * maximumCapacity);
CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
QueueState state = cs.getConfiguration().getState(getQueuePath());
@ -333,10 +331,15 @@ public class ParentQueue implements CSQueue {
}
public String toString() {
return queueName + ":" + capacity + ":" + absoluteCapacity + ":" +
getUsedCapacity() + ":" + getUtilization() + ":" +
getNumApplications() + ":" + getNumContainers() + ":" +
childQueues.size() + " child-queues";
return queueName + ": " +
"numChildQueue= " + childQueues.size() + ", " +
"capacity=" + capacity + ", " +
"absoluteCapacity=" + absoluteCapacity + ", " +
"usedResources=" + usedResources.getMemory() + "MB, " +
"usedCapacity=" + getUsedCapacity() + ", " +
"utilization=" + getUtilization() + ", " +
"numApps=" + getNumApplications() + ", " +
"numContainers=" + getNumContainers();
}
@Override
@ -492,12 +495,8 @@ public class ParentQueue implements CSQueue {
CSQueueUtils.checkMaxCapacity(getQueueName(), capacity, maximumCapacity);
this.maximumCapacity = maximumCapacity;
float parentAbsoluteCapacity =
(rootQueue) ? 100.0f : parent.getAbsoluteCapacity();
this.absoluteMaxCapacity =
(maximumCapacity == CapacitySchedulerConfiguration.UNDEFINED) ?
Float.MAX_VALUE :
(parentAbsoluteCapacity * maximumCapacity);
CSQueueUtils.computeAbsoluteMaximumCapacity(maximumCapacity, parent);
}
@Override
@ -689,9 +688,11 @@ public class ParentQueue implements CSQueue {
private synchronized void updateResource(Resource clusterResource) {
float queueLimit = clusterResource.getMemory() * absoluteCapacity;
float parentAbsoluteCapacity =
(rootQueue) ? 1.0f : parent.getAbsoluteCapacity();
setUtilization(usedResources.getMemory() / queueLimit);
setUsedCapacity(
usedResources.getMemory() / (clusterResource.getMemory() * capacity));
setUsedCapacity(usedResources.getMemory()
/ (clusterResource.getMemory() * parentAbsoluteCapacity));
Resource resourceLimit =
Resources.createResource((int)queueLimit);

View File

@ -358,7 +358,7 @@ public class FifoScheduler implements ResourceScheduler {
}
}
application.setAvailableResourceLimit(clusterResource);
application.setHeadroom(clusterResource);
LOG.debug("post-assignContainers");
application.showRequests();

View File

@ -21,16 +21,24 @@ import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.junit.After;
@ -283,38 +291,76 @@ public class TestApplicationLimits {
final String user_0 = "user_0";
final String user_1 = "user_1";
int APPLICATION_ID = 0;
RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
RMContext rmContext = TestUtils.getMockRMContext();
// Submit first application from user_0, check headroom
SchedulerApp app_0_0 = getMockApplication(APPLICATION_ID++, user_0);
Priority priority_1 = TestUtils.createMockPriority(1);
// Submit first application with some resource-requests from user_0,
// and check headroom
final ApplicationAttemptId appAttemptId_0_0 =
TestUtils.getMockApplicationAttemptId(0, 0);
SchedulerApp app_0_0 =
spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, rmContext, null));
queue.submitApplication(app_0_0, user_0, A);
queue.assignContainers(clusterResource, node_0); // Schedule to compute
List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
app_0_0_requests.add(
TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2,
priority_1, recordFactory));
app_0_0.updateResourceRequests(app_0_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0);
Resource expectedHeadroom = Resources.createResource(10*16*GB);
verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_0).setHeadroom(eq(expectedHeadroom));
// Submit second application from user_0, check headroom
SchedulerApp app_0_1 = getMockApplication(APPLICATION_ID++, user_0);
final ApplicationAttemptId appAttemptId_0_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
SchedulerApp app_0_1 =
spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, rmContext, null));
queue.submitApplication(app_0_1, user_0, A);
List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
app_0_1_requests.add(
TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2,
priority_1, recordFactory));
app_0_1.updateResourceRequests(app_0_1_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0); // Schedule to compute
verify(app_0_0, times(2)).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));// no change
verify(app_0_0, times(2)).setHeadroom(eq(expectedHeadroom));
verify(app_0_1).setHeadroom(eq(expectedHeadroom));// no change
// Submit first application from user_1, check for new headroom
SchedulerApp app_1_0 = getMockApplication(APPLICATION_ID++, user_1);
final ApplicationAttemptId appAttemptId_1_0 =
TestUtils.getMockApplicationAttemptId(2, 0);
SchedulerApp app_1_0 =
spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, rmContext, null));
queue.submitApplication(app_1_0, user_1, A);
List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
app_1_0_requests.add(
TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2,
priority_1, recordFactory));
app_1_0.updateResourceRequests(app_1_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0); // Schedule to compute
expectedHeadroom = Resources.createResource(10*16*GB / 2); // changes
verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_0).setHeadroom(eq(expectedHeadroom));
verify(app_0_1).setHeadroom(eq(expectedHeadroom));
verify(app_1_0).setHeadroom(eq(expectedHeadroom));
// Now reduce cluster size and check for the smaller headroom
clusterResource = Resources.createResource(90*16*GB);
queue.assignContainers(clusterResource, node_0); // Schedule to compute
expectedHeadroom = Resources.createResource(9*16*GB / 2); // changes
verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_0).setHeadroom(eq(expectedHeadroom));
verify(app_0_1).setHeadroom(eq(expectedHeadroom));
verify(app_1_0).setHeadroom(eq(expectedHeadroom));
}

View File

@ -255,7 +255,7 @@ public class TestLeafQueue {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
//unset maxCapacity
a.setMaxCapacity(-0.01f);
a.setMaxCapacity(1.0f);
// Users
final String user_0 = "user_0";
@ -377,7 +377,7 @@ public class TestLeafQueue {
// Mock the queue
LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
//unset maxCapacity
a.setMaxCapacity(-0.01f);
a.setMaxCapacity(1.0f);
// Users
final String user_0 = "user_0";
@ -491,7 +491,7 @@ public class TestLeafQueue {
// Revert max-capacity and user-limit-factor
// Now, allocations should goto app_3 since it's under user-limit
a.setMaxCapacity(-0.01f);
a.setMaxCapacity(1.0f);
a.setUserLimitFactor(1);
a.assignContainers(clusterResource, node_0);
assertEquals(7*GB, a.getUsedResources().getMemory());
@ -548,7 +548,7 @@ public class TestLeafQueue {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
//unset maxCapacity
a.setMaxCapacity(-0.01f);
a.setMaxCapacity(1.0f);
// Users
final String user_0 = "user_0";
@ -571,7 +571,7 @@ public class TestLeafQueue {
String host_0 = "host_0";
SchedulerNode node_0 = TestUtils.getMockNode(host_0, DEFAULT_RACK, 0, 4*GB);
final int numNodes = 1;
final int numNodes = 2;
Resource clusterResource = Resources.createResource(numNodes * (4*GB));
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
@ -646,7 +646,7 @@ public class TestLeafQueue {
// Manipulate queue 'a'
LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
//unset maxCapacity
a.setMaxCapacity(-0.01f);
a.setMaxCapacity(1.0f);
a.setUserLimitFactor(10);
// Users
@ -673,7 +673,7 @@ public class TestLeafQueue {
String host_1 = "host_1";
SchedulerNode node_1 = TestUtils.getMockNode(host_1, DEFAULT_RACK, 0, 4*GB);
final int numNodes = 2;
final int numNodes = 3;
Resource clusterResource = Resources.createResource(numNodes * (4*GB));
when(csContext.getNumClusterNodes()).thenReturn(numNodes);
when(csContext.getMaximumResourceCapability()).thenReturn(

View File

@ -138,12 +138,34 @@ public class TestParentQueue {
when(queue).assignContainers(eq(clusterResource), eq(node));
}
private float computeQueueUsedCapacity(CSQueue queue,
int expectedMemory, Resource clusterResource) {
return (
((float)expectedMemory / clusterResource.getMemory()) *
queue.getParent().getAbsoluteCapacity()
);
}
private float computeQueueUtilization(CSQueue queue,
int expectedMemory, Resource clusterResource) {
return (expectedMemory /
(clusterResource.getMemory() * queue.getAbsoluteCapacity()));
}
final static float DELTA = 0.0001f;
private void verifyQueueMetrics(CSQueue queue,
int expectedMemory, Resource clusterResource) {
assertEquals(
computeQueueUtilization(queue, expectedMemory, clusterResource),
queue.getUtilization(),
DELTA);
assertEquals(
computeQueueUsedCapacity(queue, expectedMemory, clusterResource),
queue.getUsedCapacity(),
DELTA);
}
@Test
public void testSingleLevelQueues() throws Exception {
// Setup queue configs
@ -173,15 +195,13 @@ public class TestParentQueue {
// Start testing
LeafQueue a = (LeafQueue)queues.get(A);
LeafQueue b = (LeafQueue)queues.get(B);
final float delta = 0.0001f;
// Simulate B returning a container on node_0
stubQueueAllocation(a, clusterResource, node_0, 0*GB);
stubQueueAllocation(b, clusterResource, node_0, 1*GB);
root.assignContainers(clusterResource, node_0);
assertEquals(0.0f, a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 1*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 0*GB, clusterResource);
verifyQueueMetrics(b, 1*GB, clusterResource);
// Now, A should get the scheduling opportunity since A=0G/6G, B=1G/14G
stubQueueAllocation(a, clusterResource, node_1, 2*GB);
@ -192,10 +212,8 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(b).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 2*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 2*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 2*GB, clusterResource);
verifyQueueMetrics(b, 2*GB, clusterResource);
// Now, B should get the scheduling opportunity
// since A has 2/6G while B has 2/14G
@ -207,10 +225,8 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(a).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 3*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 4*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 3*GB, clusterResource);
verifyQueueMetrics(b, 4*GB, clusterResource);
// Now, B should still get the scheduling opportunity
// since A has 3/6G while B has 4/14G
@ -222,10 +238,8 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(a).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 3*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 8*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 3*GB, clusterResource);
verifyQueueMetrics(b, 8*GB, clusterResource);
// Now, A should get the scheduling opportunity
// since A has 3/6G while B has 8/14G
@ -237,10 +251,8 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(a).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 4*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 9*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 4*GB, clusterResource);
verifyQueueMetrics(b, 9*GB, clusterResource);
}
private static final String C = "c";
@ -323,22 +335,16 @@ public class TestParentQueue {
CSQueue b2 = queues.get(B2);
CSQueue b3 = queues.get(B3);
final float delta = 0.0001f;
// Simulate C returning a container on node_0
stubQueueAllocation(a, clusterResource, node_0, 0*GB);
stubQueueAllocation(b, clusterResource, node_0, 0*GB);
stubQueueAllocation(c, clusterResource, node_0, 1*GB);
stubQueueAllocation(d, clusterResource, node_0, 0*GB);
root.assignContainers(clusterResource, node_0);
assertEquals(computeQueueUtilization(a, 0*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 0*GB, clusterResource),
b.getUtilization(), delta);
assertEquals(computeQueueUtilization(c, 1*GB, clusterResource),
c.getUtilization(), delta);
assertEquals(computeQueueUtilization(d, 0*GB, clusterResource),
d.getUtilization(), delta);
verifyQueueMetrics(a, 0*GB, clusterResource);
verifyQueueMetrics(b, 0*GB, clusterResource);
verifyQueueMetrics(c, 1*GB, clusterResource);
verifyQueueMetrics(d, 0*GB, clusterResource);
reset(a); reset(b); reset(c);
// Now get B2 to allocate
@ -347,12 +353,9 @@ public class TestParentQueue {
stubQueueAllocation(b2, clusterResource, node_1, 4*GB);
stubQueueAllocation(c, clusterResource, node_1, 0*GB);
root.assignContainers(clusterResource, node_1);
assertEquals(computeQueueUtilization(a, 0*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 4*GB, clusterResource),
b.getUtilization(), delta);
assertEquals(computeQueueUtilization(c, 1*GB, clusterResource),
c.getUtilization(), delta);
verifyQueueMetrics(a, 0*GB, clusterResource);
verifyQueueMetrics(b, 4*GB, clusterResource);
verifyQueueMetrics(c, 1*GB, clusterResource);
reset(a); reset(b); reset(c);
// Now get both A1, C & B3 to allocate in right order
@ -368,12 +371,9 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(b).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 1*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 6*GB, clusterResource),
b.getUtilization(), delta);
assertEquals(computeQueueUtilization(c, 3*GB, clusterResource),
c.getUtilization(), delta);
verifyQueueMetrics(a, 1*GB, clusterResource);
verifyQueueMetrics(b, 6*GB, clusterResource);
verifyQueueMetrics(c, 3*GB, clusterResource);
reset(a); reset(b); reset(c);
// Now verify max-capacity
@ -399,14 +399,10 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(c).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 3*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 8*GB, clusterResource),
b.getUtilization(), delta);
assertEquals(computeQueueUtilization(c, 4*GB, clusterResource),
c.getUtilization(), delta);
verifyQueueMetrics(a, 3*GB, clusterResource);
verifyQueueMetrics(b, 8*GB, clusterResource);
verifyQueueMetrics(c, 4*GB, clusterResource);
reset(a); reset(b); reset(c);
}
@Test
@ -438,15 +434,13 @@ public class TestParentQueue {
// Start testing
LeafQueue a = (LeafQueue)queues.get(A);
LeafQueue b = (LeafQueue)queues.get(B);
final float delta = 0.0001f;
// Simulate B returning a container on node_0
stubQueueAllocation(a, clusterResource, node_0, 0*GB, NodeType.OFF_SWITCH);
stubQueueAllocation(b, clusterResource, node_0, 1*GB, NodeType.OFF_SWITCH);
root.assignContainers(clusterResource, node_0);
assertEquals(0.0f, a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 1*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 0*GB, clusterResource);
verifyQueueMetrics(b, 1*GB, clusterResource);
// Now, A should get the scheduling opportunity since A=0G/6G, B=1G/14G
// also, B gets a scheduling opportunity since A allocates RACK_LOCAL
@ -458,10 +452,8 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(b).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 2*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 2*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 2*GB, clusterResource);
verifyQueueMetrics(b, 2*GB, clusterResource);
// Now, B should get the scheduling opportunity
// since A has 2/6G while B has 2/14G,
@ -474,10 +466,8 @@ public class TestParentQueue {
any(SchedulerNode.class));
allocationOrder.verify(a).assignContainers(eq(clusterResource),
any(SchedulerNode.class));
assertEquals(computeQueueUtilization(a, 2*GB, clusterResource),
a.getUtilization(), delta);
assertEquals(computeQueueUtilization(b, 4*GB, clusterResource),
b.getUtilization(), delta);
verifyQueueMetrics(a, 2*GB, clusterResource);
verifyQueueMetrics(b, 4*GB, clusterResource);
}

View File

@ -30,6 +30,8 @@ public class TestQueueParsing {
private static final Log LOG = LogFactory.getLog(TestQueueParsing.class);
private static final double DELTA = 0.000001;
@Test
public void testQueueParsing() throws Exception {
CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
@ -37,6 +39,20 @@ public class TestQueueParsing {
CapacityScheduler capacityScheduler = new CapacityScheduler();
capacityScheduler.reinitialize(conf, null, null);
CSQueue a = capacityScheduler.getQueue("a");
Assert.assertEquals(0.10, a.getAbsoluteCapacity(), DELTA);
Assert.assertEquals(0.15, a.getAbsoluteMaximumCapacity(), DELTA);
CSQueue b1 = capacityScheduler.getQueue("b1");
Assert.assertEquals(0.2 * 0.5, b1.getAbsoluteCapacity(), DELTA);
Assert.assertEquals("Parent B has no MAX_CAP",
0.85, b1.getAbsoluteMaximumCapacity(), DELTA);
CSQueue c12 = capacityScheduler.getQueue("c12");
Assert.assertEquals(0.7 * 0.5 * 0.45, c12.getAbsoluteCapacity(), DELTA);
Assert.assertEquals(0.7 * 0.55 * 0.7,
c12.getAbsoluteMaximumCapacity(), DELTA);
}
private void setupQueueConfiguration(CapacitySchedulerConfiguration conf) {
@ -47,12 +63,14 @@ public class TestQueueParsing {
final String A = CapacitySchedulerConfiguration.ROOT + ".a";
conf.setCapacity(A, 10);
conf.setMaximumCapacity(A, 15);
final String B = CapacitySchedulerConfiguration.ROOT + ".b";
conf.setCapacity(B, 20);
final String C = CapacitySchedulerConfiguration.ROOT + ".c";
conf.setCapacity(C, 70);
conf.setMaximumCapacity(C, 70);
LOG.info("Setup top-level queues");
@ -61,15 +79,20 @@ public class TestQueueParsing {
final String A2 = A + ".a2";
conf.setQueues(A, new String[] {"a1", "a2"});
conf.setCapacity(A1, 30);
conf.setMaximumCapacity(A1, 45);
conf.setCapacity(A2, 70);
conf.setMaximumCapacity(A2, 85);
final String B1 = B + ".b1";
final String B2 = B + ".b2";
final String B3 = B + ".b3";
conf.setQueues(B, new String[] {"b1", "b2", "b3"});
conf.setCapacity(B1, 50);
conf.setMaximumCapacity(B1, 85);
conf.setCapacity(B2, 30);
conf.setMaximumCapacity(B2, 35);
conf.setCapacity(B3, 20);
conf.setMaximumCapacity(B3, 35);
final String C1 = C + ".c1";
final String C2 = C + ".c2";
@ -77,9 +100,13 @@ public class TestQueueParsing {
final String C4 = C + ".c4";
conf.setQueues(C, new String[] {"c1", "c2", "c3", "c4"});
conf.setCapacity(C1, 50);
conf.setMaximumCapacity(C1, 55);
conf.setCapacity(C2, 10);
conf.setMaximumCapacity(C2, 25);
conf.setCapacity(C3, 35);
conf.setMaximumCapacity(C3, 38);
conf.setCapacity(C4, 5);
conf.setMaximumCapacity(C4, 5);
LOG.info("Setup 2nd-level queues");
@ -89,8 +116,11 @@ public class TestQueueParsing {
final String C13 = C1 + ".c13";
conf.setQueues(C1, new String[] {"c11", "c12", "c13"});
conf.setCapacity(C11, 15);
conf.setMaximumCapacity(C11, 30);
conf.setCapacity(C12, 45);
conf.setMaximumCapacity(C12, 70);
conf.setCapacity(C13, 40);
conf.setMaximumCapacity(C13, 40);
LOG.info("Setup 3rd-level queues");
}

View File

@ -235,12 +235,13 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
Element qElem = (Element) queues.item(j);
String qName = WebServicesTestUtils.getXmlString(qElem, "queueName");
String q = CapacitySchedulerConfiguration.ROOT + "." + qName;
verifySubQueueXML(qElem, q, 100);
verifySubQueueXML(qElem, q, 100, 100);
}
}
}
public void verifySubQueueXML(Element qElem, String q, float parentAbsCapacity)
public void verifySubQueueXML(Element qElem, String q,
float parentAbsCapacity, float parentAbsMaxCapacity)
throws Exception {
NodeList queues = qElem.getElementsByTagName("subQueues");
QueueInfo qi = (queues != null) ? new QueueInfo() : new LeafQueueInfo();
@ -258,14 +259,15 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
WebServicesTestUtils.getXmlString(qElem, "usedResources");
qi.queueName = WebServicesTestUtils.getXmlString(qElem, "queueName");
qi.state = WebServicesTestUtils.getXmlString(qElem, "state");
verifySubQueueGeneric(q, qi, parentAbsCapacity);
verifySubQueueGeneric(q, qi, parentAbsCapacity, parentAbsMaxCapacity);
if (queues != null) {
for (int j = 0; j < queues.getLength(); j++) {
Element subqElem = (Element) queues.item(j);
String qName = WebServicesTestUtils.getXmlString(subqElem, "queueName");
String q2 = q + "." + qName;
verifySubQueueXML(subqElem, q2, qi.absoluteCapacity);
verifySubQueueXML(subqElem, q2,
qi.absoluteCapacity, qi.absoluteMaxCapacity);
}
} else {
LeafQueueInfo lqi = (LeafQueueInfo) qi;
@ -309,7 +311,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
for (int i = 0; i < arr.length(); i++) {
JSONObject obj = arr.getJSONObject(i);
String q = CapacitySchedulerConfiguration.ROOT + "." + obj.getString("queueName");
verifySubQueue(obj, q, 100);
verifySubQueue(obj, q, 100, 100);
}
}
@ -323,7 +325,8 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
assertTrue("queueName doesn't match", "root".matches(queueName));
}
private void verifySubQueue(JSONObject info, String q, float parentAbsCapacity)
private void verifySubQueue(JSONObject info, String q,
float parentAbsCapacity, float parentAbsMaxCapacity)
throws JSONException, Exception {
int numExpectedElements = 11;
boolean isParentQueue = true;
@ -345,7 +348,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
qi.queueName = info.getString("queueName");
qi.state = info.getString("state");
verifySubQueueGeneric(q, qi, parentAbsCapacity);
verifySubQueueGeneric(q, qi, parentAbsCapacity, parentAbsMaxCapacity);
if (isParentQueue) {
JSONArray arr = info.getJSONArray("subQueues");
@ -353,7 +356,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
for (int i = 0; i < arr.length(); i++) {
JSONObject obj = arr.getJSONObject(i);
String q2 = q + "." + obj.getString("queueName");
verifySubQueue(obj, q2, qi.absoluteCapacity);
verifySubQueue(obj, q2, qi.absoluteCapacity, qi.absoluteMaxCapacity);
}
} else {
LeafQueueInfo lqi = (LeafQueueInfo) qi;
@ -371,7 +374,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
}
private void verifySubQueueGeneric(String q, QueueInfo info,
float parentAbsCapacity) throws Exception {
float parentAbsCapacity, float parentAbsMaxCapacity) throws Exception {
String[] qArr = q.split("\\.");
assertTrue("q name invalid: " + q, qArr.length > 1);
String qshortName = qArr[qArr.length - 1];
@ -380,7 +383,7 @@ public class TestRMWebServicesCapacitySched extends JerseyTest {
assertEquals("capacity doesn't match", csConf.getCapacity(q),
info.capacity, 1e-3f);
float expectCapacity = csConf.getMaximumCapacity(q);
float expectAbsMaxCapacity = parentAbsCapacity * (info.maxCapacity/100);
float expectAbsMaxCapacity = parentAbsMaxCapacity * (info.maxCapacity/100);
if (CapacitySchedulerConfiguration.UNDEFINED == expectCapacity) {
expectCapacity = 100;
expectAbsMaxCapacity = 100;

View File

@ -57,7 +57,7 @@ public class AmIpFilter implements Filter {
proxyUriBase = conf.getInitParameter(PROXY_URI_BASE);
}
private Set<String> getProxyAddresses() throws ServletException {
protected Set<String> getProxyAddresses() throws ServletException {
long now = System.currentTimeMillis();
synchronized(this) {
if(proxyAddresses == null || (lastUpdate + updateInterval) >= now) {
@ -97,12 +97,15 @@ public class AmIpFilter implements Filter {
}
String user = null;
if (httpReq.getCookies() != null) {
for(Cookie c: httpReq.getCookies()) {
if(WebAppProxyServlet.PROXY_USER_COOKIE_NAME.equals(c.getName())){
user = c.getValue();
break;
}
}
}
if(user == null) {
LOG.warn("Could not find "+WebAppProxyServlet.PROXY_USER_COOKIE_NAME
+" cookie, so user will not be set");

View File

@ -0,0 +1,121 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.webproxy.amfilter;
import java.io.IOException;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import junit.framework.Assert;
import org.junit.Test;
import org.mockito.Mockito;
public class TestAmFilter {
private String proxyHost = "bogushost.com";
private String proxyUri = "http://bogus";
private class TestAmIpFilter extends AmIpFilter {
private Set<String> proxyAddresses = null;
protected Set<String> getProxyAddresses() {
if(proxyAddresses == null) {
proxyAddresses = new HashSet<String>();
}
proxyAddresses.add(proxyHost);
return proxyAddresses;
}
}
private static class DummyFilterConfig implements FilterConfig {
final Map<String, String> map;
DummyFilterConfig(Map<String,String> map) {
this.map = map;
}
@Override
public String getFilterName() {
return "dummy";
}
@Override
public String getInitParameter(String arg0) {
return map.get(arg0);
}
@Override
public Enumeration<String> getInitParameterNames() {
return Collections.enumeration(map.keySet());
}
@Override
public ServletContext getServletContext() {
return null;
}
}
@Test
public void filterNullCookies() throws Exception {
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
Mockito.when(request.getCookies()).thenReturn(null);
Mockito.when(request.getRemoteAddr()).thenReturn(proxyHost);
HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
final AtomicBoolean invoked = new AtomicBoolean();
FilterChain chain = new FilterChain() {
@Override
public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse)
throws IOException, ServletException {
invoked.set(true);
}
};
Map<String, String> params = new HashMap<String, String>();
params.put(AmIpFilter.PROXY_HOST, proxyHost);
params.put(AmIpFilter.PROXY_URI_BASE, proxyUri);
FilterConfig conf = new DummyFilterConfig(params);
Filter filter = new TestAmIpFilter();
filter.init(conf);
filter.doFilter(request, response, chain);
Assert.assertTrue(invoked.get());
filter.destroy();
}
}

View File

@ -95,7 +95,7 @@ Hadoop MapReduce Next Generation - Cluster Setup
*--------------------------------------+--------------------------------------+
| DataNode | HADOOP_DATANODE_OPTS |
*--------------------------------------+--------------------------------------+
| Backup NameNode | HADOOP_SECONDARYNAMENODE_OPTS |
| Secondary NameNode | HADOOP_SECONDARYNAMENODE_OPTS |
*--------------------------------------+--------------------------------------+
| ResourceManager | YARN_RESOURCEMANAGER_OPTS |
*--------------------------------------+--------------------------------------+
@ -537,15 +537,15 @@ Hadoop MapReduce Next Generation - Cluster Setup
It's recommended to have them share a Unix group, e.g. <<<hadoop>>>.
*--------------------------------------+--------------------------------------+
*--------------------------------------+----------------------------------------------------------------------+
|| User:Group || Daemons |
*--------------------------------------+--------------------------------------+
| hdfs:hadoop | NameNode, Backup NameNode, DataNode |
*--------------------------------------+--------------------------------------+
*--------------------------------------+----------------------------------------------------------------------+
| hdfs:hadoop | NameNode, Secondary NameNode, Checkpoint Node, Backup Node, DataNode |
*--------------------------------------+----------------------------------------------------------------------+
| yarn:hadoop | ResourceManager, NodeManager |
*--------------------------------------+--------------------------------------+
*--------------------------------------+----------------------------------------------------------------------+
| mapred:hadoop | MapReduce JobHistory Server |
*--------------------------------------+--------------------------------------+
*--------------------------------------+----------------------------------------------------------------------+
* <<<Permissions for both HDFS and local fileSystem paths>>>

View File

@ -0,0 +1,49 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License. See accompanying LICENSE file.
---
YARN
---
---
${maven.build.timestamp}
Web Application Proxy
The Web Application Proxy is part of YARN. By default it runs as part of
the Resource Manager (RM), but it can be configured to run in standalone mode.
The purpose of the proxy is to reduce the possibility of web-based attacks
through YARN.
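A minimal sketch of the standalone option follows; the property name yarn.web-proxy.address and its standalone semantics are assumptions here rather than something stated in this patch, and in a real deployment the value would normally be set in yarn-site.xml rather than in code.
// Sketch only: give the proxy its own host:port so it does not run embedded in the RM.
import org.apache.hadoop.conf.Configuration;
public class StandaloneProxyConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Assumed property name; a distinct address is what separates the proxy from the RM.
    conf.set("yarn.web-proxy.address", "proxy.example.com:9099");
    System.out.println("Proxy would bind to: " + conf.get("yarn.web-proxy.address"));
  }
}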
In YARN the Application Master (AM) is responsible for providing a web UI
and sending that link to the RM. This opens up a number of potential
issues. The RM runs as a trusted user, and people visiting that web
address will treat it, and the links it provides, as trusted, when in
reality the AM runs as a non-trusted user and the links it hands to
the RM could point to anything malicious. The Web Application
Proxy mitigates this risk by warning users who do not own the given
application that they are connecting to an untrusted site.
In addition, the proxy tries to reduce the impact that a malicious
AM could have on a user. It does this primarily by stripping the user's
cookies from the request and replacing them with a single cookie carrying the
user name of the logged-in user. Most web-based authentication systems
identify a user based on a cookie, so handing those cookies to an
untrusted application opens up the potential for an exploit. If the cookies
are designed properly that potential should be fairly minimal, but stripping them
further reduces the attack vector. The current proxy implementation does
nothing to prevent the AM from providing links to malicious external sites,
nor does it prevent malicious JavaScript code from running. In fact,
JavaScript can be used to read the cookies, so stripping the
cookies from the request has minimal benefit at this time.
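As a hedged illustration of the single-cookie mechanism, the following sketch shows how an AM-side filter could recover the proxied user name, in the spirit of the AmIpFilter change earlier in this patch; the literal cookie name "proxy-user" is an assumption, since only the WebAppProxyServlet.PROXY_USER_COOKIE_NAME constant appears in the code above.
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;
public class ProxyUserCookieSketch {
  // Assumed cookie value; the real code refers to WebAppProxyServlet.PROXY_USER_COOKIE_NAME.
  static final String PROXY_USER_COOKIE_NAME = "proxy-user";
  // Returns the user name the proxy attached, or null when no such cookie exists
  // (the null-cookie case is what TestAmFilter#filterNullCookies exercises).
  static String getProxiedUser(HttpServletRequest httpReq) {
    if (httpReq.getCookies() == null) {
      return null;
    }
    for (Cookie c : httpReq.getCookies()) {
      if (PROXY_USER_COOKIE_NAME.equals(c.getName())) {
        return c.getValue();
      }
    }
    return null;
  }
}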
In the future we hope to address the attack vectors described above and make
attaching to an AM's web UI safer.

View File

@ -47,4 +47,6 @@ MapReduce NextGen aka YARN aka MRv2
* {{{./CapacityScheduler.html}Capacity Scheduler}}
* {{{./WebApplicationProxy.html}Web Application Proxy}}

View File

@ -223,6 +223,11 @@
<artifactId>hadoop-archives</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-distcp</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-rumen</artifactId>
@ -709,11 +714,21 @@
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>2.4</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>2.2</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>1.2</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pdf-plugin</artifactId>
<version>1.1</version>
</plugin>
</plugins>
</pluginManagement>
@ -811,6 +826,14 @@
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pdf-plugin</artifactId>
<configuration>
<outputDirectory>${project.reporting.outputDirectory}</outputDirectory>
<includeReports>false</includeReports>
</configuration>
</plugin>
</plugins>
</build>

View File

@ -61,6 +61,7 @@
<item name="YARN Architecture" href="hadoop-yarn/hadoop-yarn-site/YARN.html"/>
<item name="Writing Yarn Applications" href="hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html"/>
<item name="Capacity Scheduler" href="hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html"/>
<item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/>
</menu>
<menu name="YARN REST API's" inherit="top">

View File

@ -0,0 +1,7 @@
DistCp (distributed copy) is a tool used for large inter/intra-cluster copying.
It uses Map/Reduce to effect its distribution, error handling and recovery,
and reporting. It expands a list of files and directories into input to map tasks,
each of which will copy a partition of the files specified in the source list.
Version 0.1 (2010/08/02 sriksun)
- Initial Version
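A hedged sketch of driving the tool programmatically, mirroring the DistCp(Configuration, DistCpOptions) constructor and execute() method introduced later in this patch; the DistCpOptions constructor taking a source-path list and a target path, and the NameNode host names, are assumptions used only for illustration.
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
public class DistCpDriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Copy /tmp/data from one (hypothetical) cluster to another.
    DistCpOptions options = new DistCpOptions(
        Arrays.asList(new Path("hdfs://source-nn:8020/tmp/data")),
        new Path("hdfs://target-nn:8020/tmp/data-copy"));
    // Builds the copy listing and submits the map-only copy job.
    Job job = new DistCp(conf, options).execute();
    System.out.println("DistCp job id: " + job.getJobID());
  }
}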

View File

@ -0,0 +1,198 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project>
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>0.23.1-SNAPSHOT</version>
<relativePath>../../hadoop-project</relativePath>
</parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-distcp</artifactId>
<version>0.23.1-SNAPSHOT</version>
<description>Apache Hadoop Distributed Copy</description>
<name>Apache Hadoop Distributed Copy</name>
<packaging>jar</packaging>
<properties>
<file.encoding>UTF-8</file.encoding>
<downloadSources>true</downloadSources>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-hs</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<scope>test</scope>
<type>test-jar</type>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
<testResources>
<testResource>
<directory>src/test/resources</directory>
<filtering>true</filtering>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkMode>always</forkMode>
<forkedProcessTimeoutInSeconds>600</forkedProcessTimeoutInSeconds>
<argLine>-Xmx1024m</argLine>
<includes>
<include>**/Test*.java</include>
</includes>
<redirectTestOutputToFile>true</redirectTestOutputToFile>
<systemProperties>
<property>
<name>test.build.data</name>
<value>${basedir}/target/test/data</value>
</property>
<property>
<name>hadoop.log.dir</name>
<value>target/test/logs</value>
</property>
<property>
<name>org.apache.commons.logging.Log</name>
<value>org.apache.commons.logging.impl.SimpleLog</value>
</property>
<property>
<name>org.apache.commons.logging.simplelog.defaultlog</name>
<value>warn</value>
</property>
</systemProperties>
</configuration>
</plugin>
<plugin>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
<configuration>
<enableRulesSummary>true</enableRulesSummary>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.apache.hadoop.tools.DistCp</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-pdf-plugin</artifactId>
<executions>
<execution>
<id>pdf</id>
<phase>package</phase>
<goals>
<goal>pdf</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,218 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.security.Credentials;
import java.io.IOException;
/**
* The CopyListing abstraction is responsible for how the list of
* sources and targets is constructed, for DistCp's copy function.
* The copy-listing should be a SequenceFile<Text, FileStatus>,
* located at the path specified to buildListing(),
* each entry being a pair of (Source relative path, source file status),
* all the paths being fully qualified.
*/
public abstract class CopyListing extends Configured {
private Credentials credentials;
/**
* The build-listing function creates the input listing that DistCp uses to
* perform the copy.
*
* The listing is a sequence file with the relative path of a file as the key
* and the file status of the source file as the value.
*
* For instance if the source path is /tmp/data and the traversed path is
* /tmp/data/dir1/dir2/file1, then the sequence file would contain
*
* key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
*
* File would also contain directory entries. Meaning, if /tmp/data/dir1/dir2/file1
* is the only file under /tmp/data, the resulting sequence file would contain the
* following entries
*
* key: /dir1 and value: FileStatus(/tmp/data/dir1)
* key: /dir1/dir2 and value: FileStatus(/tmp/data/dir1/dir2)
* key: /dir1/dir2/file1 and value: FileStatus(/tmp/data/dir1/dir2/file1)
*
* Cases requiring special handling:
* If source path is a file (/tmp/file1), contents of the file will be as follows
*
* TARGET DOES NOT EXIST: Key-"", Value-FileStatus(/tmp/file1)
* TARGET IS FILE : Key-"", Value-FileStatus(/tmp/file1)
* TARGET IS DIR : Key-"/file1", Value-FileStatus(/tmp/file1)
*
* @param pathToListFile - Output file where the listing would be stored
* @param options - Input options to distcp
* @throws IOException - Exception if any
*/
public final void buildListing(Path pathToListFile,
DistCpOptions options) throws IOException {
validatePaths(options);
doBuildListing(pathToListFile, options);
Configuration config = getConf();
config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, pathToListFile.toString());
config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, getBytesToCopy());
config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, getNumberOfPaths());
checkForDuplicates(pathToListFile);
}
/**
* Validate input and output paths
*
* @param options - Input options
* @throws InvalidInputException: If inputs are invalid
* @throws IOException: any Exception with FS
*/
protected abstract void validatePaths(DistCpOptions options)
throws IOException, InvalidInputException;
/**
* The interface to be implemented by sub-classes, to create the source/target file listing.
* @param pathToListFile Path on HDFS where the listing file is written.
* @param options Input Options for DistCp (indicating source/target paths.)
* @throws IOException: Thrown on failure to create the listing file.
*/
protected abstract void doBuildListing(Path pathToListFile,
DistCpOptions options) throws IOException;
/**
* Return the total number of bytes that DistCp should copy for the source paths.
* This doesn't consider whether a file is identical at the target and should be skipped during the copy.
*
* @return total bytes to copy
*/
protected abstract long getBytesToCopy();
/**
* Return the total number of paths to DistCp, including directories.
* This doesn't consider whether a file/dir is already present at the target and should be skipped during the copy.
*
* @return Total number of paths to distcp
*/
protected abstract long getNumberOfPaths();
/**
* Validate the final resulting path listing to see if there are any duplicate entries
*
* @param pathToListFile - path listing built by doBuildListing
* @throws IOException - on any issue while checking for duplicates
* @throws DuplicateFileException - if there are duplicates
*/
private void checkForDuplicates(Path pathToListFile)
throws DuplicateFileException, IOException {
Configuration config = getConf();
FileSystem fs = pathToListFile.getFileSystem(config);
Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
SequenceFile.Reader reader = new SequenceFile.Reader(
config, SequenceFile.Reader.file(sortedList));
try {
Text lastKey = new Text("*"); //source relative path can never hold *
FileStatus lastFileStatus = new FileStatus();
Text currentKey = new Text();
while (reader.next(currentKey)) {
if (currentKey.equals(lastKey)) {
FileStatus currentFileStatus = new FileStatus();
reader.getCurrentValue(currentFileStatus);
throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " +
currentFileStatus.getPath() + " would cause duplicates. Aborting");
}
reader.getCurrentValue(lastFileStatus);
lastKey.set(currentKey);
}
} finally {
IOUtils.closeStream(reader);
}
}
/**
* Protected constructor, to initialize configuration.
* @param configuration The input configuration,
* with which the source/target FileSystems may be accessed.
* @param credentials - Credentials object on which the FS delegation tokens are cached. If null,
* delegation-token caching is skipped.
*/
protected CopyListing(Configuration configuration, Credentials credentials) {
setConf(configuration);
setCredentials(credentials);
}
/**
* Set the Credentials store on which FS delegation tokens will be cached.
* @param credentials - Credentials object
*/
protected void setCredentials(Credentials credentials) {
this.credentials = credentials;
}
/**
* get credentials to update the delegation tokens for accessed FS objects
* @return Credentials object
*/
protected Credentials getCredentials() {
return credentials;
}
/**
* Public Factory method with which the appropriate CopyListing implementation may be retrieved.
* @param configuration The input configuration.
* @param credentials Credentials object on which the FS delegation tokens are cached
* @param options The input Options, to help choose the appropriate CopyListing Implementation.
* @return An instance of the appropriate CopyListing implementation.
*/
public static CopyListing getCopyListing(Configuration configuration,
Credentials credentials,
DistCpOptions options) {
if (options.getSourceFileListing() == null) {
return new GlobbedCopyListing(configuration, credentials);
} else {
return new FileBasedCopyListing(configuration, credentials);
}
}
static class DuplicateFileException extends RuntimeException {
public DuplicateFileException(String message) {
super(message);
}
}
static class InvalidInputException extends RuntimeException {
public InvalidInputException(String message) {
super(message);
}
}
}

View File

@ -0,0 +1,405 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobSubmissionFiles;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.tools.CopyListing.*;
import org.apache.hadoop.tools.mapred.CopyMapper;
import org.apache.hadoop.tools.mapred.CopyOutputFormat;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
import java.util.Random;
/**
* DistCp is the main driver-class for DistCpV2.
* For command-line use, DistCp::main() orchestrates the parsing of command-line
* parameters and the launch of the DistCp job.
* For programmatic use, a DistCp object can be constructed by specifying
* options (in a DistCpOptions object), and DistCp::execute() may be used to
* launch the copy-job. DistCp may alternatively be sub-classed to fine-tune
* behaviour.
*/
public class DistCp extends Configured implements Tool {
private static final Log LOG = LogFactory.getLog(DistCp.class);
private DistCpOptions inputOptions;
private Path metaFolder;
private static final String PREFIX = "_distcp";
private static final String WIP_PREFIX = "._WIP_";
private static final String DISTCP_DEFAULT_XML = "distcp-default.xml";
public static final Random rand = new Random();
private boolean submitted;
private FileSystem jobFS;
/**
* Public Constructor. Creates DistCp object with specified input-parameters.
* (E.g. source-paths, target-location, etc.)
* @param inputOptions Options (indicating source-paths, target-location.)
* @param configuration The Hadoop configuration against which the Copy-mapper must run.
* @throws Exception on failure.
*/
public DistCp(Configuration configuration, DistCpOptions inputOptions) throws Exception {
Configuration config = new Configuration(configuration);
config.addResource(DISTCP_DEFAULT_XML);
setConf(config);
this.inputOptions = inputOptions;
this.metaFolder = createMetaFolderPath();
}
/**
* To be used with the ToolRunner. Not for public consumption.
*/
private DistCp() {}
/**
* Implementation of Tool::run(). Orchestrates the copy of source file(s)
* to target location, by:
* 1. Creating a list of files to be copied to target.
* 2. Launching a Map-only job to copy the files. (Delegates to execute().)
* @param argv List of arguments passed to DistCp, from the ToolRunner.
* @return On success, it returns 0. Else, -1.
*/
public int run(String[] argv) {
try {
inputOptions = (OptionsParser.parse(argv));
LOG.info("Input Options: " + inputOptions);
} catch (Throwable e) {
LOG.error("Invalid arguments: ", e);
System.err.println("Invalid arguments: " + e.getMessage());
OptionsParser.usage();
return DistCpConstants.INVALID_ARGUMENT;
}
try {
execute();
} catch (InvalidInputException e) {
LOG.error("Invalid input: ", e);
return DistCpConstants.INVALID_ARGUMENT;
} catch (DuplicateFileException e) {
LOG.error("Duplicate files in input path: ", e);
return DistCpConstants.DUPLICATE_INPUT;
} catch (Exception e) {
LOG.error("Exception encountered ", e);
return DistCpConstants.UNKNOWN_ERROR;
}
return DistCpConstants.SUCCESS;
}
/**
* Implements the core execution. Creates the file listing for the copy
* and launches the Hadoop job to do the copy.
* @return Job handle
* @throws Exception on failure.
*/
public Job execute() throws Exception {
assert inputOptions != null;
assert getConf() != null;
Job job = null;
try {
metaFolder = createMetaFolderPath();
jobFS = metaFolder.getFileSystem(getConf());
job = createJob();
createInputFileListing(job);
job.submit();
submitted = true;
} finally {
if (!submitted) {
cleanup();
}
}
String jobID = job.getJobID().toString();
job.getConfiguration().set(DistCpConstants.CONF_LABEL_DISTCP_JOB_ID, jobID);
LOG.info("DistCp job-id: " + jobID);
if (inputOptions.shouldBlock()) {
job.waitForCompletion(true);
}
return job;
}
/**
* Create Job object for submitting it, with all the configuration
*
* @return Reference to job object.
* @throws IOException - Exception if any
*/
private Job createJob() throws IOException {
String jobName = "distcp";
String userChosenName = getConf().get(JobContext.JOB_NAME);
if (userChosenName != null)
jobName += ": " + userChosenName;
Job job = Job.getInstance(getConf());
job.setJobName(jobName);
job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
job.setJarByClass(CopyMapper.class);
configureOutputFormat(job);
job.setMapperClass(CopyMapper.class);
job.setNumReduceTasks(0);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputFormatClass(CopyOutputFormat.class);
job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
job.getConfiguration().set(JobContext.NUM_MAPS,
String.valueOf(inputOptions.getMaxMaps()));
if (inputOptions.getSslConfigurationFile() != null) {
setupSSLConfig(job);
}
inputOptions.appendToConf(job.getConfiguration());
return job;
}
/**
* Set up the SSL configuration on the job configuration to enable hsftp access
* from the map tasks. Also copy the SSL configuration file to the distributed cache.
*
* @param job - Reference to job's handle
* @throws java.io.IOException - Exception if unable to locate ssl config file
*/
private void setupSSLConfig(Job job) throws IOException {
Configuration configuration = job.getConfiguration();
Path sslConfigPath = new Path(configuration.
getResource(inputOptions.getSslConfigurationFile()).toString());
addSSLFilesToDistCache(job, sslConfigPath);
configuration.set(DistCpConstants.CONF_LABEL_SSL_CONF, sslConfigPath.getName());
configuration.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfigPath.getName());
}
/**
* Add the SSL files (trust store, key store and the SSL config XML) to the distributed cache.
*
* @param job - Job handle
* @param sslConfigPath - ssl Configuration file specified through options
* @throws IOException - If any
*/
private void addSSLFilesToDistCache(Job job,
Path sslConfigPath) throws IOException {
Configuration configuration = job.getConfiguration();
FileSystem localFS = FileSystem.getLocal(configuration);
Configuration sslConf = new Configuration(false);
sslConf.addResource(sslConfigPath);
Path localStorePath = getLocalStorePath(sslConf,
DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
localStorePath.getName());
localStorePath = getLocalStorePath(sslConf,
DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
localStorePath.getName());
job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
}
/**
* Get Local Trust store/key store path
*
* @param sslConf - Config from SSL Client xml
* @param storeKey - Key for either trust store or key store
* @return - Path where the store is present
* @throws IOException -If any
*/
private Path getLocalStorePath(Configuration sslConf, String storeKey) throws IOException {
if (sslConf.get(storeKey) != null) {
return new Path(sslConf.get(storeKey));
} else {
throw new IOException("Store for " + storeKey + " is not set in " +
inputOptions.getSslConfigurationFile());
}
}
/**
* Setup output format appropriately
*
* @param job - Job handle
* @throws IOException - Exception if any
*/
private void configureOutputFormat(Job job) throws IOException {
final Configuration configuration = job.getConfiguration();
Path targetPath = inputOptions.getTargetPath();
FileSystem targetFS = targetPath.getFileSystem(configuration);
targetPath = targetPath.makeQualified(targetFS.getUri(),
targetFS.getWorkingDirectory());
if (inputOptions.shouldAtomicCommit()) {
Path workDir = inputOptions.getAtomicWorkPath();
if (workDir == null) {
workDir = targetPath.getParent();
}
workDir = new Path(workDir, WIP_PREFIX + targetPath.getName()
+ rand.nextInt());
FileSystem workFS = workDir.getFileSystem(configuration);
if (!DistCpUtils.compareFs(targetFS, workFS)) {
throw new IllegalArgumentException("Work path " + workDir +
" and target path " + targetPath + " are in different file system");
}
CopyOutputFormat.setWorkingDirectory(job, workDir);
} else {
CopyOutputFormat.setWorkingDirectory(job, targetPath);
}
CopyOutputFormat.setCommitDirectory(job, targetPath);
Path logPath = inputOptions.getLogPath();
if (logPath == null) {
logPath = new Path(metaFolder, "_logs");
} else {
LOG.info("DistCp job log path: " + logPath);
}
CopyOutputFormat.setOutputPath(job, logPath);
}
/**
* Create input listing by invoking an appropriate copy listing
* implementation. Also add delegation tokens for each path
* to the job's credential store
*
* @param job - Handle to job
* @return Returns the path where the copy listing is created
* @throws IOException - If any
*/
private Path createInputFileListing(Job job) throws IOException {
Path fileListingPath = getFileListingPath();
CopyListing copyListing = CopyListing.getCopyListing(job.getConfiguration(),
job.getCredentials(), inputOptions);
copyListing.buildListing(fileListingPath, inputOptions);
return fileListingPath;
}
/**
* Get default name of the copy listing file. Use the meta folder
* to create the copy listing file
*
* @return - Path where the copy listing file has to be saved
* @throws IOException - Exception if any
*/
private Path getFileListingPath() throws IOException {
String fileListPathStr = metaFolder + "/fileList.seq";
Path path = new Path(fileListPathStr);
return new Path(path.toUri().normalize().toString());
}
/**
* Create a default working folder for the job, under the
* job staging directory
*
* @return Returns the working folder information
* @throws Exception - Exception if any
*/
private Path createMetaFolderPath() throws Exception {
Configuration configuration = getConf();
Path stagingDir = JobSubmissionFiles.getStagingDir(
new Cluster(configuration), configuration);
Path metaFolderPath = new Path(stagingDir, PREFIX + String.valueOf(rand.nextInt()));
if (LOG.isDebugEnabled())
LOG.debug("Meta folder location: " + metaFolderPath);
configuration.set(DistCpConstants.CONF_LABEL_META_FOLDER, metaFolderPath.toString());
return metaFolderPath;
}
/**
* Main function of the DistCp program. Parses the input arguments (via OptionsParser),
* and invokes the DistCp::run() method, via the ToolRunner.
* @param argv Command-line arguments sent to DistCp.
*/
public static void main(String argv[]) {
try {
DistCp distCp = new DistCp();
Cleanup CLEANUP = new Cleanup(distCp);
Runtime.getRuntime().addShutdownHook(CLEANUP);
System.exit(ToolRunner.run(getDefaultConf(), distCp, argv));
}
catch (Exception e) {
LOG.error("Couldn't complete DistCp operation: ", e);
System.exit(DistCpConstants.UNKNOWN_ERROR);
}
}
/**
* Loads properties from distcp-default.xml into configuration
* object
* @return Configuration which includes properties from distcp-default.xml
*/
private static Configuration getDefaultConf() {
Configuration config = new Configuration();
config.addResource(DISTCP_DEFAULT_XML);
return config;
}
private synchronized void cleanup() {
try {
if (metaFolder == null) return;
jobFS.delete(metaFolder, true);
metaFolder = null;
} catch (IOException e) {
LOG.error("Unable to cleanup meta folder: " + metaFolder, e);
}
}
private boolean isSubmitted() {
return submitted;
}
private static class Cleanup extends Thread {
private final DistCp distCp;
public Cleanup(DistCp distCp) {
this.distCp = distCp;
}
@Override
public void run() {
if (distCp.isSubmitted()) return;
distCp.cleanup();
}
}
}
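The driver above is normally launched from the shell, but the same entry points (ToolRunner.run() and the DistCpConstants return codes) can also be exercised from another Java program. The following is a minimal illustrative sketch, not part of this patch; the cluster URIs are placeholders, and it assumes the no-argument DistCp constructor used by main() above is accessible to the caller.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.util.ToolRunner;

public class DistCpDriverSketch {
  public static void main(String[] args) throws Exception {
    // Same flow as DistCp.main(): hand the tool and its arguments to ToolRunner.
    String[] distCpArgs = {
        "-update",                           // copy only files missing on the target
        "hdfs://src-nn:8020/data/logs",      // source (placeholder)
        "hdfs://dst-nn:8020/backup/logs"     // target (placeholder)
    };
    // Assumes the no-arg DistCp constructor (used by main() above) is accessible here.
    int rc = ToolRunner.run(new Configuration(), new DistCp(), distCpArgs);
    if (rc != DistCpConstants.SUCCESS) {
      System.err.println("DistCp failed with return code " + rc);
    }
  }
}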

View File

@@ -0,0 +1,104 @@
package org.apache.hadoop.tools;
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Utility class to hold commonly used constants.
*/
public class DistCpConstants {
/* Default number of maps to use for DistCp */
public static final int DEFAULT_MAPS = 20;
/* Default bandwidth if none specified */
public static final int DEFAULT_BANDWIDTH_MB = 100;
/* Default strategy for copying. Implementation looked up
from distcp-default.xml
*/
public static final String UNIFORMSIZE = "uniformsize";
/**
* Constants mapping to command line switches/input options
*/
public static final String CONF_LABEL_ATOMIC_COPY = "distcp.atomic.copy";
public static final String CONF_LABEL_WORK_PATH = "distcp.work.path";
public static final String CONF_LABEL_LOG_PATH = "distcp.log.path";
public static final String CONF_LABEL_IGNORE_FAILURES = "distcp.ignore.failures";
public static final String CONF_LABEL_PRESERVE_STATUS = "distcp.preserve.status";
public static final String CONF_LABEL_SYNC_FOLDERS = "distcp.sync.folders";
public static final String CONF_LABEL_DELETE_MISSING = "distcp.delete.missing.source";
public static final String CONF_LABEL_SSL_CONF = "distcp.keystore.resource";
public static final String CONF_LABEL_MAX_MAPS = "distcp.max.maps";
public static final String CONF_LABEL_SOURCE_LISTING = "distcp.source.listing";
public static final String CONF_LABEL_COPY_STRATEGY = "distcp.copy.strategy";
public static final String CONF_LABEL_SKIP_CRC = "distcp.skip.crc";
public static final String CONF_LABEL_OVERWRITE = "distcp.copy.overwrite";
public static final String CONF_LABEL_BANDWIDTH_MB = "distcp.map.bandwidth.mb";
/* Total bytes to be copied. Updated by copylisting. Unfiltered count */
public static final String CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED = "mapred.total.bytes.expected";
/* Total number of paths to copy, includes directories. Unfiltered count */
public static final String CONF_LABEL_TOTAL_NUMBER_OF_RECORDS = "mapred.number.of.records";
/* SSL keystore resource */
public static final String CONF_LABEL_SSL_KEYSTORE = "dfs.https.client.keystore.resource";
/* If the input is specified via -f <source listing>, the file containing the source paths */
public static final String CONF_LABEL_LISTING_FILE_PATH = "distcp.listing.file.path";
/* Directory where the mapreduce job will write to. If not atomic commit, then same
as CONF_LABEL_TARGET_FINAL_PATH
*/
public static final String CONF_LABEL_TARGET_WORK_PATH = "distcp.target.work.path";
/* Directory where the final data will be committed to. If not atomic commit, then same
as CONF_LABEL_TARGET_WORK_PATH
*/
public static final String CONF_LABEL_TARGET_FINAL_PATH = "distcp.target.final.path";
/**
* DistCp job id for consumers of DistCp
*/
public static final String CONF_LABEL_DISTCP_JOB_ID = "distcp.job.id";
/* Meta folder where the job's intermediate data is kept */
public static final String CONF_LABEL_META_FOLDER = "distcp.meta.folder";
/**
* Conf label for SSL Trust-store location.
*/
public static final String CONF_LABEL_SSL_TRUST_STORE_LOCATION
= "ssl.client.truststore.location";
/**
* Conf label for SSL Key-store location.
*/
public static final String CONF_LABEL_SSL_KEY_STORE_LOCATION
= "ssl.client.keystore.location";
/**
* Constants for DistCp return code to shell / consumer of ToolRunner's run
*/
public static final int SUCCESS = 0;
public static final int INVALID_ARGUMENT = -1;
public static final int DUPLICATE_INPUT = -2;
public static final int UNKNOWN_ERROR = -999;
}
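To show how these labels are consumed downstream (an illustrative sketch, not part of this patch): the mapper/committer side reads them straight off the job Configuration, just as the CopyCommitter further below does for the atomic-copy and delete-missing flags.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpConstants;

public class DistCpConstantsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // The driver side stamps the label into the job configuration...
    conf.setBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, true);
    // ...and the committer consults the same label to decide whether to commit atomically.
    boolean atomic = conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false);
    System.out.println("atomic commit requested: " + atomic);
  }
}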

View File

@@ -0,0 +1,218 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.commons.cli.Option;
import org.apache.hadoop.conf.Configuration;
/**
* Enumeration mapping configuration keys to distcp command line
* options.
*/
public enum DistCpOptionSwitch {
/**
* Ignores any failures during copy, and continues with the rest.
* Failures are logged to a file.
*/
IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES,
new Option("i", false, "Ignore failures during copy")),
/**
* Preserves status of file/path in the target.
* Default behavior with -p is to preserve replication,
* block size, user, group and permission on the target file.
*
* If any of the optional flags (rbugp) are specified, then
* only the corresponding file attributes are preserved.
*
*/
PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
new Option("p", true, "preserve status (rbugp)" +
"(replication, block-size, user, group, permission)")),
/**
* Update target location by copying only files that are missing
* in the target. This can be used to periodically sync two folders
* across source and target. Typically used with DELETE_MISSING
* Incompatible with ATOMIC_COMMIT
*/
SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS,
new Option("update", false, "Update target, copying only missing" +
"files or directories")),
/**
* Deletes files in the target that are missing from the source.
* This allows the target to be in sync with the source contents
* Typically used in conjunction with SYNC_FOLDERS
* Incompatible with ATOMIC_COMMIT
*/
DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING,
new Option("delete", false, "Delete from target, " +
"files missing in source")),
/**
* Configuration file to use with hftps:// for securely copying
* files across clusters. Typically the configuration file contains
* truststore/keystore information such as location, password and type
*/
SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF,
new Option("mapredSslConf", true, "Configuration for ssl config file" +
", to use with hftps://")),
/**
* Max number of maps to use during copy. DistCp will split work
* as equally as possible among these maps
*/
MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS,
new Option("m", true, "Max number of concurrent maps to use for copy")),
/**
* Source file listing can be provided to DistCp in a file.
* This allows DistCp to copy random list of files from source
* and copy them to target
*/
SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING,
new Option("f", true, "List of files that need to be copied")),
/**
* Copy all the source files and commit them atomically to the target
* This is typically useful in cases where there is a process
* polling for availability of a file/dir. This option is incompatible
* with SYNC_FOLDERS & DELETE_MISSING
*/
ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY,
new Option("atomic", false, "Commit all changes or none")),
/**
* Work path to be used only in conjunction in Atomic commit
*/
WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH,
new Option("tmp", true, "Intermediate work path to be used for atomic commit")),
/**
* Log path where distcp output logs are written to
*/
LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH,
new Option("log", true, "Folder on DFS where distcp execution logs are saved")),
/**
* Copy strategy to use. This could be dynamic, uniform-size, etc.
* DistCp picks an appropriate input format based on this.
*/
COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY,
new Option("strategy", true, "Copy strategy to use. Default is " +
"dividing work based on file sizes")),
/**
* Skip CRC checks between source and target, when determining what
* files need to be copied.
*/
SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC,
new Option("skipcrccheck", false, "Whether to skip CRC checks between " +
"source and target paths.")),
/**
* Overwrite target-files unconditionally.
*/
OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE,
new Option("overwrite", false, "Choose to overwrite target files " +
"unconditionally, even if they exist.")),
/**
* Whether the DistCp execution should block until the job completes.
*/
BLOCKING("",
new Option("async", false, "Should distcp execution be blocking")),
FILE_LIMIT("",
new Option("filelimit", true, "(Deprecated!) Limit number of files " +
"copied to <= n")),
SIZE_LIMIT("",
new Option("sizelimit", true, "(Deprecated!) Limit number of files " +
"copied to <= n bytes")),
/**
* Specify bandwidth per map in MB
*/
BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
new Option("bandwidth", true, "Specify bandwidth per map in MB"));
private final String confLabel;
private final Option option;
DistCpOptionSwitch(String confLabel, Option option) {
this.confLabel = confLabel;
this.option = option;
}
/**
* Get Configuration label for the option
* @return configuration label name
*/
public String getConfigLabel() {
return confLabel;
}
/**
* Get CLI Option corresponding to the distcp option
* @return option
*/
public Option getOption() {
return option;
}
/**
* Get Switch symbol
* @return the switch symbol string
*/
public String getSwitch() {
return option.getOpt();
}
@Override
public String toString() {
return super.name() + " {" +
"confLabel='" + confLabel + '\'' +
", option=" + option + '}';
}
/**
* Helper function to add an option to hadoop configuration object
* @param conf - Configuration object to include the option
* @param option - Option to add
* @param value - Value
*/
public static void addToConf(Configuration conf,
DistCpOptionSwitch option,
String value) {
conf.set(option.getConfigLabel(), value);
}
/**
* Helper function to set an option to hadoop configuration object
* @param conf - Configuration object to include the option
* @param option - Option to add
*/
public static void addToConf(Configuration conf,
DistCpOptionSwitch option) {
conf.set(option.getConfigLabel(), "true");
}
}
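As a quick illustration of how this enum is meant to be used (a sketch, not part of this patch): getSwitch() yields the command-line flag, getConfigLabel() the configuration key, and the addToConf() helpers stamp a value into a job Configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpOptionSwitch;

public class OptionSwitchSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Record that -update (SYNC_FOLDERS) was requested; the value defaults to "true".
    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS);
    // Record an explicit value, e.g. the per-map bandwidth in MB.
    DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH, "50");
    System.out.println("-" + DistCpOptionSwitch.SYNC_FOLDERS.getSwitch() + " -> "
        + conf.get(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel()));   // prints: -update -> true
  }
}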

View File

@@ -0,0 +1,525 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.util.DistCpUtils;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* The Options class encapsulates all DistCp options.
* These may be set from command-line (via the OptionsParser)
* or may be set manually.
*/
public class DistCpOptions {
private boolean atomicCommit = false;
private boolean syncFolder = false;
private boolean deleteMissing = false;
private boolean ignoreFailures = false;
private boolean overwrite = false;
private boolean skipCRC = false;
private boolean blocking = true;
private int maxMaps = DistCpConstants.DEFAULT_MAPS;
private int mapBandwidth = DistCpConstants.DEFAULT_BANDWIDTH_MB;
private String sslConfigurationFile;
private String copyStrategy = DistCpConstants.UNIFORMSIZE;
private EnumSet<FileAttribute> preserveStatus = EnumSet.noneOf(FileAttribute.class);
private Path atomicWorkPath;
private Path logPath;
private Path sourceFileListing;
private List<Path> sourcePaths;
private Path targetPath;
public static enum FileAttribute{
REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION;
public static FileAttribute getAttribute(char symbol) {
for (FileAttribute attribute : values()) {
if (attribute.name().charAt(0) == Character.toUpperCase(symbol)) {
return attribute;
}
}
throw new NoSuchElementException("No attribute for " + symbol);
}
}
/**
* Constructor, to initialize source/target paths.
* @param sourcePaths List of source-paths (including wildcards)
* to be copied to target.
* @param targetPath Destination path for the dist-copy.
*/
public DistCpOptions(List<Path> sourcePaths, Path targetPath) {
assert sourcePaths != null && !sourcePaths.isEmpty() : "Invalid source paths";
assert targetPath != null : "Invalid Target path";
this.sourcePaths = sourcePaths;
this.targetPath = targetPath;
}
/**
* Constructor, to initialize source/target paths.
* @param sourceFileListing File containing list of source paths
* @param targetPath Destination path for the dist-copy.
*/
public DistCpOptions(Path sourceFileListing, Path targetPath) {
assert sourceFileListing != null : "Invalid source paths";
assert targetPath != null : "Invalid Target path";
this.sourceFileListing = sourceFileListing;
this.targetPath = targetPath;
}
/**
* Copy constructor.
* @param that DistCpOptions being copied from.
*/
public DistCpOptions(DistCpOptions that) {
if (this != that && that != null) {
this.atomicCommit = that.atomicCommit;
this.syncFolder = that.syncFolder;
this.deleteMissing = that.deleteMissing;
this.ignoreFailures = that.ignoreFailures;
this.overwrite = that.overwrite;
this.skipCRC = that.skipCRC;
this.blocking = that.blocking;
this.maxMaps = that.maxMaps;
this.mapBandwidth = that.mapBandwidth;
this.sslConfigurationFile = that.getSslConfigurationFile();
this.copyStrategy = that.copyStrategy;
this.preserveStatus = that.preserveStatus;
this.atomicWorkPath = that.getAtomicWorkPath();
this.logPath = that.getLogPath();
this.sourceFileListing = that.getSourceFileListing();
this.sourcePaths = that.getSourcePaths();
this.targetPath = that.getTargetPath();
}
}
/**
* Should the data be committed atomically?
*
* @return true if data should be committed atomically. false otherwise
*/
public boolean shouldAtomicCommit() {
return atomicCommit;
}
/**
* Set if data needs to be committed atomically
*
* @param atomicCommit - boolean switch
*/
public void setAtomicCommit(boolean atomicCommit) {
validate(DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
this.atomicCommit = atomicCommit;
}
/**
* Should the data be sync'ed between source and target paths?
*
* @return true if data should be sync'ed up. false otherwise
*/
public boolean shouldSyncFolder() {
return syncFolder;
}
/**
* Set if source and target folder contents should be sync'ed up
*
* @param syncFolder - boolean switch
*/
public void setSyncFolder(boolean syncFolder) {
validate(DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
this.syncFolder = syncFolder;
}
/**
* Should files in the target that are missing from the source be deleted?
*
* @return true if zombie target files are to be removed. false otherwise
*/
public boolean shouldDeleteMissing() {
return deleteMissing;
}
/**
* Set if files only present in target should be deleted
*
* @param deleteMissing - boolean switch
*/
public void setDeleteMissing(boolean deleteMissing) {
validate(DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
this.deleteMissing = deleteMissing;
}
/**
* Should failures be logged and ignored during copy?
*
* @return true if failures are to be logged and ignored. false otherwise
*/
public boolean shouldIgnoreFailures() {
return ignoreFailures;
}
/**
* Set if failures during copy should be ignored
*
* @param ignoreFailures - boolean switch
*/
public void setIgnoreFailures(boolean ignoreFailures) {
this.ignoreFailures = ignoreFailures;
}
/**
* Should DistCp run in blocking mode?
*
* @return true if it should run in blocking mode, false otherwise
*/
public boolean shouldBlock() {
return blocking;
}
/**
* Set if DistCp should run in blocking or non-blocking mode
*
* @param blocking - boolean switch
*/
public void setBlocking(boolean blocking) {
this.blocking = blocking;
}
/**
* Should files be overwritten always?
*
* @return true if files in the target that existed before distcp should always
*         be overwritten. false otherwise
*/
public boolean shouldOverwrite() {
return overwrite;
}
/**
* Set if files should always be overwritten on target
*
* @param overwrite - boolean switch
*/
public void setOverwrite(boolean overwrite) {
validate(DistCpOptionSwitch.OVERWRITE, overwrite);
this.overwrite = overwrite;
}
/**
* Should CRC/checksum check be skipped while checking files are identical
*
* @return true if checksum check should be skipped while checking files are
* identical. false otherwise
*/
public boolean shouldSkipCRC() {
return skipCRC;
}
/**
* Set if checksum comparison should be skipped while determining if
* source and destination files are identical
*
* @param skipCRC - boolean switch
*/
public void setSkipCRC(boolean skipCRC) {
validate(DistCpOptionSwitch.SKIP_CRC, skipCRC);
this.skipCRC = skipCRC;
}
/** Get the max number of maps to use for this copy
*
* @return Max number of maps
*/
public int getMaxMaps() {
return maxMaps;
}
/**
* Set the max number of maps to use for copy
*
* @param maxMaps - Number of maps
*/
public void setMaxMaps(int maxMaps) {
this.maxMaps = maxMaps;
}
/** Get the map bandwidth in MB
*
* @return Bandwidth in MB
*/
public int getMapBandwidth() {
return mapBandwidth;
}
/**
* Set per map bandwidth
*
* @param mapBandwidth - per map bandwidth
*/
public void setMapBandwidth(int mapBandwidth) {
assert mapBandwidth > 0 : "Bandwidth " + mapBandwidth + " is invalid (should be > 0)";
this.mapBandwidth = mapBandwidth;
}
/**
* Get path where the ssl configuration file is present to use for hftps://
*
* @return Path on local file system
*/
public String getSslConfigurationFile() {
return sslConfigurationFile;
}
/**
* Set the SSL configuration file path to use with hftps:// (local path)
*
* @param sslConfigurationFile - Local ssl config file path
*/
public void setSslConfigurationFile(String sslConfigurationFile) {
this.sslConfigurationFile = sslConfigurationFile;
}
/**
* Returns an iterator with the list of file attributes to preserve
*
* @return iterator of file attributes to preserve
*/
public Iterator<FileAttribute> preserveAttributes() {
return preserveStatus.iterator();
}
/**
* Checks if the input attribute should be preserved or not
*
* @param attribute - Attribute to check
* @return True if attribute should be preserved, false otherwise
*/
public boolean shouldPreserve(FileAttribute attribute) {
return preserveStatus.contains(attribute);
}
/**
* Add file attributes that need to be preserved. This method may be
* called multiple times to add attributes.
*
* @param fileAttribute - Attribute to add, one at a time
*/
public void preserve(FileAttribute fileAttribute) {
for (FileAttribute attribute : preserveStatus) {
if (attribute.equals(fileAttribute)) {
return;
}
}
preserveStatus.add(fileAttribute);
}
/** Get work path for atomic commit. If null, the work
* path would be parentOf(targetPath) + "/._WIP_" + nameOf(targetPath)
*
* @return Atomic work path on the target cluster. Null if not set
*/
public Path getAtomicWorkPath() {
return atomicWorkPath;
}
/**
* Set the work path for atomic commit
*
* @param atomicWorkPath - Path on the target cluster
*/
public void setAtomicWorkPath(Path atomicWorkPath) {
this.atomicWorkPath = atomicWorkPath;
}
/** Get the output directory for writing distcp logs. If unset, logs
* are temporarily written to JobStagingDir/_logs and deleted
* upon job completion
*
* @return Log output path on the cluster where distcp job is run
*/
public Path getLogPath() {
return logPath;
}
/**
* Set the log path where distcp output logs are stored
* Uses JobStagingDir/_logs by default
*
* @param logPath - Path where logs will be saved
*/
public void setLogPath(Path logPath) {
this.logPath = logPath;
}
/**
* Get the copy strategy to use. Uses appropriate input format
*
* @return copy strategy to use
*/
public String getCopyStrategy() {
return copyStrategy;
}
/**
* Set the copy strategy to use. Should map to a strategy implementation
* in distcp-default.xml
*
* @param copyStrategy - copy Strategy to use
*/
public void setCopyStrategy(String copyStrategy) {
this.copyStrategy = copyStrategy;
}
/**
* File path (hdfs:// or file://) that contains the list of actual
* files to copy
*
* @return - Source listing file path
*/
public Path getSourceFileListing() {
return sourceFileListing;
}
/**
* Getter for sourcePaths.
* @return List of source-paths.
*/
public List<Path> getSourcePaths() {
return sourcePaths;
}
/**
* Setter for sourcePaths.
* @param sourcePaths The new list of source-paths.
*/
public void setSourcePaths(List<Path> sourcePaths) {
assert sourcePaths != null && sourcePaths.size() != 0;
this.sourcePaths = sourcePaths;
}
/**
* Getter for the targetPath.
* @return The target-path.
*/
public Path getTargetPath() {
return targetPath;
}
public void validate(DistCpOptionSwitch option, boolean value) {
boolean syncFolder = (option == DistCpOptionSwitch.SYNC_FOLDERS ?
value : this.syncFolder);
boolean overwrite = (option == DistCpOptionSwitch.OVERWRITE ?
value : this.overwrite);
boolean deleteMissing = (option == DistCpOptionSwitch.DELETE_MISSING ?
value : this.deleteMissing);
boolean atomicCommit = (option == DistCpOptionSwitch.ATOMIC_COMMIT ?
value : this.atomicCommit);
boolean skipCRC = (option == DistCpOptionSwitch.SKIP_CRC ?
value : this.skipCRC);
if (syncFolder && atomicCommit) {
throw new IllegalArgumentException("Atomic commit can't be used with " +
"sync folder or overwrite options");
}
if (deleteMissing && !(overwrite || syncFolder)) {
throw new IllegalArgumentException("Delete missing is applicable " +
"only with update or overwrite options");
}
if (overwrite && syncFolder) {
throw new IllegalArgumentException("Overwrite and update options are " +
"mutually exclusive");
}
if (!syncFolder && skipCRC) {
throw new IllegalArgumentException("Skip CRC is valid only with update options");
}
}
/**
* Add options to configuration. These will be used in the Mapper/committer
*
* @param conf - Configuration object to which the options need to be added
*/
public void appendToConf(Configuration conf) {
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.ATOMIC_COMMIT,
String.valueOf(atomicCommit));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.IGNORE_FAILURES,
String.valueOf(ignoreFailures));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SYNC_FOLDERS,
String.valueOf(syncFolder));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.DELETE_MISSING,
String.valueOf(deleteMissing));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.OVERWRITE,
String.valueOf(overwrite));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.SKIP_CRC,
String.valueOf(skipCRC));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.BANDWIDTH,
String.valueOf(mapBandwidth));
DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
DistCpUtils.packAttributes(preserveStatus));
}
/**
* Utility to easily string-ify Options, for logging.
*
* @return String representation of the Options.
*/
@Override
public String toString() {
return "DistCpOptions{" +
"atomicCommit=" + atomicCommit +
", syncFolder=" + syncFolder +
", deleteMissing=" + deleteMissing +
", ignoreFailures=" + ignoreFailures +
", maxMaps=" + maxMaps +
", sslConfigurationFile='" + sslConfigurationFile + '\'' +
", copyStrategy='" + copyStrategy + '\'' +
", sourceFileListing=" + sourceFileListing +
", sourcePaths=" + sourcePaths +
", targetPath=" + targetPath +
'}';
}
@Override
protected DistCpOptions clone() throws CloneNotSupportedException {
return (DistCpOptions) super.clone();
}
}
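A short sketch (not part of this patch) of building DistCpOptions programmatically; it also shows validate() rejecting an incompatible combination of switches. The paths are placeholders.

import java.util.Arrays;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCpOptions;

public class DistCpOptionsSketch {
  public static void main(String[] args) {
    DistCpOptions options = new DistCpOptions(
        Arrays.asList(new Path("hdfs://src-nn:8020/data")),   // source (placeholder)
        new Path("hdfs://dst-nn:8020/data"));                 // target (placeholder)
    options.setSyncFolder(true);      // -update
    options.setDeleteMissing(true);   // -delete, valid together with -update
    try {
      options.setAtomicCommit(true);  // -atomic conflicts with -update
    } catch (IllegalArgumentException expected) {
      System.out.println("Rejected: " + expected.getMessage());
    }
  }
}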

View File

@@ -0,0 +1,100 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.Credentials;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
/**
* FileBasedCopyListing implements the CopyListing interface,
* to create the copy-listing for DistCp,
* by iterating over all source paths mentioned in a specified input-file.
*/
public class FileBasedCopyListing extends CopyListing {
private final CopyListing globbedListing;
/**
* Constructor, to initialize base-class.
* @param configuration The input Configuration object.
* @param credentials - Credentials object on which the FS delegation tokens are cached. If null
* delegation token caching is skipped
*/
public FileBasedCopyListing(Configuration configuration, Credentials credentials) {
super(configuration, credentials);
globbedListing = new GlobbedCopyListing(getConf(), credentials);
}
/** {@inheritDoc} */
@Override
protected void validatePaths(DistCpOptions options)
throws IOException, InvalidInputException {
}
/**
* Implementation of CopyListing::buildListing().
* Iterates over all source paths mentioned in the input-file.
* @param pathToListFile Path on HDFS where the listing file is written.
* @param options Input Options for DistCp (indicating source/target paths.)
* @throws IOException
*/
@Override
public void doBuildListing(Path pathToListFile, DistCpOptions options) throws IOException {
DistCpOptions newOption = new DistCpOptions(options);
newOption.setSourcePaths(fetchFileList(options.getSourceFileListing()));
globbedListing.buildListing(pathToListFile, newOption);
}
private List<Path> fetchFileList(Path sourceListing) throws IOException {
List<Path> result = new ArrayList<Path>();
FileSystem fs = sourceListing.getFileSystem(getConf());
BufferedReader input = null;
try {
input = new BufferedReader(new InputStreamReader(fs.open(sourceListing)));
String line = input.readLine();
while (line != null) {
result.add(new Path(line));
line = input.readLine();
}
} finally {
IOUtils.closeStream(input);
}
return result;
}
/** {@inheritDoc} */
@Override
protected long getBytesToCopy() {
return globbedListing.getBytesToCopy();
}
/** {@inheritDoc} */
@Override
protected long getNumberOfPaths() {
return globbedListing.getNumberOfPaths();
}
}
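To make the -f flow concrete, here is an illustrative sketch (not part of this patch): the listing file holds one source path per line, and FileBasedCopyListing expands it into the sequence-file copy listing via the inherited buildListing() (assumed callable from outside the package). All paths are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.FileBasedCopyListing;

public class FileListingSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // srcList.txt contains one source path per line, e.g.:
    //   hdfs://src-nn:8020/data/2012/01/25
    //   hdfs://src-nn:8020/data/2012/01/26
    Path srcList = new Path("hdfs://src-nn:8020/meta/srcList.txt");   // placeholder
    Path target  = new Path("hdfs://dst-nn:8020/backup");             // placeholder
    DistCpOptions options = new DistCpOptions(srcList, target);
    FileBasedCopyListing listing = new FileBasedCopyListing(conf, new Credentials());
    // Writes the expanded copy listing to a sequence file (placeholder path).
    listing.buildListing(new Path("hdfs://dst-nn:8020/meta/fileList.seq"), options);
  }
}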

View File

@@ -0,0 +1,105 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
/**
* GlobbedCopyListing implements the CopyListing interface, to create the copy
* listing-file by "globbing" all specified source paths (wild-cards and all.)
*/
public class GlobbedCopyListing extends CopyListing {
private static final Log LOG = LogFactory.getLog(GlobbedCopyListing.class);
private final CopyListing simpleListing;
/**
* Constructor, to initialize the configuration.
* @param configuration The input Configuration object.
* @param credentials Credentials object on which the FS delegation tokens are cached. If null
* delegation token caching is skipped
*/
public GlobbedCopyListing(Configuration configuration, Credentials credentials) {
super(configuration, credentials);
simpleListing = new SimpleCopyListing(getConf(), credentials) ;
}
/** {@inheritDoc} */
@Override
protected void validatePaths(DistCpOptions options)
throws IOException, InvalidInputException {
}
/**
* Implementation of CopyListing::buildListing().
* Creates the copy listing by "globbing" all source-paths.
* @param pathToListingFile The location at which the copy-listing file
* is to be created.
* @param options Input Options for DistCp (indicating source/target paths.)
* @throws IOException
*/
@Override
public void doBuildListing(Path pathToListingFile,
DistCpOptions options) throws IOException {
List<Path> globbedPaths = new ArrayList<Path>();
if (options.getSourcePaths().isEmpty()) {
throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
}
for (Path p : options.getSourcePaths()) {
FileSystem fs = p.getFileSystem(getConf());
FileStatus[] inputs = fs.globStatus(p);
if(inputs != null && inputs.length > 0) {
for (FileStatus onePath: inputs) {
globbedPaths.add(onePath.getPath());
}
} else {
throw new InvalidInputException(p + " doesn't exist");
}
}
DistCpOptions optionsGlobbed = new DistCpOptions(options);
optionsGlobbed.setSourcePaths(globbedPaths);
simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}
/** {@inheritDoc} */
@Override
protected long getBytesToCopy() {
return simpleListing.getBytesToCopy();
}
/** {@inheritDoc} */
@Override
protected long getNumberOfPaths() {
return simpleListing.getNumberOfPaths();
}
}

View File

@@ -0,0 +1,246 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.commons.cli.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import java.util.*;
/**
* The OptionsParser parses out the command-line options passed to DistCp,
* and interprets those specific to DistCp, to create an Options object.
*/
public class OptionsParser {
private static final Log LOG = LogFactory.getLog(OptionsParser.class);
private static final Options cliOptions = new Options();
static {
for (DistCpOptionSwitch option : DistCpOptionSwitch.values()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding option " + option.getOption());
}
cliOptions.addOption(option.getOption());
}
}
private static class CustomParser extends GnuParser {
@Override
protected String[] flatten(Options options, String[] arguments, boolean stopAtNonOption) {
for (int index = 0; index < arguments.length; index++) {
if (arguments[index].equals("-" + DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
arguments[index] = "-prbugp";
}
}
return super.flatten(options, arguments, stopAtNonOption);
}
}
/**
* The parse method parses the command-line options, and creates
* a corresponding Options object.
* @param args Command-line arguments (excluding the options consumed
* by the GenericOptionsParser).
* @return The Options object, corresponding to the specified command-line.
* @throws IllegalArgumentException Thrown if the parse fails.
*/
public static DistCpOptions parse(String args[]) throws IllegalArgumentException {
CommandLineParser parser = new CustomParser();
CommandLine command;
try {
command = parser.parse(cliOptions, args, true);
} catch (ParseException e) {
throw new IllegalArgumentException("Unable to parse arguments. " +
Arrays.toString(args), e);
}
DistCpOptions option;
Path targetPath;
List<Path> sourcePaths = new ArrayList<Path>();
String leftOverArgs[] = command.getArgs();
if (leftOverArgs == null || leftOverArgs.length < 1) {
throw new IllegalArgumentException("Target path not specified");
}
//Last Argument is the target path
targetPath = new Path(leftOverArgs[leftOverArgs.length -1].trim());
//Copy any source paths in the arguments to the list
for (int index = 0; index < leftOverArgs.length - 1; index++) {
sourcePaths.add(new Path(leftOverArgs[index].trim()));
}
/* If the command has a source file listing, use it; else, fall back on the source paths in args.
If both are present, throw an exception and bail */
if (command.hasOption(DistCpOptionSwitch.SOURCE_FILE_LISTING.getSwitch())) {
if (!sourcePaths.isEmpty()) {
throw new IllegalArgumentException("Both source file listing and source paths present");
}
option = new DistCpOptions(new Path(getVal(command, DistCpOptionSwitch.
SOURCE_FILE_LISTING.getSwitch())), targetPath);
} else {
if (sourcePaths.isEmpty()) {
throw new IllegalArgumentException("Neither source file listing nor source paths present");
}
option = new DistCpOptions(sourcePaths, targetPath);
}
//Process all the other option switches and set options appropriately
if (command.hasOption(DistCpOptionSwitch.IGNORE_FAILURES.getSwitch())) {
option.setIgnoreFailures(true);
}
if (command.hasOption(DistCpOptionSwitch.ATOMIC_COMMIT.getSwitch())) {
option.setAtomicCommit(true);
}
if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch()) &&
option.shouldAtomicCommit()) {
String workPath = getVal(command, DistCpOptionSwitch.WORK_PATH.getSwitch());
if (workPath != null && !workPath.isEmpty()) {
option.setAtomicWorkPath(new Path(workPath));
}
} else if (command.hasOption(DistCpOptionSwitch.WORK_PATH.getSwitch())) {
throw new IllegalArgumentException("-tmp work-path can only be specified along with -atomic");
}
if (command.hasOption(DistCpOptionSwitch.LOG_PATH.getSwitch())) {
option.setLogPath(new Path(getVal(command, DistCpOptionSwitch.LOG_PATH.getSwitch())));
}
if (command.hasOption(DistCpOptionSwitch.SYNC_FOLDERS.getSwitch())) {
option.setSyncFolder(true);
}
if (command.hasOption(DistCpOptionSwitch.OVERWRITE.getSwitch())) {
option.setOverwrite(true);
}
if (command.hasOption(DistCpOptionSwitch.DELETE_MISSING.getSwitch())) {
option.setDeleteMissing(true);
}
if (command.hasOption(DistCpOptionSwitch.SKIP_CRC.getSwitch())) {
option.setSkipCRC(true);
}
if (command.hasOption(DistCpOptionSwitch.BLOCKING.getSwitch())) {
option.setBlocking(false);
}
if (command.hasOption(DistCpOptionSwitch.BANDWIDTH.getSwitch())) {
try {
Integer mapBandwidth = Integer.parseInt(
getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()).trim());
option.setMapBandwidth(mapBandwidth);
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Bandwidth specified is invalid: " +
getVal(command, DistCpOptionSwitch.BANDWIDTH.getSwitch()), e);
}
}
if (command.hasOption(DistCpOptionSwitch.SSL_CONF.getSwitch())) {
option.setSslConfigurationFile(command.
getOptionValue(DistCpOptionSwitch.SSL_CONF.getSwitch()));
}
if (command.hasOption(DistCpOptionSwitch.MAX_MAPS.getSwitch())) {
try {
Integer maps = Integer.parseInt(
getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()).trim());
option.setMaxMaps(maps);
} catch (NumberFormatException e) {
throw new IllegalArgumentException("Number of maps is invalid: " +
getVal(command, DistCpOptionSwitch.MAX_MAPS.getSwitch()), e);
}
}
if (command.hasOption(DistCpOptionSwitch.COPY_STRATEGY.getSwitch())) {
option.setCopyStrategy(
getVal(command, DistCpOptionSwitch.COPY_STRATEGY.getSwitch()));
}
if (command.hasOption(DistCpOptionSwitch.PRESERVE_STATUS.getSwitch())) {
String attributes =
getVal(command, DistCpOptionSwitch.PRESERVE_STATUS.getSwitch());
if (attributes == null || attributes.isEmpty()) {
for (FileAttribute attribute : FileAttribute.values()) {
option.preserve(attribute);
}
} else {
for (int index = 0; index < attributes.length(); index++) {
option.preserve(FileAttribute.
getAttribute(attributes.charAt(index)));
}
}
}
if (command.hasOption(DistCpOptionSwitch.FILE_LIMIT.getSwitch())) {
String fileLimitString = getVal(command,
DistCpOptionSwitch.FILE_LIMIT.getSwitch().trim());
try {
Integer.parseInt(fileLimitString);
}
catch (NumberFormatException e) {
throw new IllegalArgumentException("File-limit is invalid: "
+ fileLimitString, e);
}
LOG.warn(DistCpOptionSwitch.FILE_LIMIT.getSwitch() + " is a deprecated" +
" option. Ignoring.");
}
if (command.hasOption(DistCpOptionSwitch.SIZE_LIMIT.getSwitch())) {
String sizeLimitString = getVal(command,
DistCpOptionSwitch.SIZE_LIMIT.getSwitch().trim());
try {
Long.parseLong(sizeLimitString);
}
catch (NumberFormatException e) {
throw new IllegalArgumentException("Size-limit is invalid: "
+ sizeLimitString, e);
}
LOG.warn(DistCpOptionSwitch.SIZE_LIMIT.getSwitch() + " is a deprecated" +
" option. Ignoring.");
}
return option;
}
private static String getVal(CommandLine command, String swtch) {
String optionValue = command.getOptionValue(swtch);
if (optionValue == null) {
return null;
} else {
return optionValue.trim();
}
}
public static void usage() {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("distcp OPTIONS [source_path...] <target_path>\n\nOPTIONS", cliOptions);
}
}
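A minimal sketch (not part of this patch) of exercising the parser directly: parse() turns shell-style arguments into a DistCpOptions instance, and usage() prints the help text assembled from DistCpOptionSwitch. The paths are placeholders.

import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.OptionsParser;

public class OptionsParserSketch {
  public static void main(String[] args) {
    try {
      DistCpOptions options = OptionsParser.parse(new String[] {
          "-update", "-delete", "-m", "10",
          "hdfs://src-nn:8020/data",     // source (placeholder)
          "hdfs://dst-nn:8020/data"});   // target (placeholder)
      System.out.println(options);       // DistCpOptions{atomicCommit=false, ...}
    } catch (IllegalArgumentException e) {
      OptionsParser.usage();
    }
  }
}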

View File

@@ -0,0 +1,275 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.security.Credentials;
import java.io.*;
import java.util.Stack;
/**
* The SimpleCopyListing is responsible for making the exhaustive list of
* all files/directories under its specified list of input-paths.
* These are written into the specified copy-listing file.
* Note: The SimpleCopyListing doesn't handle wild-cards in the input-paths.
*/
public class SimpleCopyListing extends CopyListing {
private static final Log LOG = LogFactory.getLog(SimpleCopyListing.class);
private long totalPaths = 0;
private long totalBytesToCopy = 0;
/**
* Protected constructor, to initialize configuration.
*
* @param configuration The input configuration, with which the source/target FileSystems may be accessed.
* @param credentials - Credentials object on which the FS delegation tokens are cached. If null
* delegation token caching is skipped
*/
protected SimpleCopyListing(Configuration configuration, Credentials credentials) {
super(configuration, credentials);
}
@Override
protected void validatePaths(DistCpOptions options)
throws IOException, InvalidInputException {
Path targetPath = options.getTargetPath();
FileSystem targetFS = targetPath.getFileSystem(getConf());
boolean targetIsFile = targetFS.isFile(targetPath);
//If target is a file, then source has to be single file
if (targetIsFile) {
if (options.getSourcePaths().size() > 1) {
throw new InvalidInputException("Multiple source being copied to a file: " +
targetPath);
}
Path srcPath = options.getSourcePaths().get(0);
FileSystem sourceFS = srcPath.getFileSystem(getConf());
if (!sourceFS.isFile(srcPath)) {
throw new InvalidInputException("Cannot copy " + srcPath +
", which is not a file to " + targetPath);
}
}
if (options.shouldAtomicCommit() && targetFS.exists(targetPath)) {
throw new InvalidInputException("Target path for atomic-commit already exists: " +
targetPath + ". Cannot atomic-commit to pre-existing target-path.");
}
for (Path path: options.getSourcePaths()) {
FileSystem fs = path.getFileSystem(getConf());
if (!fs.exists(path)) {
throw new InvalidInputException(path + " doesn't exist");
}
}
/* This is required to allow map tasks to access each of the source
clusters. This retrieves the delegation token for each unique
file system and adds it to the job's private credential store
*/
Credentials credentials = getCredentials();
if (credentials != null) {
Path[] inputPaths = options.getSourcePaths().toArray(new Path[1]);
TokenCache.obtainTokensForNamenodes(credentials, inputPaths, getConf());
}
}
/** {@inheritDoc} */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
SequenceFile.Writer fileListWriter = null;
try {
fileListWriter = getWriter(pathToListingFile);
for (Path path: options.getSourcePaths()) {
FileSystem sourceFS = path.getFileSystem(getConf());
path = makeQualified(path);
FileStatus rootStatus = sourceFS.getFileStatus(path);
Path sourcePathRoot = computeSourceRootPath(rootStatus, options);
boolean localFile = (rootStatus.getClass() != FileStatus.class);
FileStatus[] sourceFiles = sourceFS.listStatus(path);
if (sourceFiles != null && sourceFiles.length > 0) {
for (FileStatus sourceStatus: sourceFiles) {
if (LOG.isDebugEnabled()) {
LOG.debug("Recording source-path: " + sourceStatus.getPath() + " for copy.");
}
writeToFileListing(fileListWriter, sourceStatus, sourcePathRoot, localFile);
if (isDirectoryAndNotEmpty(sourceFS, sourceStatus)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Traversing non-empty source dir: " + sourceStatus.getPath());
}
traverseNonEmptyDirectory(fileListWriter, sourceStatus, sourcePathRoot, localFile);
}
}
} else {
writeToFileListing(fileListWriter, rootStatus, sourcePathRoot, localFile);
}
}
} finally {
IOUtils.closeStream(fileListWriter);
}
}
private Path computeSourceRootPath(FileStatus sourceStatus,
DistCpOptions options) throws IOException {
Path target = options.getTargetPath();
FileSystem targetFS = target.getFileSystem(getConf());
boolean solitaryFile = options.getSourcePaths().size() == 1
&& !sourceStatus.isDirectory();
if (solitaryFile) {
if (targetFS.isFile(target) || !targetFS.exists(target)) {
return sourceStatus.getPath();
} else {
return sourceStatus.getPath().getParent();
}
} else {
boolean specialHandling = (options.getSourcePaths().size() == 1 && !targetFS.exists(target)) ||
options.shouldSyncFolder() || options.shouldOverwrite();
return specialHandling && sourceStatus.isDirectory() ? sourceStatus.getPath() :
sourceStatus.getPath().getParent();
}
}
/** {@inheritDoc} */
@Override
protected long getBytesToCopy() {
return totalBytesToCopy;
}
/** {@inheritDoc} */
@Override
protected long getNumberOfPaths() {
return totalPaths;
}
private Path makeQualified(Path path) throws IOException {
final FileSystem fs = path.getFileSystem(getConf());
return path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
}
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
FileSystem fs = pathToListFile.getFileSystem(getConf());
if (fs.exists(pathToListFile)) {
fs.delete(pathToListFile, false);
}
return SequenceFile.createWriter(getConf(),
SequenceFile.Writer.file(pathToListFile),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(FileStatus.class),
SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
private static boolean isDirectoryAndNotEmpty(FileSystem fileSystem,
FileStatus fileStatus) throws IOException {
return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
}
private static FileStatus[] getChildren(FileSystem fileSystem,
FileStatus parent) throws IOException {
return fileSystem.listStatus(parent.getPath());
}
private void traverseNonEmptyDirectory(SequenceFile.Writer fileListWriter,
FileStatus sourceStatus,
Path sourcePathRoot, boolean localFile)
throws IOException {
FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
Stack<FileStatus> pathStack = new Stack<FileStatus>();
pathStack.push(sourceStatus);
while (!pathStack.isEmpty()) {
for (FileStatus child: getChildren(sourceFS, pathStack.pop())) {
if (LOG.isDebugEnabled())
LOG.debug("Recording source-path: "
+ sourceStatus.getPath() + " for copy.");
writeToFileListing(fileListWriter, child, sourcePathRoot, localFile);
if (isDirectoryAndNotEmpty(sourceFS, child)) {
if (LOG.isDebugEnabled())
LOG.debug("Traversing non-empty source dir: "
+ sourceStatus.getPath());
pathStack.push(child);
}
}
}
}
private void writeToFileListing(SequenceFile.Writer fileListWriter,
FileStatus fileStatus, Path sourcePathRoot,
boolean localFile) throws IOException {
if (fileStatus.getPath().equals(sourcePathRoot) && fileStatus.isDirectory())
return; // Skip the root-paths.
if (LOG.isDebugEnabled()) {
LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
}
FileStatus status = fileStatus;
if (localFile) {
status = getFileStatus(fileStatus);
}
fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath())), status);
fileListWriter.sync();
if (!fileStatus.isDirectory()) {
totalBytesToCopy += fileStatus.getLen();
}
totalPaths++;
}
private static final ByteArrayOutputStream buffer = new ByteArrayOutputStream(64);
private DataInputBuffer in = new DataInputBuffer();
private FileStatus getFileStatus(FileStatus fileStatus) throws IOException {
FileStatus status = new FileStatus();
buffer.reset();
DataOutputStream out = new DataOutputStream(buffer);
fileStatus.write(out);
in.reset(buffer.toByteArray(), 0, buffer.size());
status.readFields(in);
return status;
}
}
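Since the listing produced above is an ordinary SequenceFile of (Text relative-path, FileStatus) pairs, it can be read back with a plain SequenceFile.Reader, which is the same pattern the committer below uses. An illustrative sketch, not part of this patch; the listing path is a placeholder.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ListingReaderSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path listing = new Path("hdfs://dst-nn:8020/meta/fileList.seq");   // placeholder
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(listing));
    try {
      Text relPath = new Text();
      FileStatus status = new FileStatus();
      while (reader.next(relPath, status)) {   // key: source-relative path, value: FileStatus
        System.out.println(relPath + " -> " + status.getLen() + " bytes");
      }
    } finally {
      reader.close();
    }
  }
}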

View File

@@ -0,0 +1,297 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.tools.*;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.util.DistCpUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
/**
* The CopyCommitter class is DistCp's OutputCommitter implementation. It is
* responsible for handling the completion/cleanup of the DistCp run.
* Specifically, it does the following:
* 1. Cleanup of the meta-folder (where DistCp maintains its file-list, etc.)
* 2. Preservation of user/group/replication-factor on any directories that
* have been copied. (Files are taken care of in their map-tasks.)
* 3. Atomic-move of data from the temporary work-folder to the final path
* (if atomic-commit was opted for).
* 4. Deletion of files from the target that are missing at source (if opted for).
* 5. Cleanup of any partially copied files, from previous, failed attempts.
*/
public class CopyCommitter extends FileOutputCommitter {
private static final Log LOG = LogFactory.getLog(CopyCommitter.class);
private final TaskAttemptContext taskAttemptContext;
/**
* Create an output committer
*
* @param outputPath the job's output path
* @param context the task's context
* @throws IOException - Exception if any
*/
public CopyCommitter(Path outputPath, TaskAttemptContext context) throws IOException {
super(outputPath, context);
this.taskAttemptContext = context;
}
/** @inheritDoc */
@Override
public void commitJob(JobContext jobContext) throws IOException {
Configuration conf = jobContext.getConfiguration();
super.commitJob(jobContext);
cleanupTempFiles(jobContext);
String attributes = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
if (attributes != null && !attributes.isEmpty()) {
preserveFileAttributesForDirectories(conf);
}
try {
if (conf.getBoolean(DistCpConstants.CONF_LABEL_DELETE_MISSING, false)) {
deleteMissing(conf);
} else if (conf.getBoolean(DistCpConstants.CONF_LABEL_ATOMIC_COPY, false)) {
commitData(conf);
}
taskAttemptContext.setStatus("Commit Successful");
}
finally {
cleanup(conf);
}
}
/** {@inheritDoc} */
@Override
public void abortJob(JobContext jobContext,
JobStatus.State state) throws IOException {
try {
super.abortJob(jobContext, state);
} finally {
cleanupTempFiles(jobContext);
cleanup(jobContext.getConfiguration());
}
}
private void cleanupTempFiles(JobContext context) {
try {
Configuration conf = context.getConfiguration();
Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
FileSystem targetFS = targetWorkPath.getFileSystem(conf);
String jobId = context.getJobID().toString();
deleteAttemptTempFiles(targetWorkPath, targetFS, jobId);
deleteAttemptTempFiles(targetWorkPath.getParent(), targetFS, jobId);
} catch (Throwable t) {
LOG.warn("Unable to cleanup temp files", t);
}
}
private void deleteAttemptTempFiles(Path targetWorkPath,
FileSystem targetFS,
String jobId) throws IOException {
FileStatus[] tempFiles = targetFS.globStatus(
new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job","attempt") + "*"));
if (tempFiles != null && tempFiles.length > 0) {
for (FileStatus file : tempFiles) {
LOG.info("Cleaning up " + file.getPath());
targetFS.delete(file.getPath(), false);
}
}
}
/**
* Cleanup meta folder and other temporary files
*
* @param conf - Job Configuration
*/
private void cleanup(Configuration conf) {
Path metaFolder = new Path(conf.get(DistCpConstants.CONF_LABEL_META_FOLDER));
try {
FileSystem fs = metaFolder.getFileSystem(conf);
LOG.info("Cleaning up temporary work folder: " + metaFolder);
fs.delete(metaFolder, true);
} catch (IOException ignore) {
LOG.error("Exception encountered ", ignore);
}
}
// This method changes the target-directories' file-attributes (owner,
// user/group permissions, etc.) based on the corresponding source directories.
private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
LOG.info("About to preserve attributes: " + attrSymbols);
EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sourceListing));
long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
long preservedEntries = 0;
try {
FileStatus srcFileStatus = new FileStatus();
Text srcRelPath = new Text();
// Iterate over every source path that was copied.
while (sourceReader.next(srcRelPath, srcFileStatus)) {
// File-attributes for files are set at the time of copy,
// in the map-task.
if (! srcFileStatus.isDirectory()) continue;
Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
// Skip the root folder.
// Status can't be preserved on root-folder. (E.g. multiple paths may
// be copied to a single target folder. Which source-attributes to use
// on the target is undefined.)
if (targetRoot.equals(targetFile)) continue;
FileSystem targetFS = targetFile.getFileSystem(conf);
DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
preservedEntries++;
taskAttemptContext.progress();
taskAttemptContext.setStatus("Preserving status on directory entries. [" +
sourceReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
}
LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
// This method deletes "extra" files from the target, if they're not
// available at the source.
private void deleteMissing(Configuration conf) throws IOException {
LOG.info("-delete option is enabled. About to remove entries from " +
"target that are missing in source");
// Sort the source-file listing alphabetically.
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
// Similarly, create the listing of target-files. Sort alphabetically.
Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
List<Path> targets = new ArrayList<Path>(1);
Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
targets.add(targetFinalPath);
DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
target.buildListing(targetListing, options);
Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedSourceListing));
SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedTargetListing));
// Walk both source and target file listings.
// Delete all from target that doesn't also exist on source.
long deletedEntries = 0;
try {
FileStatus srcFileStatus = new FileStatus();
Text srcRelPath = new Text();
FileStatus trgtFileStatus = new FileStatus();
Text trgtRelPath = new Text();
FileSystem targetFS = targetFinalPath.getFileSystem(conf);
boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
while (targetReader.next(trgtRelPath, trgtFileStatus)) {
// Skip sources that don't exist on target.
while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
}
if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
// Target doesn't exist at source. Delete.
boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
targetFS.delete(trgtFileStatus.getPath(), true));
if (result) {
LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
deletedEntries++;
} else {
throw new IOException("Unable to delete " + trgtFileStatus.getPath());
}
taskAttemptContext.progress();
taskAttemptContext.setStatus("Deleting missing files from target. [" +
targetReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
IOUtils.closeStream(targetReader);
}
LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
private void commitData(Configuration conf) throws IOException {
Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
FileSystem targetFS = workDir.getFileSystem(conf);
LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir);
if (targetFS.exists(finalDir) && targetFS.exists(workDir)) {
LOG.error("Pre-existing final-path found at: " + finalDir);
throw new IOException("Target-path can't be committed to because it " +
"exists at " + finalDir + ". Copied data is in temp-dir: " + workDir + ". ");
}
boolean result = targetFS.rename(workDir, finalDir);
if (!result) {
LOG.warn("Rename failed. Perhaps data already moved. Verifying...");
result = targetFS.exists(finalDir) && !targetFS.exists(workDir);
}
if (result) {
LOG.info("Data committed successfully to " + finalDir);
taskAttemptContext.setStatus("Data committed successfully to " + finalDir);
} else {
LOG.error("Unable to commit data to " + finalDir);
throw new IOException("Atomic commit failed. Temporary data in " + workDir +
", Unable to move to " + finalDir);
}
}
}
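// Illustrative sketch, not part of the DistCp sources: deleteMissing() above is
// essentially a merge-walk over two alphabetically sorted listings. The standalone
// class below reproduces that walk with plain sorted string lists; the class name
// and sample paths are made up for illustration only.
public class SortedListingDiffSketch {
  public static void main(String[] args) {
    java.util.List<String> source = java.util.Arrays.asList("/a", "/b", "/d");
    java.util.List<String> target = java.util.Arrays.asList("/a", "/c", "/d", "/e");
    int s = 0;
    for (String t : target) {
      // Advance the source pointer past entries that sort before the current target entry.
      while (s < source.size() && source.get(s).compareTo(t) < 0) {
        s++;
      }
      if (s < source.size() && source.get(s).equals(t)) {
        continue; // Present at source: keep it on the target.
      }
      System.out.println("Would delete from target (missing at source): " + t);
    }
  }
}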

View File

@@ -0,0 +1,330 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.DistCpOptionSwitch;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.util.StringUtils;
import java.io.*;
import java.util.EnumSet;
import java.util.Arrays;
/**
* Mapper class that executes the DistCp copy operation.
* Extends the o.a.h.mapreduce.Mapper class.
*/
public class CopyMapper extends Mapper<Text, FileStatus, Text, Text> {
/**
* Hadoop counters for the DistCp CopyMapper.
* (These have been kept identical to the old DistCp,
* for backward compatibility.)
*/
public static enum Counter {
COPY, // Number of files received by the mapper for copy.
SKIP, // Number of files skipped.
FAIL, // Number of files that failed to be copied.
BYTESCOPIED, // Number of bytes actually copied by the copy-mapper, total.
BYTESEXPECTED,// Number of bytes expected to be copied.
BYTESFAILED, // Number of bytes that failed to be copied.
BYTESSKIPPED, // Number of bytes that were skipped from copy.
}
private static Log LOG = LogFactory.getLog(CopyMapper.class);
private Configuration conf;
private boolean syncFolders = false;
private boolean ignoreFailures = false;
private boolean skipCrc = false;
private boolean overWrite = false;
private EnumSet<FileAttribute> preserve = EnumSet.noneOf(FileAttribute.class);
private FileSystem targetFS = null;
private Path targetWorkPath = null;
/**
* Implementation of the Mapper::setup() method. This extracts the DistCp-
* options specified in the Job's configuration, to set up the map task.
* @param context Mapper's context.
* @throws IOException On IO failure.
* @throws InterruptedException If the job is interrupted.
*/
@Override
public void setup(Context context) throws IOException, InterruptedException {
conf = context.getConfiguration();
syncFolders = conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
ignoreFailures = conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
skipCrc = conf.getBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
overWrite = conf.getBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
preserve = DistCpUtils.unpackAttributes(conf.get(DistCpOptionSwitch.
PRESERVE_STATUS.getConfigLabel()));
targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
Path targetFinalPath = new Path(conf.get(
DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
targetFS = targetFinalPath.getFileSystem(conf);
if (targetFS.exists(targetFinalPath) && targetFS.isFile(targetFinalPath)) {
overWrite = true; // When target is an existing file, overwrite it.
}
if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
initializeSSLConf(context);
}
}
/**
* Initialize the SSL configuration, if one is specified in the job configuration
*
* @throws IOException - If any
*/
private void initializeSSLConf(Context context) throws IOException {
LOG.info("Initializing SSL configuration");
String workDir = conf.get(JobContext.JOB_LOCAL_DIR) + "/work";
Path[] cacheFiles = context.getLocalCacheFiles();
Configuration sslConfig = new Configuration(false);
String sslConfFileName = conf.get(DistCpConstants.CONF_LABEL_SSL_CONF);
Path sslClient = findCacheFile(cacheFiles, sslConfFileName);
if (sslClient == null) {
LOG.warn("SSL Client config file not found. Was looking for " + sslConfFileName +
" in " + Arrays.toString(cacheFiles));
return;
}
sslConfig.addResource(sslClient);
String trustStoreFile = conf.get("ssl.client.truststore.location");
Path trustStorePath = findCacheFile(cacheFiles, trustStoreFile);
sslConfig.set("ssl.client.truststore.location", trustStorePath.toString());
String keyStoreFile = conf.get("ssl.client.keystore.location");
Path keyStorePath = findCacheFile(cacheFiles, keyStoreFile);
sslConfig.set("ssl.client.keystore.location", keyStorePath.toString());
try {
OutputStream out = new FileOutputStream(workDir + "/" + sslConfFileName);
try {
sslConfig.writeXml(out);
} finally {
out.close();
}
conf.set(DistCpConstants.CONF_LABEL_SSL_KEYSTORE, sslConfFileName);
} catch (IOException e) {
LOG.warn("Unable to write out the ssl configuration. " +
"Will fall back to default ssl-client.xml in class path, if there is one", e);
}
}
/**
* Find an entry in the distributed cache
*
* @param cacheFiles - All localized cache files
* @param fileName - The file name to search for
* @return Path of the filename if found, else null
*/
private Path findCacheFile(Path[] cacheFiles, String fileName) {
if (cacheFiles != null && cacheFiles.length > 0) {
for (Path file : cacheFiles) {
if (file.getName().equals(fileName)) {
return file;
}
}
}
return null;
}
/**
* Implementation of the Mapper<>::map(). Does the copy.
* @param relPath The relative path of the target file.
* @param sourceFileStatus The FileStatus of the source file.
* @throws IOException
*/
@Override
public void map(Text relPath, FileStatus sourceFileStatus, Context context)
throws IOException, InterruptedException {
Path sourcePath = sourceFileStatus.getPath();
if (LOG.isDebugEnabled())
LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
Path target = new Path(targetWorkPath.makeQualified(targetFS.getUri(),
targetFS.getWorkingDirectory()) + relPath.toString());
EnumSet<DistCpOptions.FileAttribute> fileAttributes
= getFileAttributeSettings(context);
final String description = "Copying " + sourcePath + " to " + target;
context.setStatus(description);
LOG.info(description);
try {
FileStatus sourceCurrStatus;
FileSystem sourceFS;
try {
sourceFS = sourcePath.getFileSystem(conf);
sourceCurrStatus = sourceFS.getFileStatus(sourcePath);
} catch (FileNotFoundException e) {
throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
}
FileStatus targetStatus = null;
try {
targetStatus = targetFS.getFileStatus(target);
} catch (FileNotFoundException ignore) {
if (LOG.isDebugEnabled())
LOG.debug("Path could not be found: " + target, ignore);
}
if (targetStatus != null && (targetStatus.isDirectory() != sourceCurrStatus.isDirectory())) {
throw new IOException("Can't replace " + target + ". Target is " +
getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
}
if (sourceCurrStatus.isDirectory()) {
createTargetDirsWithRetry(description, target, context);
return;
}
if (skipFile(sourceFS, sourceCurrStatus, target)) {
LOG.info("Skipping copy of " + sourceCurrStatus.getPath()
+ " to " + target);
updateSkipCounters(context, sourceCurrStatus);
context.write(null, new Text("SKIP: " + sourceCurrStatus.getPath()));
}
else {
copyFileWithRetry(description, sourceCurrStatus, target, context,
fileAttributes);
}
DistCpUtils.preserve(target.getFileSystem(conf), target,
sourceCurrStatus, fileAttributes);
} catch (IOException exception) {
handleFailures(exception, sourceFileStatus, target, context);
}
}
private String getFileType(FileStatus fileStatus) {
return fileStatus == null ? "N/A" : (fileStatus.isDirectory() ? "dir" : "file");
}
private static EnumSet<DistCpOptions.FileAttribute>
getFileAttributeSettings(Mapper.Context context) {
String attributeString = context.getConfiguration().get(
DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel());
return DistCpUtils.unpackAttributes(attributeString);
}
private void copyFileWithRetry(String description, FileStatus sourceFileStatus,
Path target, Context context,
EnumSet<DistCpOptions.FileAttribute> fileAttributes) throws IOException {
long bytesCopied;
try {
bytesCopied = (Long)new RetriableFileCopyCommand(description)
.execute(sourceFileStatus, target, context, fileAttributes);
} catch (Exception e) {
context.setStatus("Copy Failure: " + sourceFileStatus.getPath());
throw new IOException("File copy failed: " + sourceFileStatus.getPath() +
" --> " + target, e);
}
incrementCounter(context, Counter.BYTESEXPECTED, sourceFileStatus.getLen());
incrementCounter(context, Counter.BYTESCOPIED, bytesCopied);
incrementCounter(context, Counter.COPY, 1);
}
private void createTargetDirsWithRetry(String description,
Path target, Context context) throws IOException {
try {
new RetriableDirectoryCreateCommand(description).execute(target, context);
} catch (Exception e) {
throw new IOException("mkdir failed for " + target, e);
}
incrementCounter(context, Counter.COPY, 1);
}
private static void updateSkipCounters(Context context,
FileStatus sourceFile) {
incrementCounter(context, Counter.SKIP, 1);
incrementCounter(context, Counter.BYTESSKIPPED, sourceFile.getLen());
}
private void handleFailures(IOException exception,
FileStatus sourceFileStatus, Path target,
Context context) throws IOException, InterruptedException {
LOG.error("Failure in copying " + sourceFileStatus.getPath() + " to " +
target, exception);
if (ignoreFailures && exception.getCause() instanceof
RetriableFileCopyCommand.CopyReadException) {
incrementCounter(context, Counter.FAIL, 1);
incrementCounter(context, Counter.BYTESFAILED, sourceFileStatus.getLen());
context.write(null, new Text("FAIL: " + sourceFileStatus.getPath() + " - " +
StringUtils.stringifyException(exception)));
}
else
throw exception;
}
private static void incrementCounter(Context context, Counter counter,
long value) {
context.getCounter(counter).increment(value);
}
private boolean skipFile(FileSystem sourceFS, FileStatus source, Path target)
throws IOException {
return targetFS.exists(target)
&& !overWrite
&& !mustUpdate(sourceFS, source, target);
}
private boolean mustUpdate(FileSystem sourceFS, FileStatus source, Path target)
throws IOException {
final FileStatus targetFileStatus = targetFS.getFileStatus(target);
return syncFolders
&& (
targetFileStatus.getLen() != source.getLen()
|| (!skipCrc &&
!DistCpUtils.checksumsAreEqual(sourceFS,
source.getPath(), targetFS, target))
|| (source.getBlockSize() != targetFileStatus.getBlockSize() &&
preserve.contains(FileAttribute.BLOCKSIZE))
);
}
}
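// Illustrative sketch, not part of the DistCp sources: CopyMapper skips a file only
// when it already exists on the target, -overwrite is off, and -update (sync-folders)
// does not force a re-copy. The standalone class below restates that decision with
// all inputs modelled as plain booleans/longs; every name here is illustrative only.
public class SkipDecisionSketch {
  static boolean mustUpdate(boolean syncFolders, long srcLen, long tgtLen,
                            boolean skipCrc, boolean checksumsEqual,
                            boolean preserveBlockSize, boolean blockSizesDiffer) {
    return syncFolders
        && (srcLen != tgtLen
            || (!skipCrc && !checksumsEqual)
            || (preserveBlockSize && blockSizesDiffer));
  }
  static boolean skipFile(boolean targetExists, boolean overWrite, boolean mustUpdate) {
    return targetExists && !overWrite && !mustUpdate;
  }
  public static void main(String[] args) {
    // Same length, same checksum, -update on, -overwrite off: the copy is skipped.
    boolean update = mustUpdate(true, 100, 100, false, true, false, false);
    System.out.println("skip = " + skipFile(true, false, update)); // prints "skip = true"
  }
}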

View File

@@ -0,0 +1,124 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.security.TokenCache;
import org.apache.hadoop.tools.DistCpConstants;
import java.io.IOException;
/**
* The CopyOutputFormat is the Hadoop OutputFormat used in DistCp.
* It sets up the Job's Configuration (in the Job-Context) with the settings
* for the work-directory, final commit-directory, etc. It also sets the right
* output-committer.
* @param <K>
* @param <V>
*/
public class CopyOutputFormat<K, V> extends TextOutputFormat<K, V> {
/**
* Setter for the working directory for DistCp (where files will be copied
* before they are moved to the final commit-directory.)
* @param job The Job on whose configuration the working-directory is to be set.
* @param workingDirectory The path to use as the working directory.
*/
public static void setWorkingDirectory(Job job, Path workingDirectory) {
job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH,
workingDirectory.toString());
}
/**
* Setter for the final directory for DistCp (where files copied will be
* moved, atomically.)
* @param job The Job on whose configuration the commit-directory is to be set.
* @param commitDirectory The path to use for final commit.
*/
public static void setCommitDirectory(Job job, Path commitDirectory) {
job.getConfiguration().set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH,
commitDirectory.toString());
}
/**
* Getter for the working directory.
* @param job The Job from whose configuration the working-directory is to
* be retrieved.
* @return The working-directory Path.
*/
public static Path getWorkingDirectory(Job job) {
return getWorkingDirectory(job.getConfiguration());
}
private static Path getWorkingDirectory(Configuration conf) {
String workingDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH);
if (workingDirectory == null || workingDirectory.isEmpty()) {
return null;
} else {
return new Path(workingDirectory);
}
}
/**
* Getter for the final commit-directory.
* @param job The Job from whose configuration the commit-directory is to be
* retrieved.
* @return The commit-directory Path.
*/
public static Path getCommitDirectory(Job job) {
return getCommitDirectory(job.getConfiguration());
}
private static Path getCommitDirectory(Configuration conf) {
String commitDirectory = conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH);
if (commitDirectory == null || commitDirectory.isEmpty()) {
return null;
} else {
return new Path(commitDirectory);
}
}
/** {@inheritDoc} */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
return new CopyCommitter(getOutputPath(context), context);
}
/** {@inheritDoc} */
@Override
public void checkOutputSpecs(JobContext context) throws IOException {
Configuration conf = context.getConfiguration();
if (getCommitDirectory(conf) == null) {
throw new IllegalStateException("Commit directory not configured");
}
Path workingPath = getWorkingDirectory(conf);
if (workingPath == null) {
throw new IllegalStateException("Working directory not configured");
}
// get delegation token for outDir's file system
TokenCache.obtainTokensForNamenodes(context.getCredentials(),
new Path[] {workingPath}, conf);
}
}
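// Illustrative sketch, not part of the DistCp sources: the two setters above simply
// record the work- and commit-directories under DistCp's configuration labels, and
// checkOutputSpecs() fails fast if either is missing. A minimal sketch of the
// equivalent configuration wiring, using hadoop-common's Configuration directly;
// the sample paths are made up for illustration.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpConstants;

public class CopyOutputConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, "/tmp/distcp/.work");
    conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, "/data/final");
    // checkOutputSpecs() would reject a configuration where either label is unset/empty.
    System.out.println("work   = " + conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
    System.out.println("commit = " + conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  }
}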

View File

@@ -0,0 +1,56 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.hadoop.tools.util.RetriableCommand;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapreduce.Mapper;
/**
* This class extends RetriableCommand to implement the creation of directories
* with retries on failure.
*/
public class RetriableDirectoryCreateCommand extends RetriableCommand {
/**
* Constructor, taking a description of the action.
* @param description Verbose description of the directory-creation operation.
*/
public RetriableDirectoryCreateCommand(String description) {
super(description);
}
/**
* Implementation of RetriableCommand::doExecute().
* This implements the actual mkdirs() functionality.
* @param arguments Argument-list to the command.
* @return Boolean. True, if the directory could be created successfully.
* @throws Exception IOException, on failure to create the directory.
*/
@Override
protected Object doExecute(Object... arguments) throws Exception {
assert arguments.length == 2 : "Unexpected argument list.";
Path target = (Path)arguments[0];
Mapper.Context context = (Mapper.Context)arguments[1];
FileSystem targetFS = target.getFileSystem(context.getConfiguration());
return targetFS.mkdirs(target);
}
}

View File

@@ -0,0 +1,245 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.hadoop.tools.util.RetriableCommand;
import org.apache.hadoop.tools.util.ThrottledInputStream;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.tools.DistCpOptions.*;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import java.io.*;
import java.util.EnumSet;
/**
* This class extends RetriableCommand to implement the copy of files,
* with retries on failure.
*/
public class RetriableFileCopyCommand extends RetriableCommand {
private static Log LOG = LogFactory.getLog(RetriableFileCopyCommand.class);
private static int BUFFER_SIZE = 8 * 1024;
/**
* Constructor, taking a description of the action.
* @param description Verbose description of the copy operation.
*/
public RetriableFileCopyCommand(String description) {
super(description);
}
/**
* Implementation of RetriableCommand::doExecute().
* This is the actual copy-implementation.
* @param arguments Argument-list to the command.
* @return Number of bytes copied.
* @throws Exception: CopyReadException, if there are read-failures. All other
* failures are IOExceptions.
*/
@SuppressWarnings("unchecked")
@Override
protected Object doExecute(Object... arguments) throws Exception {
assert arguments.length == 4 : "Unexpected argument list.";
FileStatus source = (FileStatus)arguments[0];
assert !source.isDirectory() : "Unexpected file-status. Expected file.";
Path target = (Path)arguments[1];
Mapper.Context context = (Mapper.Context)arguments[2];
EnumSet<FileAttribute> fileAttributes
= (EnumSet<FileAttribute>)arguments[3];
return doCopy(source, target, context, fileAttributes);
}
private long doCopy(FileStatus sourceFileStatus, Path target,
Mapper.Context context,
EnumSet<FileAttribute> fileAttributes)
throws IOException {
Path tmpTargetPath = getTmpFile(target, context);
final Configuration configuration = context.getConfiguration();
FileSystem targetFS = target.getFileSystem(configuration);
try {
if (LOG.isDebugEnabled()) {
LOG.debug("Copying " + sourceFileStatus.getPath() + " to " + target);
LOG.debug("Tmp-file path: " + tmpTargetPath);
}
FileSystem sourceFS = sourceFileStatus.getPath().getFileSystem(
configuration);
long bytesRead = copyToTmpFile(tmpTargetPath, targetFS, sourceFileStatus,
context, fileAttributes);
compareFileLengths(sourceFileStatus, tmpTargetPath, configuration, bytesRead);
compareCheckSums(sourceFS, sourceFileStatus.getPath(), targetFS, tmpTargetPath);
promoteTmpToTarget(tmpTargetPath, target, targetFS);
return bytesRead;
} finally {
if (targetFS.exists(tmpTargetPath))
targetFS.delete(tmpTargetPath, false);
}
}
private long copyToTmpFile(Path tmpTargetPath, FileSystem targetFS,
FileStatus sourceFileStatus, Mapper.Context context,
EnumSet<FileAttribute> fileAttributes)
throws IOException {
OutputStream outStream = new BufferedOutputStream(targetFS.create(
tmpTargetPath, true, BUFFER_SIZE,
getReplicationFactor(fileAttributes, sourceFileStatus, targetFS),
getBlockSize(fileAttributes, sourceFileStatus, targetFS), context));
return copyBytes(sourceFileStatus, outStream, BUFFER_SIZE, true, context);
}
private void compareFileLengths(FileStatus sourceFileStatus, Path target,
Configuration configuration, long bytesRead)
throws IOException {
final Path sourcePath = sourceFileStatus.getPath();
FileSystem fs = sourcePath.getFileSystem(configuration);
if (fs.getFileStatus(sourcePath).getLen() != bytesRead)
throw new IOException("Mismatch in length of source:" + sourcePath
+ " and target:" + target);
}
private void compareCheckSums(FileSystem sourceFS, Path source,
FileSystem targetFS, Path target)
throws IOException {
if (!DistCpUtils.checksumsAreEqual(sourceFS, source, targetFS, target))
throw new IOException("Check-sum mismatch between "
+ source + " and " + target);
}
//If target file exists and unable to delete target - fail
//If target doesn't exist and unable to create parent folder - fail
//If target is successfully deleted and parent exists, if rename fails - fail
private void promoteTmpToTarget(Path tmpTarget, Path target, FileSystem fs)
throws IOException {
if ((fs.exists(target) && !fs.delete(target, false))
|| (!fs.exists(target.getParent()) && !fs.mkdirs(target.getParent()))
|| !fs.rename(tmpTarget, target)) {
throw new IOException("Failed to promote tmp-file:" + tmpTarget
+ " to: " + target);
}
}
private Path getTmpFile(Path target, Mapper.Context context) {
Path targetWorkPath = new Path(context.getConfiguration().
get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
Path root = target.equals(targetWorkPath)? targetWorkPath.getParent() : targetWorkPath;
LOG.info("Creating temp file: " +
new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString()));
return new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
}
private long copyBytes(FileStatus sourceFileStatus, OutputStream outStream,
int bufferSize, boolean mustCloseStream,
Mapper.Context context) throws IOException {
Path source = sourceFileStatus.getPath();
byte buf[] = new byte[bufferSize];
ThrottledInputStream inStream = null;
long totalBytesRead = 0;
try {
inStream = getInputStream(source, context.getConfiguration());
int bytesRead = readBytes(inStream, buf);
while (bytesRead >= 0) {
totalBytesRead += bytesRead;
outStream.write(buf, 0, bytesRead);
updateContextStatus(totalBytesRead, context, sourceFileStatus);
bytesRead = inStream.read(buf);
}
} finally {
if (mustCloseStream)
IOUtils.cleanup(LOG, outStream, inStream);
}
return totalBytesRead;
}
private void updateContextStatus(long totalBytesRead, Mapper.Context context,
FileStatus sourceFileStatus) {
StringBuilder message = new StringBuilder(DistCpUtils.getFormatter()
.format(totalBytesRead * 100.0f / sourceFileStatus.getLen()));
message.append("% ")
.append(description).append(" [")
.append(DistCpUtils.getStringDescriptionFor(totalBytesRead))
.append('/')
.append(DistCpUtils.getStringDescriptionFor(sourceFileStatus.getLen()))
.append(']');
context.setStatus(message.toString());
}
private static int readBytes(InputStream inStream, byte buf[])
throws IOException {
try {
return inStream.read(buf);
}
catch (IOException e) {
throw new CopyReadException(e);
}
}
private static ThrottledInputStream getInputStream(Path path, Configuration conf)
throws IOException {
try {
FileSystem fs = path.getFileSystem(conf);
long bandwidthMB = conf.getInt(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
DistCpConstants.DEFAULT_BANDWIDTH_MB);
return new ThrottledInputStream(new BufferedInputStream(fs.open(path)),
bandwidthMB * 1024 * 1024);
}
catch (IOException e) {
throw new CopyReadException(e);
}
}
private static short getReplicationFactor(
EnumSet<FileAttribute> fileAttributes,
FileStatus sourceFile, FileSystem targetFS) {
return fileAttributes.contains(FileAttribute.REPLICATION)?
sourceFile.getReplication() : targetFS.getDefaultReplication();
}
private static long getBlockSize(
EnumSet<FileAttribute> fileAttributes,
FileStatus sourceFile, FileSystem targetFS) {
return fileAttributes.contains(FileAttribute.BLOCKSIZE)?
sourceFile.getBlockSize() : targetFS.getDefaultBlockSize();
}
/**
* Special subclass of IOException. This is used to distinguish read-operation
* failures from other kinds of IOExceptions.
* The failure to read from source is dealt with specially, in the CopyMapper.
* Such failures may be skipped if the DistCpOptions indicate so.
* Write failures are intolerable, and amount to CopyMapper failure.
*/
public static class CopyReadException extends IOException {
public CopyReadException(Throwable rootCause) {
super(rootCause);
}
}
}
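// Illustrative sketch, not part of the DistCp sources: doCopy() above writes to a
// temporary file, verifies length and checksum, then renames the temp file over the
// target so a partially written file never appears at the final path. The standalone
// class below shows the same write-then-promote pattern on the local filesystem with
// java.io; the directory and file names are made up for illustration.
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

public class WriteThenPromoteSketch {
  public static void main(String[] args) throws IOException {
    File dir = new File(System.getProperty("java.io.tmpdir"), "promote-sketch");
    dir.mkdirs();
    File tmp = new File(dir, ".distcp.tmp.attempt_0001");
    File target = new File(dir, "part-0000");
    FileOutputStream out = new FileOutputStream(tmp);
    try {
      out.write("payload".getBytes("UTF-8")); // 1. write everything to the temp file
    } finally {
      out.close();
    }
    // 2. (length/checksum verification of the temp file would happen here)
    // 3. promote: remove any stale target, then rename the temp file into place
    if (target.exists() && !target.delete()) {
      throw new IOException("Could not delete stale target: " + target);
    }
    if (!tmp.renameTo(target)) {
      throw new IOException("Failed to promote " + tmp + " to " + target);
    }
    System.out.println("promoted: " + target + " (" + target.length() + " bytes)");
  }
}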

View File

@@ -0,0 +1,169 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
/**
* UniformSizeInputFormat extends the InputFormat<> class, to produce
* input-splits for DistCp.
* It looks at the copy-listing and groups the contents into input-splits such
* that the total-number of bytes to be copied for each input split is
* uniform.
*/
public class UniformSizeInputFormat extends InputFormat<Text, FileStatus> {
private static final Log LOG
= LogFactory.getLog(UniformSizeInputFormat.class);
/**
* Implementation of InputFormat::getSplits(). Returns a list of InputSplits,
* such that the number of bytes to be copied for all the splits are
* approximately equal.
* @param context JobContext for the job.
* @return The list of uniformly-distributed input-splits.
* @throws IOException: On failure.
* @throws InterruptedException
*/
@Override
public List<InputSplit> getSplits(JobContext context)
throws IOException, InterruptedException {
Configuration configuration = context.getConfiguration();
int numSplits = DistCpUtils.getInt(configuration,
JobContext.NUM_MAPS);
if (numSplits == 0) return new ArrayList<InputSplit>();
return getSplits(configuration, numSplits,
DistCpUtils.getLong(configuration,
DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED));
}
private List<InputSplit> getSplits(Configuration configuration, int numSplits,
long totalSizeBytes) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);
FileStatus srcFileStatus = new FileStatus();
Text srcRelPath = new Text();
long currentSplitSize = 0;
long lastSplitStart = 0;
long lastPosition = 0;
final Path listingFilePath = getListingFilePath(configuration);
if (LOG.isDebugEnabled()) {
LOG.debug("Average bytes per map: " + nBytesPerSplit +
", Number of maps: " + numSplits + ", total size: " + totalSizeBytes);
}
SequenceFile.Reader reader=null;
try {
reader = getListingFileReader(configuration);
while (reader.next(srcRelPath, srcFileStatus)) {
// If adding the current file would push this split past the per-split byte
// limit, close off the current split and start a new one for this file.
if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
lastPosition - lastSplitStart, null);
if (LOG.isDebugEnabled()) {
LOG.debug ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
}
splits.add(split);
lastSplitStart = lastPosition;
currentSplitSize = 0;
}
currentSplitSize += srcFileStatus.getLen();
lastPosition = reader.getPosition();
}
if (lastPosition > lastSplitStart) {
FileSplit split = new FileSplit(listingFilePath, lastSplitStart,
lastPosition - lastSplitStart, null);
if (LOG.isDebugEnabled()) {
LOG.info ("Creating split : " + split + ", bytes in split: " + currentSplitSize);
}
splits.add(split);
}
} finally {
IOUtils.closeStream(reader);
}
return splits;
}
private static Path getListingFilePath(Configuration configuration) {
final String listingFilePathString =
configuration.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
assert !listingFilePathString.equals("")
: "Couldn't find listing file. Invalid input.";
return new Path(listingFilePathString);
}
private SequenceFile.Reader getListingFileReader(Configuration configuration) {
final Path listingFilePath = getListingFilePath(configuration);
try {
final FileSystem fileSystem = listingFilePath.getFileSystem(configuration);
if (!fileSystem.exists(listingFilePath))
throw new IllegalArgumentException("Listing file doesn't exist at: "
+ listingFilePath);
return new SequenceFile.Reader(configuration,
SequenceFile.Reader.file(listingFilePath));
}
catch (IOException exception) {
LOG.error("Couldn't find listing file at: " + listingFilePath, exception);
throw new IllegalArgumentException("Couldn't find listing-file at: "
+ listingFilePath, exception);
}
}
/**
* Implementation of InputFormat::createRecordReader().
* @param split The split for which the RecordReader is sought.
* @param context The context of the current task-attempt.
* @return A SequenceFileRecordReader instance, (since the copy-listing is a
* simple sequence-file.)
* @throws IOException
* @throws InterruptedException
*/
@Override
public RecordReader<Text, FileStatus> createRecordReader(InputSplit split,
TaskAttemptContext context)
throws IOException, InterruptedException {
return new SequenceFileRecordReader<Text, FileStatus>();
}
}
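// Illustrative sketch, not part of the DistCp sources: getSplits() above walks the
// listing in order and closes a split whenever the accumulated byte-count would
// exceed ceil(totalBytes / numSplits). The standalone class below reproduces that
// grouping over an array of file lengths; the numbers are made-up examples.
public class UniformSizeSplitSketch {
  public static void main(String[] args) {
    long[] fileLengths = {10, 200, 30, 400, 50, 600, 70};
    int numSplits = 3;
    long total = 0;
    for (long len : fileLengths) total += len;
    long bytesPerSplit = (long) Math.ceil(total * 1.0 / numSplits);
    long current = 0;
    StringBuilder split = new StringBuilder();
    for (long len : fileLengths) {
      // Close the current split before a file that would push it past the limit.
      if (current + len > bytesPerSplit && current > 0) {
        System.out.println("split [" + split + "] = " + current + " bytes");
        split.setLength(0);
        current = 0;
      }
      split.append(len).append(' ');
      current += len;
    }
    if (current > 0) {
      System.out.println("split [" + split + "] = " + current + " bytes");
    }
  }
}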

View File

@@ -0,0 +1,246 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred.lib;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskID;
import java.io.IOException;
/**
* The DynamicInputChunk represents a single chunk of work, when used in
* conjunction with the DynamicInputFormat and the DynamicRecordReader.
* The records in the DynamicInputFormat's input-file are split across various
* DynamicInputChunks. Each one is claimed and processed in an iteration of
* a dynamic-mapper. When a DynamicInputChunk has been exhausted, a faster
* mapper may claim another and process it, until there are no more to be
* consumed.
*/
class DynamicInputChunk<K, V> {
private static Log LOG = LogFactory.getLog(DynamicInputChunk.class);
private static Configuration configuration;
private static Path chunkRootPath;
private static String chunkFilePrefix;
private static int numChunksLeft = -1; // Un-initialized before 1st dir-scan.
private static FileSystem fs;
private Path chunkFilePath;
private SequenceFileRecordReader<K, V> reader;
private SequenceFile.Writer writer;
private static void initializeChunkInvariants(Configuration config)
throws IOException {
configuration = config;
Path listingFilePath = new Path(getListingFilePath(configuration));
chunkRootPath = new Path(listingFilePath.getParent(), "chunkDir");
fs = chunkRootPath.getFileSystem(configuration);
chunkFilePrefix = listingFilePath.getName() + ".chunk.";
}
private static String getListingFilePath(Configuration configuration) {
final String listingFileString = configuration.get(
DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
assert !listingFileString.equals("") : "Listing file not found.";
return listingFileString;
}
private static boolean areInvariantsInitialized() {
return chunkRootPath != null;
}
private DynamicInputChunk(String chunkId, Configuration configuration)
throws IOException {
if (!areInvariantsInitialized())
initializeChunkInvariants(configuration);
chunkFilePath = new Path(chunkRootPath, chunkFilePrefix + chunkId);
openForWrite();
}
private void openForWrite() throws IOException {
writer = SequenceFile.createWriter(
chunkFilePath.getFileSystem(configuration), configuration,
chunkFilePath, Text.class, FileStatus.class,
SequenceFile.CompressionType.NONE);
}
/**
* Factory method to create chunk-files for writing to.
* (For instance, when the DynamicInputFormat splits the input-file into
* chunks.)
* @param chunkId String to identify the chunk.
* @param configuration Configuration, describing the location of the listing-
* file, file-system for the map-job, etc.
* @return A DynamicInputChunk, corresponding to a chunk-file, with the name
* incorporating the chunk-id.
* @throws IOException Exception on failure to create the chunk.
*/
public static DynamicInputChunk createChunkForWrite(String chunkId,
Configuration configuration) throws IOException {
return new DynamicInputChunk(chunkId, configuration);
}
/**
* Method to write records into a chunk.
* @param key Key from the listing file.
* @param value Corresponding value from the listing file.
* @throws IOException Exception on failure to write to the file.
*/
public void write(Text key, FileStatus value) throws IOException {
writer.append(key, value);
}
/**
* Closes streams opened to the chunk-file.
*/
public void close() {
IOUtils.cleanup(LOG, reader, writer);
}
/**
* Reassigns the chunk to a specified Map-Task, for consumption.
* @param taskId The Map-Task to which the chunk is to be reassigned.
* @throws IOException Exception on failure to reassign.
*/
public void assignTo(TaskID taskId) throws IOException {
Path newPath = new Path(chunkRootPath, taskId.toString());
if (!fs.rename(chunkFilePath, newPath)) {
LOG.warn(chunkFilePath + " could not be assigned to " + taskId);
}
}
private DynamicInputChunk(Path chunkFilePath,
TaskAttemptContext taskAttemptContext)
throws IOException, InterruptedException {
if (!areInvariantsInitialized())
initializeChunkInvariants(taskAttemptContext.getConfiguration());
this.chunkFilePath = chunkFilePath;
openForRead(taskAttemptContext);
}
private void openForRead(TaskAttemptContext taskAttemptContext)
throws IOException, InterruptedException {
reader = new SequenceFileRecordReader<K, V>();
reader.initialize(new FileSplit(chunkFilePath, 0,
DistCpUtils.getFileSize(chunkFilePath, configuration), null),
taskAttemptContext);
}
/**
* Factory method that
* 1. acquires a chunk for the specified map-task attempt
* 2. returns a DynamicInputChunk associated with the acquired chunk-file.
* @param taskAttemptContext The attempt-context for the map task that's
* trying to acquire a chunk.
* @return The acquired dynamic-chunk. The chunk-file is renamed to the
* attempt-id (from the attempt-context.)
* @throws IOException Exception on failure.
* @throws InterruptedException Exception on failure.
*/
public static DynamicInputChunk acquire(TaskAttemptContext taskAttemptContext)
throws IOException, InterruptedException {
if (!areInvariantsInitialized())
initializeChunkInvariants(taskAttemptContext.getConfiguration());
String taskId
= taskAttemptContext.getTaskAttemptID().getTaskID().toString();
Path acquiredFilePath = new Path(chunkRootPath, taskId);
if (fs.exists(acquiredFilePath)) {
LOG.info("Acquiring pre-assigned chunk: " + acquiredFilePath);
return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
}
for (FileStatus chunkFile : getListOfChunkFiles()) {
if (fs.rename(chunkFile.getPath(), acquiredFilePath)) {
LOG.info(taskId + " acquired " + chunkFile.getPath());
return new DynamicInputChunk(acquiredFilePath, taskAttemptContext);
}
else
LOG.warn(taskId + " could not acquire " + chunkFile.getPath());
}
return null;
}
/**
* Method to be called to relinquish an acquired chunk. All streams open to
* the chunk are closed, and the chunk-file is deleted.
* @throws IOException Exception thrown on failure to release (i.e. delete)
* the chunk file.
*/
public void release() throws IOException {
close();
if (!fs.delete(chunkFilePath, false)) {
LOG.error("Unable to release chunk at path: " + chunkFilePath);
throw new IOException("Unable to release chunk at path: " + chunkFilePath);
}
}
static FileStatus [] getListOfChunkFiles() throws IOException {
Path chunkFilePattern = new Path(chunkRootPath, chunkFilePrefix + "*");
FileStatus chunkFiles[] = fs.globStatus(chunkFilePattern);
numChunksLeft = chunkFiles.length;
return chunkFiles;
}
/**
* Getter for the chunk-file's path, on HDFS.
* @return The qualified path to the chunk-file.
*/
public Path getPath() {
return chunkFilePath;
}
/**
* Getter for the record-reader, opened to the chunk-file.
* @return Opened Sequence-file reader.
*/
public SequenceFileRecordReader<K,V> getReader() {
assert reader != null : "Reader un-initialized!";
return reader;
}
/**
* Getter for the number of chunk-files left in the chunk-file directory.
* Useful to determine how many chunks (and hence, records) are left to be
* processed.
* @return Before the first scan of the directory, the number returned is -1.
* Otherwise, the number of chunk-files seen from the last scan is returned.
*/
public static int getNumChunksLeft() {
return numChunksLeft;
}
}
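// Illustrative sketch, not part of the DistCp sources: acquire() above relies on the
// atomicity of FileSystem.rename() so that exactly one map task can claim a given
// chunk file. The standalone class below shows the same claim-by-rename idea on the
// local filesystem with java.io.File; the chunk and task names are made up.
import java.io.File;
import java.io.IOException;

public class ClaimByRenameSketch {
  public static void main(String[] args) throws IOException {
    File chunkDir = new File(System.getProperty("java.io.tmpdir"), "chunk-sketch");
    chunkDir.mkdirs();
    File chunk = new File(chunkDir, "fileList.seq.chunk.00000");
    chunk.createNewFile(); // an unclaimed chunk
    String taskId = "task_0001_m_000003";
    File claimed = new File(chunkDir, taskId);
    // Only one renamer can win; a losing task would simply move on to the next chunk.
    if (chunk.renameTo(claimed)) {
      System.out.println(taskId + " acquired " + claimed.getName());
    } else {
      System.out.println(taskId + " lost the race; trying the next chunk");
    }
  }
}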

View File

@@ -0,0 +1,292 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools.mapred.lib;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.tools.DistCpConstants;
import org.apache.hadoop.tools.util.DistCpUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import java.util.List;
import java.util.ArrayList;
import java.io.IOException;
/**
* DynamicInputFormat implements the "Worker pattern" for DistCp.
* Rather than splitting up the copy-list into a set of static splits,
* the DynamicInputFormat does the following:
* 1. Splits the copy-list into small chunks on the DFS.
* 2. Creates a set of empty "dynamic" splits, each of which consumes as many
* chunks as it can.
* This arrangement ensures that a single slow mapper won't slow down the entire
* job (since the slack will be picked up by other mappers, who consume more
* chunks.)
* By varying the split-ratio, one can vary chunk sizes to achieve different
* performance characteristics.
*/
public class DynamicInputFormat<K, V> extends InputFormat<K, V> {
private static final Log LOG = LogFactory.getLog(DynamicInputFormat.class);
private static final String CONF_LABEL_LISTING_SPLIT_RATIO
= "mapred.listing.split.ratio";
private static final String CONF_LABEL_NUM_SPLITS
= "mapred.num.splits";
private static final String CONF_LABEL_NUM_ENTRIES_PER_CHUNK
= "mapred.num.entries.per.chunk";
/**
* Implementation of InputFormat::getSplits(). This method splits up the
* copy-listing file into chunks, and assigns the first batch to different
* tasks.
* @param jobContext JobContext for the map job.
* @return The list of (empty) dynamic input-splits.
* @throws IOException, on failure.
* @throws InterruptedException
*/
@Override
public List<InputSplit> getSplits(JobContext jobContext)
throws IOException, InterruptedException {
LOG.info("DynamicInputFormat: Getting splits for job:"
+ jobContext.getJobID());
return createSplits(jobContext,
splitCopyListingIntoChunksWithShuffle(jobContext));
}
private List<InputSplit> createSplits(JobContext jobContext,
List<DynamicInputChunk> chunks)
throws IOException {
int numMaps = getNumMapTasks(jobContext.getConfiguration());
final int nSplits = Math.min(numMaps, chunks.size());
List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);
for (int i=0; i< nSplits; ++i) {
TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
chunks.get(i).assignTo(taskId);
splits.add(new FileSplit(chunks.get(i).getPath(), 0,
// Setting non-zero length for FileSplit size, to avoid a possible
// future when 0-sized file-splits are considered "empty" and skipped
// over.
MIN_RECORDS_PER_CHUNK,
null));
}
DistCpUtils.publish(jobContext.getConfiguration(),
CONF_LABEL_NUM_SPLITS, splits.size());
return splits;
}
private static int N_CHUNKS_OPEN_AT_ONCE_DEFAULT = 16;
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
(JobContext context) throws IOException {
final Configuration configuration = context.getConfiguration();
int numRecords = getNumberOfRecords(configuration);
int numMaps = getNumMapTasks(configuration);
// Number of chunks each map will process, on average.
int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
validateNumChunksUsing(splitRatio, numMaps);
int numEntriesPerChunk = (int)Math.ceil((float)numRecords
/(splitRatio * numMaps));
DistCpUtils.publish(context.getConfiguration(),
CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
numEntriesPerChunk);
final int nChunksTotal = (int)Math.ceil((float)numRecords/numEntriesPerChunk);
int nChunksOpenAtOnce
= Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);
Path listingPath = getListingFilePath(configuration);
SequenceFile.Reader reader
= new SequenceFile.Reader(configuration,
SequenceFile.Reader.file(listingPath));
List<DynamicInputChunk> openChunks
= new ArrayList<DynamicInputChunk>();
List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();
FileStatus fileStatus = new FileStatus();
Text relPath = new Text();
int recordCounter = 0;
int chunkCount = 0;
try {
while (reader.next(relPath, fileStatus)) {
if (recordCounter % (nChunksOpenAtOnce*numEntriesPerChunk) == 0) {
// All chunks full. Create new chunk-set.
closeAll(openChunks);
chunksFinal.addAll(openChunks);
openChunks = createChunks(
configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);
chunkCount += openChunks.size();
nChunksOpenAtOnce = openChunks.size();
recordCounter = 0;
}
// Shuffle into open chunks.
openChunks.get(recordCounter%nChunksOpenAtOnce).write(relPath, fileStatus);
++recordCounter;
}
} finally {
closeAll(openChunks);
chunksFinal.addAll(openChunks);
IOUtils.closeStream(reader);
}
LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size());
return chunksFinal;
}
private static void validateNumChunksUsing(int splitRatio, int numMaps)
throws IOException {
if (splitRatio * numMaps > MAX_CHUNKS_TOLERABLE)
throw new IOException("Too many chunks created with splitRatio:"
+ splitRatio + ", numMaps:" + numMaps
+ ". Reduce numMaps or decrease split-ratio to proceed.");
}
private static void closeAll(List<DynamicInputChunk> chunks) {
for (DynamicInputChunk chunk: chunks)
chunk.close();
}
private static List<DynamicInputChunk> createChunks(Configuration config,
int chunkCount, int nChunksTotal, int nChunksOpenAtOnce)
throws IOException {
List<DynamicInputChunk> chunks = new ArrayList<DynamicInputChunk>();
int chunkIdUpperBound
= Math.min(nChunksTotal, chunkCount + nChunksOpenAtOnce);
// If there will be fewer than nChunksOpenAtOnce chunks left after
// the current batch of chunks, fold the remaining chunks into
// the current batch.
if (nChunksTotal - chunkIdUpperBound < nChunksOpenAtOnce)
chunkIdUpperBound = nChunksTotal;
for (int i=chunkCount; i < chunkIdUpperBound; ++i)
chunks.add(createChunk(i, config));
return chunks;
}
private static DynamicInputChunk createChunk(int chunkId, Configuration config)
throws IOException {
return DynamicInputChunk.createChunkForWrite(String.format("%05d", chunkId),
config);
}
private static Path getListingFilePath(Configuration configuration) {
String listingFilePathString = configuration.get(
DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, "");
assert !listingFilePathString.equals("") : "Listing file not found.";
Path listingFilePath = new Path(listingFilePathString);
try {
assert listingFilePath.getFileSystem(configuration)
.exists(listingFilePath) : "Listing file: " + listingFilePath +
" not found.";
} catch (IOException e) {
assert false : "Listing file: " + listingFilePath
+ " couldn't be accessed. " + e.getMessage();
}
return listingFilePath;
}
private static int getNumberOfRecords(Configuration configuration) {
return DistCpUtils.getInt(configuration,
DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS);
}
private static int getNumMapTasks(Configuration configuration) {
return DistCpUtils.getInt(configuration,
JobContext.NUM_MAPS);
}
private static int getListingSplitRatio(Configuration configuration,
int numMaps, int numPaths) {
return configuration.getInt(
CONF_LABEL_LISTING_SPLIT_RATIO,
getSplitRatio(numMaps, numPaths));
}
private static final int MAX_CHUNKS_TOLERABLE = 400;
private static final int MAX_CHUNKS_IDEAL = 100;
private static final int MIN_RECORDS_PER_CHUNK = 5;
private static final int SPLIT_RATIO_DEFAULT = 2;
/**
* Package private, for testability.
* @param nMaps The number of maps requested.
* @param nRecords The number of records to be copied.
* @return The number of splits each map should handle, ideally.
*/
static int getSplitRatio(int nMaps, int nRecords) {
if (nMaps == 1) {
LOG.warn("nMaps == 1. Why use DynamicInputFormat?");
return 1;
}
if (nMaps > MAX_CHUNKS_IDEAL)
return SPLIT_RATIO_DEFAULT;
int nPickups = (int)Math.ceil((float)MAX_CHUNKS_IDEAL/nMaps);
int nRecordsPerChunk = (int)Math.ceil((float)nRecords/(nMaps*nPickups));
return nRecordsPerChunk < MIN_RECORDS_PER_CHUNK ?
SPLIT_RATIO_DEFAULT : nPickups;
}
static int getNumEntriesPerChunk(Configuration configuration) {
return DistCpUtils.getInt(configuration,
CONF_LABEL_NUM_ENTRIES_PER_CHUNK);
}
/**
* Implementation of Inputformat::createRecordReader().
* @param inputSplit The split for which the RecordReader is required.
* @param taskAttemptContext TaskAttemptContext for the current attempt.
* @return DynamicRecordReader instance.
* @throws IOException, on failure.
* @throws InterruptedException
*/
@Override
public RecordReader<K, V> createRecordReader(
InputSplit inputSplit,
TaskAttemptContext taskAttemptContext)
throws IOException, InterruptedException {
return new DynamicRecordReader<K, V>();
}
}
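// Illustrative sketch, not part of the DistCp sources: getSplitRatio() above picks
// how many chunks each map should claim on average, aiming for roughly
// MAX_CHUNKS_IDEAL (100) chunks in total while keeping at least
// MIN_RECORDS_PER_CHUNK (5) records per chunk. The standalone class below restates
// that heuristic with a worked example; the constants mirror those in the class above.
public class SplitRatioSketch {
  static final int MAX_CHUNKS_IDEAL = 100;
  static final int MIN_RECORDS_PER_CHUNK = 5;
  static final int SPLIT_RATIO_DEFAULT = 2;

  static int getSplitRatio(int nMaps, int nRecords) {
    if (nMaps == 1) return 1; // dynamic chunking buys nothing with a single map
    if (nMaps > MAX_CHUNKS_IDEAL) return SPLIT_RATIO_DEFAULT;
    int nPickups = (int) Math.ceil((float) MAX_CHUNKS_IDEAL / nMaps);
    int nRecordsPerChunk = (int) Math.ceil((float) nRecords / (nMaps * nPickups));
    return nRecordsPerChunk < MIN_RECORDS_PER_CHUNK ? SPLIT_RATIO_DEFAULT : nPickups;
  }

  public static void main(String[] args) {
    // 20 maps, 10,000 records: nPickups = ceil(100/20) = 5, records per chunk =
    // ceil(10000/100) = 100 >= 5, so each map aims to pick up about 5 chunks.
    System.out.println(getSplitRatio(20, 10000)); // prints 5
  }
}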

Some files were not shown because too many files have changed in this diff