Merge trunk to HDFS-4685.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4685@1556097 13f79535-47bb-0310-9956-ffa450edef68
Author: Chris Nauroth, 2014-01-07 01:07:33 +00:00
Commit: 2fbb3d694e
159 changed files with 5282 additions and 2295 deletions

View File: hadoop-common-project/hadoop-auth/pom.xml

@@ -92,6 +92,11 @@
<artifactId>hadoop-minikdc</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<scope>compile</scope>
</dependency>
</dependencies>
<build>

View File: PseudoAuthenticationHandler.java

@@ -16,10 +16,15 @@ package org.apache.hadoop.security.authentication.server;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authentication.client.PseudoAuthenticator;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.NameValuePair;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Properties;
/**
@@ -48,6 +53,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
*/
public static final String ANONYMOUS_ALLOWED = TYPE + ".anonymous.allowed";
private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
private boolean acceptAnonymous;
/**
@@ -114,6 +120,18 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
return true;
}
private String getUserName(HttpServletRequest request) {
List<NameValuePair> list = URLEncodedUtils.parse(request.getQueryString(), UTF8_CHARSET);
if (list != null) {
for (NameValuePair nv : list) {
if (PseudoAuthenticator.USER_NAME.equals(nv.getName())) {
return nv.getValue();
}
}
}
return null;
}
/**
* Authenticates an HTTP client request.
* <p/>
@@ -139,7 +157,7 @@ public class PseudoAuthenticationHandler implements AuthenticationHandler {
public AuthenticationToken authenticate(HttpServletRequest request, HttpServletResponse response)
throws IOException, AuthenticationException {
AuthenticationToken token;
String userName = request.getParameter(PseudoAuthenticator.USER_NAME);
String userName = getUserName(request);
if (userName == null) {
if (getAcceptAnonymous()) {
token = AuthenticationToken.ANONYMOUS;

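The point of the new getUserName() above: request.getParameter() can force the servlet container to parse a form-encoded POST body, consuming the request InputStream that the application still needs (HADOOP-10193, listed in CHANGES.txt below). Parsing only the query string leaves the body untouched. A hypothetical servlet fragment illustrating the hazard the old code had:

import java.io.IOException;
import java.io.InputStream;
import javax.servlet.http.HttpServletRequest;

public class GetParameterHazard {
  // For an application/x-www-form-urlencoded POST, getParameter() may be
  // satisfied by reading the request body, so the later getInputStream()
  // can return an already-exhausted stream.
  void handle(HttpServletRequest request) throws IOException {
    String user = request.getParameter("user.name"); // may consume the body
    InputStream body = request.getInputStream();     // possibly empty now
  }
}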
View File: TestPseudoAuthenticationHandler.java

@@ -94,7 +94,7 @@ public class TestPseudoAuthenticationHandler {
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
Mockito.when(request.getParameter(PseudoAuthenticator.USER_NAME)).thenReturn("user");
Mockito.when(request.getQueryString()).thenReturn(PseudoAuthenticator.USER_NAME + "=" + "user");
AuthenticationToken token = handler.authenticate(request, response);

View File: hadoop-common-project/hadoop-common/CHANGES.txt

@@ -108,6 +108,8 @@ Trunk (Unreleased)
HADOOP-10141. Create KeyProvider API to separate encryption key storage
from the applications. (omalley)
HADOOP-10201. Add listing to KeyProvider API. (Larry McCay via omalley)
BUG FIXES
HADOOP-9451. Fault single-layer config if node group topology is enabled.
@@ -407,6 +409,9 @@ Release 2.4.0 - UNRELEASED
HADOOP-10169. Remove the unnecessary synchronized in JvmMetrics class.
(Liang Xie via jing9)
HADOOP-10198. DomainSocket: add support for socketpair.
(Colin Patrick McCabe via wang)
OPTIMIZATIONS
HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn)
@@ -416,6 +421,9 @@ Release 2.4.0 - UNRELEASED
HADOOP-10172. Cache SASL server factories (daryn)
HADOOP-10173. Remove UGI from DIGEST-MD5 SASL server creation (daryn via
kihwal)
BUG FIXES
HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize
@@ -489,6 +497,9 @@ Release 2.4.0 - UNRELEASED
HADOOP-10171. TestRPC fails intermittently on jkd7 (Mit Desai via jeagles)
HADOOP-10147 HDFS-5678 Upgrade to commons-logging 1.1.3 to avoid potential
deadlock in MiniDFSCluster (stevel)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -568,6 +579,12 @@ Release 2.3.0 - UNRELEASED
HADOOP-10175. Har files system authority should preserve userinfo.
(Chuan Liu via cnauroth)
HADOOP-10090. Jobtracker metrics not updated properly after execution
of a mapreduce job. (ivanmi)
HADOOP-10193. hadoop-auth's PseudoAuthenticationHandler can consume getInputStream.
(gchanan via tucu)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File: JavaKeyStoreProvider.java

@@ -36,8 +36,11 @@ import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.UnrecoverableKeyException;
import java.security.cert.CertificateException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
@@ -56,6 +59,7 @@ import java.util.Map;
*/
@InterfaceAudience.Private
public class JavaKeyStoreProvider extends KeyProvider {
private static final String KEY_METADATA = "KeyMetadata";
public static final String SCHEME_NAME = "jceks";
public static final String KEYSTORE_PASSWORD_NAME =
"HADOOP_KEYSTORE_PASSWORD";
@@ -117,6 +121,44 @@ public class JavaKeyStoreProvider extends KeyProvider {
return new KeyVersion(versionName, key.getEncoded());
}
@Override
public List<String> getKeys() throws IOException {
ArrayList<String> list = new ArrayList<String>();
String alias = null;
try {
Enumeration<String> e = keyStore.aliases();
while (e.hasMoreElements()) {
alias = e.nextElement();
// only include the metadata key names in the list of names
if (!alias.contains("@")) {
list.add(alias);
}
}
} catch (KeyStoreException e) {
throw new IOException("Can't get key " + alias + " from " + path, e);
}
return list;
}
@Override
public List<KeyVersion> getKeyVersions(String name) throws IOException {
List<KeyVersion> list = new ArrayList<KeyVersion>();
Metadata km = getMetadata(name);
if (km != null) {
int latestVersion = km.getVersions();
KeyVersion v = null;
String versionName = null;
for (int i = 0; i < latestVersion; i++) {
versionName = buildVersionName(name, i);
v = getKeyVersion(versionName);
if (v != null) {
list.add(v);
}
}
}
return list;
}
@Override
public Metadata getMetadata(String name) throws IOException {
if (cache.containsKey(name)) {
@@ -288,7 +330,7 @@ public class JavaKeyStoreProvider extends KeyProvider {
@Override
public String getFormat() {
return "KeyMetadata";
return KEY_METADATA;
}
@Override

View File: KeyProvider.java

@@ -254,6 +254,20 @@ public abstract class KeyProvider {
public abstract KeyVersion getKeyVersion(String versionName
) throws IOException;
/**
* Get the key names for all keys.
* @return the list of key names
* @throws IOException
*/
public abstract List<String> getKeys() throws IOException;
/**
* Get the key material for all versions of a specific key name.
* @return the list of key material
* @throws IOException
*/
public abstract List<KeyVersion> getKeyVersions(String name) throws IOException;
/**
* Get the current version of the key, which should be used for encrypting new
* data.

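A minimal sketch of how a caller might exercise the two new abstract methods; the helper name is hypothetical:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.crypto.key.KeyProvider;

public class KeyEnumeration {
  // Hypothetical helper: print every version of every key in a provider.
  static void printAllKeyVersions(KeyProvider provider) throws IOException {
    for (String name : provider.getKeys()) {
      List<KeyProvider.KeyVersion> versions = provider.getKeyVersions(name);
      for (KeyProvider.KeyVersion kv : versions) {
        System.out.println(name + " -> " + kv.getVersionName());
      }
    }
  }
}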
View File: UserProvider.java

@@ -20,8 +20,10 @@ package org.apache.hadoop.crypto.key;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -142,4 +144,32 @@ public class UserProvider extends KeyProvider {
return null;
}
}
@Override
public List<String> getKeys() throws IOException {
List<String> list = new ArrayList<String>();
List<Text> keys = credentials.getAllSecretKeys();
for (Text key : keys) {
if (key.find("@") == -1) {
list.add(key.toString());
}
}
return list;
}
@Override
public List<KeyVersion> getKeyVersions(String name) throws IOException {
List<KeyVersion> list = new ArrayList<KeyVersion>();
Metadata km = getMetadata(name);
if (km != null) {
int latestVersion = km.getVersions();
for (int i = 0; i < latestVersion; i++) {
KeyVersion v = getKeyVersion(buildVersionName(name, i));
if (v != null) {
list.add(v);
}
}
}
return list;
}
}

View File: CommonConfigurationKeys.java

@@ -21,7 +21,6 @@ package org.apache.hadoop.fs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.http.lib.StaticUserWebFilter;
import org.apache.hadoop.security.authorize.Service;
/**
* This class contains constants for configuration keys used
@@ -240,4 +239,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
/** Default value for IPC_SERVER_CONNECTION_IDLE_SCAN_INTERVAL_KEY */
public static final int IPC_CLIENT_CONNECTION_IDLESCANINTERVAL_DEFAULT =
10000;
public static final String HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS =
"hadoop.user.group.metrics.percentiles.intervals";
}

View File: DomainSocket.java

@@ -276,6 +276,24 @@ public class DomainSocket implements Closeable {
return new DomainSocket(path, fd);
}
/**
* Create a pair of UNIX domain sockets which are connected to each other
* by calling socketpair(2).
*
* @return An array of two UNIX domain sockets connected to
* each other.
* @throws IOException on error.
*/
public static DomainSocket[] socketpair() throws IOException {
int fds[] = socketpair0();
return new DomainSocket[] {
new DomainSocket("(anonymous0)", fds[0]),
new DomainSocket("(anonymous1)", fds[1])
};
}
private static native int[] socketpair0() throws IOException;
private static native int accept0(int fd) throws IOException;
/**

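A minimal usage sketch for the new API (assuming DomainSocket's stream accessors, which the tests below exercise via their read/write strategies):

import java.io.IOException;
import org.apache.hadoop.net.unix.DomainSocket;

public class SocketpairSketch {
  static void demo() throws IOException {
    // Connected in-process pair; no filesystem path or accept loop required.
    DomainSocket[] pair = DomainSocket.socketpair();
    try {
      pair[0].getOutputStream().write(42);
      int b = pair[1].getInputStream().read(); // reads 42
    } finally {
      pair[0].close();
      pair[1].close();
    }
  }
}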
View File: Credentials.java

@@ -29,7 +29,9 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -73,15 +75,6 @@ public class Credentials implements Writable {
this.addAll(credentials);
}
/**
* Returns the key bytes for the alias
* @param alias the alias for the key
* @return key for this alias
*/
public byte[] getSecretKey(Text alias) {
return secretKeysMap.get(alias);
}
/**
* Returns the Token object for the alias
* @param alias the alias for the Token
@@ -118,6 +111,15 @@
return tokenMap.size();
}
/**
* Returns the key bytes for the alias
* @param alias the alias for the key
* @return key for this alias
*/
public byte[] getSecretKey(Text alias) {
return secretKeysMap.get(alias);
}
/**
* @return number of keys in the in-memory map
*/
@@ -142,6 +144,16 @@
secretKeysMap.remove(alias);
}
/**
* Return all the secret key entries in the in-memory map
*/
public List<Text> getAllSecretKeys() {
List<Text> list = new java.util.ArrayList<Text>();
list.addAll(secretKeysMap.keySet());
return list;
}
/**
* Convenience method for reading a token storage file, and loading the Tokens
* therein in the passed UGI

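A short sketch of the new getAllSecretKeys() accessor in use (the alias and key bytes are illustrative; UserProvider.getKeys() earlier in this commit is the real in-tree caller):

import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;

public class SecretKeyListing {
  static void demo() {
    Credentials creds = new Credentials();
    creds.addSecretKey(new Text("my-alias"), "secret".getBytes());
    // Enumerate aliases through the new accessor, then look each one up.
    for (Text alias : creds.getAllSecretKeys()) {
      byte[] material = creds.getSecretKey(alias);
      System.out.println(alias + ": " + material.length + " bytes");
    }
  }
}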
View File: Groups.java

@@ -138,6 +138,7 @@ public class Groups {
List<String> groupList = impl.getGroups(user);
long endMs = Time.monotonicNow();
long deltaMs = endMs - startMs;
UserGroupInformation.metrics.addGetGroups(deltaMs);
if (deltaMs > warningDeltaMs) {
LOG.warn("Potential performance problem: getGroups(user=" + user +") " +
"took " + deltaMs + " milliseconds.");

View File: SaslRpcServer.java

@@ -131,7 +131,7 @@ public class SaslRpcServer {
public SaslServer create(Connection connection,
SecretManager<TokenIdentifier> secretManager
) throws IOException, InterruptedException {
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
UserGroupInformation ugi = null;
final CallbackHandler callback;
switch (authMethod) {
case TOKEN: {
@@ -139,6 +139,7 @@
break;
}
case KERBEROS: {
ugi = UserGroupInformation.getCurrentUser();
if (serverId.isEmpty()) {
throw new AccessControlException(
"Kerberos principal name does NOT have the expected "
@@ -153,7 +154,9 @@
"Server does not support SASL " + authMethod);
}
SaslServer saslServer = ugi.doAs(
final SaslServer saslServer;
if (ugi != null) {
saslServer = ugi.doAs(
new PrivilegedExceptionAction<SaslServer>() {
@Override
public SaslServer run() throws SaslException {
@@ -161,6 +164,10 @@
SaslRpcServer.SASL_PROPS, callback);
}
});
} else {
saslServer = saslFactory.createSaslServer(mechanism, protocol, serverId,
SaslRpcServer.SASL_PROPS, callback);
}
if (saslServer == null) {
throw new AccessControlException(
"Unable to find SASL server implementation for " + mechanism);

View File: UserGroupInformation.java

@@ -19,6 +19,7 @@ package org.apache.hadoop.security;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN_DEFAULT;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import java.io.File;
import java.io.IOException;
@@ -58,6 +59,8 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.authentication.util.KerberosUtil;
@@ -92,14 +95,27 @@ public class UserGroupInformation {
*/
@Metrics(about="User and group related metrics", context="ugi")
static class UgiMetrics {
final MetricsRegistry registry = new MetricsRegistry("UgiMetrics");
@Metric("Rate of successful kerberos logins and latency (milliseconds)")
MutableRate loginSuccess;
@Metric("Rate of failed kerberos logins and latency (milliseconds)")
MutableRate loginFailure;
@Metric("GetGroups") MutableRate getGroups;
MutableQuantiles[] getGroupsQuantiles;
static UgiMetrics create() {
return DefaultMetricsSystem.instance().register(new UgiMetrics());
}
void addGetGroups(long latency) {
getGroups.add(latency);
if (getGroupsQuantiles != null) {
for (MutableQuantiles q : getGroupsQuantiles) {
q.add(latency);
}
}
}
}
/**
@@ -250,6 +266,20 @@ public class UserGroupInformation {
groups = Groups.getUserToGroupsMappingService(conf);
}
UserGroupInformation.conf = conf;
if (metrics.getGroupsQuantiles == null) {
int[] intervals = conf.getInts(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS);
if (intervals != null && intervals.length > 0) {
final int length = intervals.length;
MutableQuantiles[] getGroupsQuantiles = new MutableQuantiles[length];
for (int i = 0; i < length; i++) {
getGroupsQuantiles[i] = metrics.registry.newQuantiles(
"getGroups" + intervals[i] + "s",
"Get groups", "ops", "latency", intervals[i]);
}
metrics.getGroupsQuantiles = getGroupsQuantiles;
}
}
}
/**

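To turn the new quantiles on, a deployment sets the comma-separated interval list before UGI is configured; a sketch with illustrative 60s/300s rollover windows:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

public class UgiQuantilesSetup {
  static void demo() {
    Configuration conf = new Configuration();
    conf.set("hadoop.user.group.metrics.percentiles.intervals", "60,300");
    UserGroupInformation.setConfiguration(conf);
    // Subsequent group lookups feed MutableQuantiles gauges registered as
    // "getGroups60s" and "getGroups300s" under UgiMetrics.
  }
}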
View File: StringUtils.java

@@ -928,8 +928,10 @@ public class StringUtils {
* @param args List of arguments.
* @return null if the option was not found; the value of the
* option otherwise.
* @throws IllegalArgumentException if the option's argument is not present
*/
public static String popOptionWithArgument(String name, List<String> args) {
public static String popOptionWithArgument(String name, List<String> args)
throws IllegalArgumentException {
String val = null;
for (Iterator<String> iter = args.iterator(); iter.hasNext(); ) {
String cur = iter.next();
@@ -939,7 +941,7 @@ public class StringUtils {
} else if (cur.equals(name)) {
iter.remove();
if (!iter.hasNext()) {
throw new RuntimeException("option " + name + " requires 1 " +
throw new IllegalArgumentException("option " + name + " requires 1 " +
"argument.");
}
val = iter.next();

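Usage sketch for the retyped exception (argument values are illustrative):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.util.StringUtils;

public class PopOptionSketch {
  static void demo() {
    List<String> args = new ArrayList<String>(
        Arrays.asList("-maxTtl", "365d", "/pool1"));
    String ttl = StringUtils.popOptionWithArgument("-maxTtl", args); // "365d"
    // args is now ["/pool1"]. A trailing "-maxTtl" with no value now raises
    // IllegalArgumentException, which CLI code can catch and turn into a
    // usage message, instead of a bare RuntimeException.
  }
}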
View File: DomainSocket.c

@@ -364,6 +364,50 @@ JNIEnv *env, jclass clazz, jstring path)
return fd;
}
#define SOCKETPAIR_ARRAY_LEN 2
JNIEXPORT jarray JNICALL
Java_org_apache_hadoop_net_unix_DomainSocket_socketpair0(
JNIEnv *env, jclass clazz)
{
jarray arr = NULL;
int idx, err, fds[SOCKETPAIR_ARRAY_LEN] = { -1, -1 };
jthrowable jthr = NULL;
arr = (*env)->NewIntArray(env, SOCKETPAIR_ARRAY_LEN);
jthr = (*env)->ExceptionOccurred(env);
if (jthr) {
(*env)->ExceptionClear(env);
goto done;
}
if (socketpair(PF_UNIX, SOCK_STREAM, 0, fds) < 0) {
err = errno;
jthr = newSocketException(env, err,
"socketpair(2) error: %s", terror(err));
goto done;
}
(*env)->SetIntArrayRegion(env, arr, 0, SOCKETPAIR_ARRAY_LEN, fds);
jthr = (*env)->ExceptionOccurred(env);
if (jthr) {
(*env)->ExceptionClear(env);
goto done;
}
done:
if (jthr) {
(*env)->DeleteLocalRef(env, arr);
arr = NULL;
for (idx = 0; idx < SOCKETPAIR_ARRAY_LEN; idx++) {
if (fds[idx] >= 0) {
close(fds[idx]);
fds[idx] = -1;
}
}
(*env)->Throw(env, jthr);
}
return arr;
}
JNIEXPORT jint JNICALL
Java_org_apache_hadoop_net_unix_DomainSocket_accept0(
JNIEnv *env, jclass clazz, jint fd)

View File: TestKeyProviderFactory.java

@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
@@ -160,6 +161,16 @@ public class TestKeyProviderFactory {
provider.getCurrentKey("key4").getMaterial());
assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial());
assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName());
List<String> keys = provider.getKeys();
assertTrue("Keys should have been returned.", keys.size() == 2);
assertTrue("Returned Keys should have included key3.", keys.contains("key3"));
assertTrue("Returned Keys should have included key4.", keys.contains("key4"));
List<KeyVersion> kvl = provider.getKeyVersions("key3");
assertTrue("KeyVersions should have been returned for key3.", kvl.size() == 1);
assertTrue("KeyVersions should have included key3@0.", kvl.get(0).getVersionName().equals("key3@0"));
assertArrayEquals(key3, kvl.get(0).getMaterial());
}
@Test

View File: TestMetricsSourceAdapter.java (new file)

@@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metrics2.impl;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.MetricsTag;
import org.apache.hadoop.metrics2.annotation.Metric;
import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.MetricsAnnotations;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MetricsSourceBuilder;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.junit.Test;
public class TestMetricsSourceAdapter {
@Test
public void testGetMetricsAndJmx() throws Exception {
// create test source with a single metric counter of value 0
TestSource source = new TestSource("test");
MetricsSourceBuilder sb = MetricsAnnotations.newSourceBuilder(source);
final MetricsSource s = sb.build();
List<MetricsTag> injectedTags = new ArrayList<MetricsTag>();
MetricsSourceAdapter sa = new MetricsSourceAdapter(
"test", "test", "test desc", s, injectedTags, null, null, 1, false);
// all metrics are initially assumed to have changed
MetricsCollectorImpl builder = new MetricsCollectorImpl();
Iterable<MetricsRecordImpl> metricsRecords = sa.getMetrics(builder, true);
// Validate getMetrics and JMX initial values
MetricsRecordImpl metricsRecord = metricsRecords.iterator().next();
assertEquals(0L,
metricsRecord.metrics().iterator().next().value().longValue());
Thread.sleep(100); // skip JMX cache TTL
assertEquals(0L, (Number)sa.getAttribute("C1"));
// change metric value
source.incrementCnt();
// validate getMetrics and JMX
builder = new MetricsCollectorImpl();
metricsRecords = sa.getMetrics(builder, true);
metricsRecord = metricsRecords.iterator().next();
assertTrue(metricsRecord.metrics().iterator().hasNext());
Thread.sleep(100); // skip JMX cache TTL
assertEquals(1L, (Number)sa.getAttribute("C1"));
}
@SuppressWarnings("unused")
@Metrics(context="test")
private static class TestSource {
@Metric("C1 desc") MutableCounterLong c1;
final MetricsRegistry registry;
TestSource(String recName) {
registry = new MetricsRegistry(recName);
}
public void incrementCnt() {
c1.incr();
}
}
}

View File: TestDomainSocket.java

@@ -420,7 +420,8 @@ public class TestDomainSocket {
* @throws IOException
*/
void testClientServer1(final Class<? extends WriteStrategy> writeStrategyClass,
final Class<? extends ReadStrategy> readStrategyClass) throws Exception {
final Class<? extends ReadStrategy> readStrategyClass,
final DomainSocket preConnectedSockets[]) throws Exception {
final String TEST_PATH = new File(sockDir.getDir(),
"test_sock_client_server1").getAbsolutePath();
final byte clientMsg1[] = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6 };
@@ -428,13 +429,15 @@
final byte clientMsg2 = 0x45;
final ArrayBlockingQueue<Throwable> threadResults =
new ArrayBlockingQueue<Throwable>(2);
final DomainSocket serv = DomainSocket.bindAndListen(TEST_PATH);
final DomainSocket serv = (preConnectedSockets != null) ?
null : DomainSocket.bindAndListen(TEST_PATH);
Thread serverThread = new Thread() {
public void run(){
// Run server
DomainSocket conn = null;
try {
conn = serv.accept();
conn = preConnectedSockets != null ?
preConnectedSockets[0] : serv.accept();
byte in1[] = new byte[clientMsg1.length];
ReadStrategy reader = readStrategyClass.newInstance();
reader.init(conn);
@@ -459,7 +462,8 @@
Thread clientThread = new Thread() {
public void run(){
try {
DomainSocket client = DomainSocket.connect(TEST_PATH);
DomainSocket client = preConnectedSockets != null ?
preConnectedSockets[1] : DomainSocket.connect(TEST_PATH);
WriteStrategy writer = writeStrategyClass.newInstance();
writer.init(client);
writer.write(clientMsg1);
@@ -487,25 +491,45 @@
}
serverThread.join(120000);
clientThread.join(120000);
if (serv != null) {
serv.close();
}
}
@Test(timeout=180000)
public void testClientServerOutStreamInStream() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
InputStreamReadStrategy.class);
InputStreamReadStrategy.class, null);
}
@Test(timeout=180000)
public void testClientServerOutStreamInStreamWithSocketpair() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
InputStreamReadStrategy.class, DomainSocket.socketpair());
}
@Test(timeout=180000)
public void testClientServerOutStreamInDbb() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
DirectByteBufferReadStrategy.class);
DirectByteBufferReadStrategy.class, null);
}
@Test(timeout=180000)
public void testClientServerOutStreamInDbbWithSocketpair() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
DirectByteBufferReadStrategy.class, DomainSocket.socketpair());
}
@Test(timeout=180000)
public void testClientServerOutStreamInAbb() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
ArrayBackedByteBufferReadStrategy.class);
ArrayBackedByteBufferReadStrategy.class, null);
}
@Test(timeout=180000)
public void testClientServerOutStreamInAbbWithSocketpair() throws Exception {
testClientServer1(OutputStreamWriteStrategy.class,
ArrayBackedByteBufferReadStrategy.class, DomainSocket.socketpair());
}
static private class PassedFile {

View File: TestUserGroupInformation.java

@@ -19,7 +19,6 @@ package org.apache.hadoop.security;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.TestSaslRPC;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.authentication.util.KerberosName;
@@ -40,9 +39,9 @@ import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Set;
import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTH_TO_LOCAL;
import static org.apache.hadoop.ipc.TestSaslRPC.*;
import static org.apache.hadoop.security.token.delegation.TestDelegationToken.TestDelegationTokenIdentifier;
import static org.apache.hadoop.test.MetricsAsserts.*;
import static org.junit.Assert.*;
import static org.mockito.Mockito.mock;
@@ -55,6 +54,8 @@ public class TestUserGroupInformation {
final private static String GROUP3_NAME = "group3";
final private static String[] GROUP_NAMES =
new String[]{GROUP1_NAME, GROUP2_NAME, GROUP3_NAME};
// Rollover interval of percentile metrics (in seconds)
private static final int PERCENTILES_INTERVAL = 1;
private static Configuration conf;
/**
@@ -80,7 +81,8 @@ public class TestUserGroupInformation {
// doesn't matter what it is, but getGroups needs it set...
// use HADOOP_HOME environment variable to prevent interfering with logic
// that finds winutils.exe
System.setProperty("hadoop.home.dir", System.getenv("HADOOP_HOME"));
String home = System.getenv("HADOOP_HOME");
System.setProperty("hadoop.home.dir", (home != null ? home : "."));
// fake the realm if kerberos is enabled
System.setProperty("java.security.krb5.kdc", "");
System.setProperty("java.security.krb5.realm", "DEFAULT.REALM");
@@ -150,11 +152,15 @@ public class TestUserGroupInformation {
/** Test login method */
@Test (timeout = 30000)
public void testLogin() throws Exception {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
String.valueOf(PERCENTILES_INTERVAL));
UserGroupInformation.setConfiguration(conf);
// login from unix
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
assertEquals(UserGroupInformation.getCurrentUser(),
UserGroupInformation.getLoginUser());
assertTrue(ugi.getGroupNames().length >= 1);
verifyGroupMetrics(1);
// ensure that doAs works correctly
UserGroupInformation userGroupInfo =
@@ -728,6 +734,21 @@ public class TestUserGroupInformation {
}
}
private static void verifyGroupMetrics(
long groups) throws InterruptedException {
MetricsRecordBuilder rb = getMetrics("UgiMetrics");
if (groups > 0) {
assertCounter("GetGroupsNumOps", groups, rb);
double avg = getDoubleGauge("GetGroupsAvgTime", rb);
assertTrue(avg >= 0.0);
// Sleep for an interval+slop to let the percentiles rollover
Thread.sleep((PERCENTILES_INTERVAL+1)*1000);
// Check that the percentiles were updated
assertQuantileGauges("GetGroups1s", rb);
}
}
/**
* Test for the case that UserGroupInformation.getCurrentUser()
* is called when the AccessControlContext has a Subject associated

View File: hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt

@@ -13,10 +13,6 @@ Trunk (Unreleased)
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
as a collection of storages (see breakdown of tasks below for features and
contributors).
IMPROVEMENTS
HDFS-4665. Move TestNetworkTopologyWithNodeGroup to common.
@@ -243,13 +239,18 @@ Trunk (Unreleased)
HDFS-5636. Enforce a max TTL per cache pool. (awang via cmccabe)
HDFS-5651. Remove dfs.namenode.caching.enabled and improve CRM locking.
(cmccabe via wang)
OPTIMIZATIONS
HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe)
HDFS-5665. Remove the unnecessary writeLock while initializing CacheManager
in FsNameSystem Ctor. (Uma Maheswara Rao G via Andrew Wang)
BUG FIXES
HADOOP-9635 Fix potential Stack Overflow in DomainSocket.c (V. Karthik Kumar
via cmccabe)
@@ -444,139 +445,27 @@ Trunk (Unreleased)
HDFS-5626. dfsadmin -report shows incorrect cache values. (cmccabe)
HDFS-5406. Send incremental block reports for all storages in a
single call. (Arpit Agarwal)
HDFS-5454. DataNode UUID should be assigned prior to FsDataset
initialization. (Arpit Agarwal)
HDFS-5679. TestCacheDirectives should handle the case where native code
is not available. (wang)
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
HDFS-5701. Fix the CacheAdmin -addPool -maxTtl option name.
(Stephen Chu via wang)
HDFS-4985. Add storage type to the protocol and expose it in block report
and block locations. (Arpit Agarwal)
HDFS-5708. The CacheManager throws a NPE in the DataNode logs when
processing cache reports that refer to a block not known to the
BlockManager. (cmccabe via wang)
HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
HDFS-5659. dfsadmin -report doesn't output cache information properly.
(wang)
HDFS-5000. DataNode configuration should allow specifying storage type.
(Arpit Agarwal)
HDFS-5705. TestSecondaryNameNodeUpgrade#testChangeNsIDFails may fail due
to ConcurrentModificationException. (Ted Yu via brandonli)
HDFS-4987. Namenode changes to track multiple storages per datanode.
(szetszwo)
HDFS-5719. FSImage#doRollback() should close prevState before return
(Ted Yu via brandonli)
HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
(Junping Du via szetszwo)
HDFS-5009. Include storage information in the LocatedBlock. (szetszwo)
HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
fix a synchronization problem in DatanodeStorageInfo. (szetszwo)
HDFS-5157. Add StorageType to FsVolume. (Junping Du via szetszwo)
HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
datanodes. (szetszwo)
HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
HDFS-5222. Move block schedule information from DatanodeDescriptor to
DatanodeStorageInfo. (szetszwo)
HDFS-4988. Datanode must support all the volumes as individual storages.
(Arpit Agarwal)
HDFS-5377. Heartbeats from Datanode should include one storage report
per storage directory. (Arpit Agarwal)
HDFS-5398. NameNode changes to process storage reports per storage
directory. (Arpit Agarwal)
HDFS-5390. Send one incremental block report per storage directory.
(Arpit Agarwal)
HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities. (szetszwo)
HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
Arpit Agarwal)
HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
Agarwal)
HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
HDFS-5448. Datanode should generate its ID on first registration. (Arpit
Agarwal)
HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
Agarwal)
HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
(Contributed by szetszwo)
HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
by szetszwo)
HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
Agarwal)
HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
(Contributed by szetszwo)
HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
TestNNThroughputBenchmark (Contributed by szetszwo)
HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
Agarwal)
HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
by Junping Du)
HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
Agarwal)
HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
(Junping Du via Arpit Agarwal)
HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
Agarwal)
HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
Agarwal)
HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
szetszwo)
HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
not accurate. (Eric Sirianni via Arpit Agarwal)
HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
HDFS-5589. Namenode loops caching and uncaching when data should be
uncached (awang via cmccabe)
Release 2.4.0 - UNRELEASED
@@ -608,6 +497,10 @@ Release 2.4.0 - UNRELEASED
HDFS-5514. FSNamesystem's fsLock should allow custom implementation (daryn)
HDFS-2832. Heterogeneous Storages support in HDFS phase 1 - treat DataNode
as a collection of storages (see breakdown of tasks below for features and
contributors).
IMPROVEMENTS
HDFS-5267. Remove volatile from LightWeightHashSet. (Junping Du via llu)
@@ -618,9 +511,6 @@ Release 2.4.0 - UNRELEASED
HDFS-5004. Add additional JMX bean for NameNode status data
(Trevor Lorimer via cos)
HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
(shv)
HDFS-4994. Audit log getContentSummary() calls. (Robert Parker via kihwal)
HDFS-5144. Document time unit to NameNodeMetrics. (Akira Ajisaka via
@@ -768,6 +658,11 @@ Release 2.4.0 - UNRELEASED
HDFS-2933. Improve DataNode Web UI Index Page. (Vivek Ganesan via
Arpit Agarwal)
HDFS-5695. Clean up TestOfflineEditsViewer and OfflineEditsViewerHelper.
(Haohui Mai via jing9)
HDFS-5220. Expose group resolution time as metric (jxiang via cmccabe)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
@@ -833,6 +728,139 @@ Release 2.4.0 - UNRELEASED
HDFS-5690. DataNode fails to start in secure mode when dfs.http.policy equals to
HTTP_ONLY. (Haohui Mai via jing9)
BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS
HDFS-4985. Add storage type to the protocol and expose it in block report
and block locations. (Arpit Agarwal)
HDFS-5115. Make StorageID a UUID. (Arpit Agarwal)
HDFS-5000. DataNode configuration should allow specifying storage type.
(Arpit Agarwal)
HDFS-4987. Namenode changes to track multiple storages per datanode.
(szetszwo)
HDFS-5154. Fix TestBlockManager and TestDatanodeDescriptor after HDFS-4987.
(Junping Du via szetszwo)
HDFS-5009. Include storage information in the LocatedBlock. (szetszwo)
HDFS-5134. Move blockContentsStale, heartbeatedSinceFailover and
firstBlockReport from DatanodeDescriptor to DatanodeStorageInfo; and
fix a synchronization problem in DatanodeStorageInfo. (szetszwo)
HDFS-5157. Add StorageType to FsVolume. (Junping Du via szetszwo)
HDFS-4990. Change BlockPlacementPolicy to choose storages instead of
datanodes. (szetszwo)
HDFS-5232. Protocol changes to transmit StorageUuid. (Arpit Agarwal)
HDFS-5233. Use Datanode UUID to identify Datanodes. (Arpit Agarwal)
HDFS-5222. Move block schedule information from DatanodeDescriptor to
DatanodeStorageInfo. (szetszwo)
HDFS-4988. Datanode must support all the volumes as individual storages.
(Arpit Agarwal)
HDFS-5377. Heartbeats from Datanode should include one storage report
per storage directory. (Arpit Agarwal)
HDFS-5398. NameNode changes to process storage reports per storage
directory. (Arpit Agarwal)
HDFS-5390. Send one incremental block report per storage directory.
(Arpit Agarwal)
HDFS-5401. Fix NPE in Directory Scanner. (Arpit Agarwal)
HDFS-5417. Fix storage IDs in PBHelper and UpgradeUtilities. (szetszwo)
HDFS-5214. Fix NPEs in BlockManager and DirectoryScanner. (Arpit Agarwal)
HDFS-5435. File append fails to initialize storageIDs. (Junping Du via
Arpit Agarwal)
HDFS-5437. Fix TestBlockReport and TestBPOfferService failures. (Arpit
Agarwal)
HDFS-5447. Fix TestJspHelper. (Arpit Agarwal)
HDFS-5452. Fix TestReplicationPolicy and TestBlocksScheduledCounter.
HDFS-5448. Datanode should generate its ID on first registration. (Arpit
Agarwal)
HDFS-5448. Fix break caused by previous checkin for HDFS-5448. (Arpit
Agarwal)
HDFS-5455. NN should update storageMap on first heartbeat. (Arpit Agarwal)
HDFS-5457. Fix TestDatanodeRegistration, TestFsck and TestAddBlockRetry.
(Contributed by szetszwo)
HDFS-5466. Update storage IDs when the pipeline is updated. (Contributed
by szetszwo)
HDFS-5439. Fix TestPendingReplication. (Contributed by Junping Du, Arpit
Agarwal)
HDFS-5470. Add back trunk's reportDiff algorithm to the branch.
(Contributed by szetszwo)
HDFS-5472. Fix TestDatanodeManager, TestSafeMode and
TestNNThroughputBenchmark (Contributed by szetszwo)
HDFS-5475. NN incorrectly tracks more than one replica per DN. (Arpit
Agarwal)
HDFS-5481. Fix TestDataNodeVolumeFailure in branch HDFS-2832. (Contributed
by Junping Du)
HDFS-5480. Update Balancer for HDFS-2832. (Contributed by szetszwo)
HDFS-5486. Fix TestNameNodeMetrics for HDFS-2832. (Arpit Agarwal)
HDFS-5491. Update editsStored for HDFS-2832. (Arpit Agarwal)
HDFS-5494. Fix findbugs warnings for HDFS-2832. (Arpit Agarwal)
HDFS-5508. Fix compilation error after merge. (Contributed by szetszwo)
HDFS-5501. Fix pendingReceivedRequests tracking in BPServiceActor. (Arpit
Agarwal)
HDFS-5510. Fix a findbug warning in DataStorage.java on HDFS-2832 branch.
(Junping Du via Arpit Agarwal)
HDFS-5515. Fix TestDFSStartupVersions for HDFS-2832. (Arpit Agarwal)
HDFS-5527. Fix TestUnderReplicatedBlocks on branch HDFS-2832. (Arpit
Agarwal)
HDFS-5547. Fix build break after merge from trunk to HDFS-2832. (Arpit
Agarwal)
HDFS-5542. Fix TODO and clean up the code in HDFS-2832. (Contributed by
szetszwo)
HDFS-5559. Fix TestDatanodeConfig in HDFS-2832. (Contributed by szetszwo)
HDFS-5484. StorageType and State in DatanodeStorageInfo in NameNode is
not accurate. (Eric Sirianni via Arpit Agarwal)
HDFS-5648. Get rid of FsDatasetImpl#perVolumeReplicaMap. (Arpit Agarwal)
HDFS-5406. Send incremental block reports for all storages in a
single call. (Arpit Agarwal)
HDFS-5454. DataNode UUID should be assigned prior to FsDataset
initialization. (Arpit Agarwal)
HDFS-5667. Include DatanodeStorage in StorageReport. (Arpit Agarwal)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@@ -866,6 +894,12 @@ Release 2.3.0 - UNRELEASED
HDFS-5662. Can't decommission a DataNode due to file's replication factor
larger than the rest of the cluster size. (brandonli)
HDFS-5068. Convert NNThroughputBenchmark to a Tool to allow generic options.
(shv)
HDFS-5675. Add Mkdirs operation to NNThroughputBenchmark.
(Plamen Jeliazkov via shv)
OPTIMIZATIONS
BUG FIXES
@@ -1016,6 +1050,11 @@ Release 2.3.0 - UNRELEASED
HDFS-5661. Browsing FileSystem via web ui, should use datanode's fqdn instead of ip
address. (Benoy Antony via jing9)
HDFS-5582. hdfs getconf -excludeFile or -includeFile always failed (sathish
via cmccabe)
HDFS-5671. Fix socket leak in DFSInputStream#getBlockReader. (JamesLi via umamahesh)
Release 2.2.0 - 2013-10-13
INCOMPATIBLE CHANGES

View File: DFSConfigKeys.java

@@ -108,8 +108,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final long DFS_DATANODE_MAX_LOCKED_MEMORY_DEFAULT = 0;
public static final String DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_KEY = "dfs.datanode.fsdatasetcache.max.threads.per.volume";
public static final int DFS_DATANODE_FSDATASETCACHE_MAX_THREADS_PER_VOLUME_DEFAULT = 4;
public static final String DFS_NAMENODE_CACHING_ENABLED_KEY = "dfs.namenode.caching.enabled";
public static final boolean DFS_NAMENODE_CACHING_ENABLED_DEFAULT = false;
public static final String DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT =
"dfs.namenode.path.based.cache.block.map.allocation.percent";
public static final float DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT = 0.25f;
public static final String DFS_NAMENODE_HTTP_PORT_KEY = "dfs.http.port";
public static final int DFS_NAMENODE_HTTP_PORT_DEFAULT = 50070;

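With dfs.namenode.caching.enabled gone (HDFS-5651, noted in CHANGES.txt above), caching is always on and the only knob in this hunk is the block map sizing; a sketch (0.25f mirrors the default shown above):

import org.apache.hadoop.conf.Configuration;

public class CacheBlockMapTuning {
  static void demo() {
    Configuration conf = new Configuration();
    // Fraction of NameNode heap reserved for the cached-block map.
    conf.setFloat(
        "dfs.namenode.path.based.cache.block.map.allocation.percent", 0.25f);
  }
}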
View File: DFSInputStream.java

@@ -1188,11 +1188,21 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
}
// Try to create a new remote peer.
Peer peer = newTcpPeer(dnAddr);
return BlockReaderFactory.newBlockReader(
dfsClient.getConf(), file, block, blockToken, startOffset,
len, verifyChecksum, clientName, peer, chosenNode,
dsFactory, peerCache, fileInputStreamCache, false,
try {
reader = BlockReaderFactory.newBlockReader(dfsClient.getConf(), file,
block, blockToken, startOffset, len, verifyChecksum, clientName,
peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false,
curCachingStrategy);
return reader;
} catch (IOException ex) {
DFSClient.LOG.debug(
"Exception while getting block reader, closing stale " + peer, ex);
throw ex;
} finally {
if (reader == null) {
IOUtils.closeQuietly(peer);
}
}
}

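The hunk follows a close-on-failure ownership pattern (HDFS-5671): if newBlockReader throws, the peer is still owned by this method and must be closed; on success the reader takes ownership. A hypothetical generic helper showing the same shape:

import java.io.Closeable;
import java.util.concurrent.Callable;
import org.apache.commons.io.IOUtils;

public class BuildOrClose {
  // Hypothetical helper: either the builder returns an object that now owns
  // the socket, or the socket is closed before the exception propagates.
  static <R> R buildOrClose(Closeable socket, Callable<R> builder)
      throws Exception {
    R result = null;
    try {
      result = builder.call();
      return result;
    } finally {
      if (result == null) {
        IOUtils.closeQuietly(socket);
      }
    }
  }
}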
View File: PBHelper.java

@@ -533,21 +533,7 @@ public class PBHelper {
static public DatanodeInfoProto convertDatanodeInfo(DatanodeInfo di) {
if (di == null) return null;
DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
if (di.getNetworkLocation() != null) {
builder.setLocation(di.getNetworkLocation());
}
return builder.
setId(PBHelper.convert((DatanodeID) di)).
setCapacity(di.getCapacity()).
setDfsUsed(di.getDfsUsed()).
setRemaining(di.getRemaining()).
setBlockPoolUsed(di.getBlockPoolUsed()).
setLastUpdate(di.getLastUpdate()).
setXceiverCount(di.getXceiverCount()).
setAdminState(PBHelper.convert(di.getAdminState())).
build();
return convert(di);
}
@@ -591,15 +577,20 @@ public class PBHelper {
public static DatanodeInfoProto convert(DatanodeInfo info) {
DatanodeInfoProto.Builder builder = DatanodeInfoProto.newBuilder();
builder.setBlockPoolUsed(info.getBlockPoolUsed());
builder.setAdminState(PBHelper.convert(info.getAdminState()));
builder.setCapacity(info.getCapacity())
.setDfsUsed(info.getDfsUsed())
if (info.getNetworkLocation() != null) {
builder.setLocation(info.getNetworkLocation());
}
builder
.setId(PBHelper.convert((DatanodeID)info))
.setLastUpdate(info.getLastUpdate())
.setLocation(info.getNetworkLocation())
.setCapacity(info.getCapacity())
.setDfsUsed(info.getDfsUsed())
.setRemaining(info.getRemaining())
.setBlockPoolUsed(info.getBlockPoolUsed())
.setCacheCapacity(info.getCacheCapacity())
.setCacheUsed(info.getCacheUsed())
.setLastUpdate(info.getLastUpdate())
.setXceiverCount(info.getXceiverCount())
.setAdminState(PBHelper.convert(info.getAdminState()))
.build();
return builder.build();
}
@@ -1591,13 +1582,17 @@ public class PBHelper {
StorageReportProto.Builder builder = StorageReportProto.newBuilder()
.setBlockPoolUsed(r.getBlockPoolUsed()).setCapacity(r.getCapacity())
.setDfsUsed(r.getDfsUsed()).setRemaining(r.getRemaining())
.setStorageUuid(r.getStorageID());
.setStorageUuid(r.getStorage().getStorageID())
.setStorage(convert(r.getStorage()));
return builder.build();
}
public static StorageReport convert(StorageReportProto p) {
return new StorageReport(p.getStorageUuid(), p.getFailed(),
p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
return new StorageReport(
p.hasStorage() ?
convert(p.getStorage()) :
new DatanodeStorage(p.getStorageUuid()),
p.getFailed(), p.getCapacity(), p.getDfsUsed(), p.getRemaining(),
p.getBlockPoolUsed());
}

View File: CacheReplicationMonitor.java

@@ -21,12 +21,14 @@ import static org.apache.hadoop.util.ExitUtil.terminate;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
import java.util.TreeMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
@@ -76,7 +78,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
/**
* Pseudorandom number source
*/
private final Random random = new Random();
private static final Random random = new Random();
/**
* The interval at which we scan the namesystem for caching changes.
@@ -87,17 +89,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* The CacheReplicationMonitor (CRM) lock. Used to synchronize starting and
* waiting for rescan operations.
*/
private final ReentrantLock lock = new ReentrantLock();
private final ReentrantLock lock;
/**
* Notifies the scan thread that an immediate rescan is needed.
*/
private final Condition doRescan = lock.newCondition();
private final Condition doRescan;
/**
* Notifies waiting threads that a rescan has finished.
*/
private final Condition scanFinished = lock.newCondition();
private final Condition scanFinished;
/**
* Whether there are pending CacheManager operations that necessitate a
@@ -121,11 +123,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
*/
private boolean shutdown = false;
/**
* The monotonic time at which the current scan started.
*/
private long startTimeMs;
/**
* Mark status of the current scan.
*/
@@ -142,24 +139,27 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
private long scannedBlocks;
public CacheReplicationMonitor(FSNamesystem namesystem,
CacheManager cacheManager, long intervalMs) {
CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
this.namesystem = namesystem;
this.blockManager = namesystem.getBlockManager();
this.cacheManager = cacheManager;
this.cachedBlocks = cacheManager.getCachedBlocks();
this.intervalMs = intervalMs;
this.lock = lock;
this.doRescan = this.lock.newCondition();
this.scanFinished = this.lock.newCondition();
}
@Override
public void run() {
startTimeMs = 0;
long startTimeMs = 0;
Thread.currentThread().setName("CacheReplicationMonitor(" +
System.identityHashCode(this) + ")");
LOG.info("Starting CacheReplicationMonitor with interval " +
intervalMs + " milliseconds");
try {
long curTimeMs = Time.monotonicNow();
while (true) {
// Not all of the variables accessed here need the CRM lock, but take
// it anyway for simplicity
lock.lock();
try {
while (true) {
@@ -180,12 +180,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
doRescan.await(delta, TimeUnit.MILLISECONDS);
curTimeMs = Time.monotonicNow();
}
} finally {
lock.unlock();
}
// Mark scan as started, clear needsRescan
lock.lock();
try {
isScanning = true;
needsRescan = false;
} finally {
@@ -195,7 +189,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
mark = !mark;
rescan();
curTimeMs = Time.monotonicNow();
// Retake the CRM lock to update synchronization-related variables
// Update synchronization-related variables.
lock.lock();
try {
isScanning = false;
@@ -208,32 +202,15 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
scannedBlocks + " block(s) in " + (curTimeMs - startTimeMs) + " " +
"millisecond(s).");
}
} catch (InterruptedException e) {
LOG.info("Shutting down CacheReplicationMonitor.");
return;
} catch (Throwable t) {
LOG.fatal("Thread exiting", t);
terminate(1, t);
}
}
/**
* Similar to {@link CacheReplicationMonitor#waitForRescan()}, except it only
* waits if there are pending operations that necessitate a rescan as
* indicated by {@link #setNeedsRescan()}.
* <p>
* Note that this call may release the FSN lock, so operations before and
* after are not necessarily atomic.
*/
public void waitForRescanIfNeeded() {
lock.lock();
try {
if (!needsRescan) {
return;
}
} finally {
lock.unlock();
}
waitForRescan();
}
/**
* Waits for a rescan to complete. This doesn't guarantee consistency with
* pending operations, only relative recency, since it will not force a new
@@ -242,33 +219,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* Note that this call will release the FSN lock, so operations before and
* after are not atomic.
*/
public void waitForRescan() {
// Drop the FSN lock temporarily and retake it after we finish waiting
// Need to handle both the read lock and the write lock
boolean retakeWriteLock = false;
if (namesystem.hasWriteLock()) {
namesystem.writeUnlock();
retakeWriteLock = true;
} else if (namesystem.hasReadLock()) {
namesystem.readUnlock();
} else {
// Expected to have at least one of the locks
Preconditions.checkState(false,
"Need to be holding either the read or write lock");
public void waitForRescanIfNeeded() {
Preconditions.checkArgument(!namesystem.hasWriteLock(),
"Must not hold the FSN write lock when waiting for a rescan.");
Preconditions.checkArgument(lock.isHeldByCurrentThread(),
"Must hold the CRM lock when waiting for a rescan.");
if (!needsRescan) {
return;
}
// try/finally for retaking FSN lock
try {
lock.lock();
// try/finally for releasing CRM lock
try {
// If no scan is already ongoing, mark the CRM as dirty and kick
if (!isScanning) {
needsRescan = true;
doRescan.signal();
}
// Wait until the scan finishes and the count advances
final long startCount = scanCount;
while (startCount >= scanCount) {
while ((!shutdown) && (startCount >= scanCount)) {
try {
scanFinished.await();
} catch (InterruptedException e) {
@@ -277,16 +242,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
break;
}
}
} finally {
lock.unlock();
}
} finally {
if (retakeWriteLock) {
namesystem.writeLock();
} else {
namesystem.readLock();
}
}
}
/**
@@ -294,42 +249,43 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* changes that require a rescan.
*/
public void setNeedsRescan() {
lock.lock();
try {
Preconditions.checkArgument(lock.isHeldByCurrentThread(),
"Must hold the CRM lock when setting the needsRescan bit.");
this.needsRescan = true;
} finally {
lock.unlock();
}
}
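// Illustrative caller-side pattern implied by the new preconditions: the
// CacheManager (which now owns the shared "crmLock"; these names are
// assumptions, not shown in this hunk) brackets mutations and the optional
// wait with the lock:
//
//   crmLock.lock();
//   try {
//     monitor.setNeedsRescan();
//     monitor.waitForRescanIfNeeded();
//   } finally {
//     crmLock.unlock();
//   }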
/**
* Shut down and join the monitor thread.
* Shut down the monitor thread.
*/
@Override
public void close() throws IOException {
Preconditions.checkArgument(namesystem.hasWriteLock());
lock.lock();
try {
if (shutdown) return;
// Since we hold both the FSN write lock and the CRM lock here,
// we know that the CRM thread cannot be currently modifying
// the cache manager state while we're closing it.
// Since the CRM thread checks the value of 'shutdown' after waiting
// for a lock, we know that the thread will not modify the cache
// manager state after this point.
shutdown = true;
doRescan.signalAll();
scanFinished.signalAll();
} finally {
lock.unlock();
}
try {
if (this.isAlive()) {
this.join(60000);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
private void rescan() {
private void rescan() throws InterruptedException {
scannedDirectives = 0;
scannedBlocks = 0;
namesystem.writeLock();
try {
if (shutdown) {
throw new InterruptedException("CacheReplicationMonitor was " +
"shut down.");
}
resetStatistics();
rescanCacheDirectives();
rescanCachedBlockMap();
@@ -356,8 +312,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
FSDirectory fsDir = namesystem.getFSDirectory();
final long now = new Date().getTime();
for (CacheDirective directive : cacheManager.getCacheDirectives()) {
// Reset the directive's statistics
directive.resetStatistics();
// Skip processing this entry if it has expired
if (LOG.isTraceEnabled()) {
LOG.trace("Directive expiry is at " + directive.getExpiryTime());
@@ -460,14 +414,21 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
directive.getReplication()) * blockInfo.getNumBytes();
cachedTotal += cachedByBlock;
if (mark != ocblock.getMark()) {
// Mark hasn't been set in this scan, so update replication and mark.
if ((mark != ocblock.getMark()) ||
(ocblock.getReplication() < directive.getReplication())) {
//
// Overwrite the block's replication and mark in two cases:
//
// 1. If the mark on the CachedBlock is different from the mark for
// this scan, that means the block hasn't been updated during this
// scan, and we should overwrite whatever is there, since it is no
// longer valid.
//
// 2. If the replication in the CachedBlock is less than what the
// directive asks for, we want to increase the block's replication
// field to what the directive asks for.
//
ocblock.setReplicationAndMark(directive.getReplication(), mark);
} else {
// Mark already set in this scan. Set replication to highest value in
// any CacheDirective that covers this file.
ocblock.setReplicationAndMark((short)Math.max(
directive.getReplication(), ocblock.getReplication()), mark);
}
}
}
@@ -483,6 +444,39 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
}
}
private String findReasonForNotCaching(CachedBlock cblock,
BlockInfo blockInfo) {
if (blockInfo == null) {
// Somehow, a cache report with the block arrived, but the block
// reports from the DataNode haven't (yet?) described such a block.
// Alternately, the NameNode might have invalidated the block, but the
// DataNode hasn't caught up. In any case, we want to tell the DN
// to uncache this.
return "not tracked by the BlockManager";
} else if (!blockInfo.isComplete()) {
// When a cached block changes state from complete to some other state
// on the DataNode (perhaps because of append), it will begin the
// uncaching process. However, the uncaching process is not
// instantaneous, especially if clients have pinned the block. So
// there may be a period of time when incomplete blocks remain cached
// on the DataNodes.
return "not complete";
} else if (cblock.getReplication() == 0) {
// Since 0 is not a valid value for a cache directive's replication
field, seeing a replication of 0 on a CachedBlock means that it
// has never been reached by any sweep.
return "not needed by any directives";
} else if (cblock.getMark() != mark) {
// Although the block was needed in the past, we didn't reach it during
// the current sweep. Therefore, it doesn't need to be cached any more.
// Need to set the replication to 0 so it doesn't flip back to cached
// when the mark flips on the next scan
cblock.setReplicationAndMark((short)0, mark);
return "no longer needed by any directives";
}
return null;
}
/**
* Scan through the cached block map.
* Any blocks which are under-replicated should be assigned new Datanodes.
@@ -508,11 +502,17 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
iter.remove();
}
}
// If the block's mark doesn't match with the mark of this scan, that
// means that this block couldn't be reached during this scan. That means
// it doesn't need to be cached any more.
int neededCached = (cblock.getMark() != mark) ?
0 : cblock.getReplication();
BlockInfo blockInfo = blockManager.
getStoredBlock(new Block(cblock.getBlockId()));
String reason = findReasonForNotCaching(cblock, blockInfo);
int neededCached = 0;
if (reason != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("not caching " + cblock + " because it is " + reason);
}
} else {
neededCached = cblock.getReplication();
}
int numCached = cached.size();
if (numCached >= neededCached) {
// If we have enough replicas, drop all pending cached.
@@ -566,9 +566,6 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
private void addNewPendingUncached(int neededUncached,
CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
List<DatanodeDescriptor> pendingUncached) {
if (!cacheManager.isActive()) {
return;
}
// Figure out which replicas can be uncached.
LinkedList<DatanodeDescriptor> possibilities =
new LinkedList<DatanodeDescriptor>();
@@ -601,19 +598,18 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
* @param pendingCached A list of DataNodes that will soon cache the
* block.
*/
private void addNewPendingCached(int neededCached,
private void addNewPendingCached(final int neededCached,
CachedBlock cachedBlock, List<DatanodeDescriptor> cached,
List<DatanodeDescriptor> pendingCached) {
if (!cacheManager.isActive()) {
return;
}
// To figure out which replicas can be cached, we consult the
// blocksMap. We don't want to try to cache a corrupt replica, though.
BlockInfo blockInfo = blockManager.
getStoredBlock(new Block(cachedBlock.getBlockId()));
if (blockInfo == null) {
LOG.debug("Not caching block " + cachedBlock + " because it " +
"was deleted from all DataNodes.");
if (LOG.isDebugEnabled()) {
LOG.debug("Not caching block " + cachedBlock + " because there " +
"is no record of it on the NameNode.");
}
return;
}
if (!blockInfo.isComplete()) {
@@ -623,35 +619,156 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
}
return;
}
List<DatanodeDescriptor> possibilities = new LinkedList<DatanodeDescriptor>();
// Filter the list of replicas to only the valid targets
List<DatanodeDescriptor> possibilities =
new LinkedList<DatanodeDescriptor>();
int numReplicas = blockInfo.getCapacity();
Collection<DatanodeDescriptor> corrupt =
blockManager.getCorruptReplicas(blockInfo);
int outOfCapacity = 0;
for (int i = 0; i < numReplicas; i++) {
DatanodeDescriptor datanode = blockInfo.getDatanode(i);
if ((datanode != null) &&
((!pendingCached.contains(datanode)) &&
((corrupt == null) || (!corrupt.contains(datanode))))) {
if (datanode == null) {
continue;
}
if (datanode.isDecommissioned() || datanode.isDecommissionInProgress()) {
continue;
}
if (corrupt != null && corrupt.contains(datanode)) {
continue;
}
if (pendingCached.contains(datanode) || cached.contains(datanode)) {
continue;
}
long pendingCapacity = datanode.getCacheRemaining();
// Subtract pending cached blocks from effective capacity
Iterator<CachedBlock> it = datanode.getPendingCached().iterator();
while (it.hasNext()) {
CachedBlock cBlock = it.next();
BlockInfo info =
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
if (info != null) {
pendingCapacity -= info.getNumBytes();
}
}
it = datanode.getPendingUncached().iterator();
// Add pending uncached blocks from effective capacity
while (it.hasNext()) {
CachedBlock cBlock = it.next();
BlockInfo info =
blockManager.getStoredBlock(new Block(cBlock.getBlockId()));
if (info != null) {
pendingCapacity += info.getNumBytes();
}
}
if (pendingCapacity < blockInfo.getNumBytes()) {
if (LOG.isTraceEnabled()) {
LOG.trace("Datanode " + datanode + " is not a valid possibility for"
+ " block " + blockInfo.getBlockId() + " of size "
+ blockInfo.getNumBytes() + " bytes, only has "
+ datanode.getCacheRemaining() + " bytes of cache remaining.");
}
outOfCapacity++;
continue;
}
possibilities.add(datanode);
}
}
while (neededCached > 0) {
if (possibilities.isEmpty()) {
LOG.warn("We need " + neededCached + " more replica(s) than " +
"actually exist to provide a cache replication of " +
cachedBlock.getReplication() + " for " + cachedBlock);
return;
}
DatanodeDescriptor datanode =
possibilities.remove(random.nextInt(possibilities.size()));
if (LOG.isDebugEnabled()) {
LOG.debug("AddNewPendingCached: datanode " + datanode +
" will now cache block " + cachedBlock);
}
List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
neededCached, blockManager.getDatanodeManager().getStaleInterval());
for (DatanodeDescriptor datanode : chosen) {
pendingCached.add(datanode);
boolean added = datanode.getPendingCached().add(cachedBlock);
assert added;
neededCached--;
}
// We were unable to satisfy the requested replication factor
if (neededCached > chosen.size()) {
if (LOG.isDebugEnabled()) {
LOG.debug(
"Only have " +
(cachedBlock.getReplication() - neededCached + chosen.size()) +
" of " + cachedBlock.getReplication() + " cached replicas for " +
cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
"capacity).");
}
}
}
/**
* Chooses datanode locations for caching from a list of valid possibilities.
* Non-stale nodes are chosen before stale nodes.
*
* @param possibilities List of candidate datanodes
* @param neededCached Number of replicas needed
* @param staleInterval Age of a stale datanode
* @return A list of chosen datanodes
*/
private static List<DatanodeDescriptor> chooseDatanodesForCaching(
final List<DatanodeDescriptor> possibilities, final int neededCached,
final long staleInterval) {
// Make a copy that we can modify
List<DatanodeDescriptor> targets =
new ArrayList<DatanodeDescriptor>(possibilities);
// Selected targets
List<DatanodeDescriptor> chosen = new LinkedList<DatanodeDescriptor>();
// Filter out stale datanodes
List<DatanodeDescriptor> stale = new LinkedList<DatanodeDescriptor>();
Iterator<DatanodeDescriptor> it = targets.iterator();
while (it.hasNext()) {
DatanodeDescriptor d = it.next();
if (d.isStale(staleInterval)) {
it.remove();
stale.add(d);
}
}
// Select targets
while (chosen.size() < neededCached) {
// Try to use stale nodes if we're out of non-stale nodes, else we're done
if (targets.isEmpty()) {
if (!stale.isEmpty()) {
targets = stale;
} else {
break;
}
}
// Select a random target
DatanodeDescriptor target =
chooseRandomDatanodeByRemainingCapacity(targets);
chosen.add(target);
targets.remove(target);
}
return chosen;
}
/**
* Choose a single datanode from the provided list of possible
* targets, weighted by the percentage of free space remaining on the node.
*
* @return The chosen datanode
*/
private static DatanodeDescriptor chooseRandomDatanodeByRemainingCapacity(
final List<DatanodeDescriptor> targets) {
// Use a weighted probability to choose the target datanode
float total = 0;
for (DatanodeDescriptor d : targets) {
total += d.getCacheRemainingPercent();
}
// Give each datanode a portion of keyspace equal to its relative weight
// [0, w1) selects d1, [w1, w2) selects d2, etc.
TreeMap<Integer, DatanodeDescriptor> lottery =
new TreeMap<Integer, DatanodeDescriptor>();
int offset = 0;
for (DatanodeDescriptor d : targets) {
// Since we're using floats, be paranoid about negative values
int weight =
Math.max(1, (int)((d.getCacheRemainingPercent() / total) * 1000000));
offset += weight;
lottery.put(offset, d);
}
// Choose a number from [0, offset), which is the total amount of weight,
// to select the winner
DatanodeDescriptor winner =
lottery.higherEntry(random.nextInt(offset)).getValue();
return winner;
}
}
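
The lottery in chooseRandomDatanodeByRemainingCapacity gives each node a half-open key range proportional to its weight and lets TreeMap.higherEntry find the owner of a uniform random draw. A self-contained sketch of the same technique, with made-up weights and node names:

    import java.util.Random;
    import java.util.TreeMap;

    class LotteryDemo {
      public static void main(String[] args) {
        String[] nodes = {"dn1", "dn2", "dn3"};
        int[] weights = {50, 30, 20};        // e.g. cache-remaining percents
        TreeMap<Integer, String> lottery = new TreeMap<Integer, String>();
        int offset = 0;
        for (int i = 0; i < nodes.length; i++) {
          offset += weights[i];
          lottery.put(offset, nodes[i]);     // dn1 owns [0,50), dn2 [50,80)...
        }
        // higherEntry(k) returns the first entry whose key is strictly
        // greater than k, so a draw of 63 falls in dn2's range [50,80).
        String winner =
            lottery.higherEntry(new Random().nextInt(offset)).getValue();
        System.out.println(winner);
      }
    }
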
View File
@ -355,11 +355,11 @@ public class DatanodeDescriptor extends DatanodeInfo {
setLastUpdate(Time.now());
this.volumeFailures = volFailures;
for (StorageReport report : reports) {
DatanodeStorageInfo storage = storageMap.get(report.getStorageID());
DatanodeStorageInfo storage = storageMap.get(report.getStorage().getStorageID());
if (storage == null) {
// This is seen during cluster initialization when the heartbeat
// is received before the initial block reports from each storage.
storage = updateStorage(new DatanodeStorage(report.getStorageID()));
storage = updateStorage(report.getStorage());
}
storage.receivedHeartbeat(report);
totalCapacity += report.getCapacity();
View File
@ -1443,6 +1443,13 @@ public class DatanodeManager {
return getClass().getSimpleName() + ": " + host2DatanodeMap;
}
public void clearPendingCachingCommands() {
for (DatanodeDescriptor dn : datanodeMap.values()) {
dn.getPendingCached().clear();
dn.getPendingUncached().clear();
}
}
public void setShouldSendCachingCommands(boolean shouldSendCachingCommands) {
this.shouldSendCachingCommands = shouldSendCachingCommands;
}
View File
@ -121,7 +121,7 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
reports = new StorageReport[volumes.volumes.size()];
int i = 0;
for (FsVolumeImpl volume : volumes.volumes) {
reports[i++] = new StorageReport(volume.getStorageID(),
reports[i++] = new StorageReport(volume.toDatanodeStorage(),
false,
volume.getCapacity(),
volume.getDfsUsed(),
@ -237,12 +237,9 @@ class FsDatasetImpl implements FsDatasetSpi<FsVolumeImpl> {
final List<FsVolumeImpl> volArray = new ArrayList<FsVolumeImpl>(
storage.getNumStorageDirs());
for (int idx = 0; idx < storage.getNumStorageDirs(); idx++) {
// TODO: getStorageTypeFromLocations() is only a temporary workaround and
// should be replaced with getting storage type from DataStorage (missing
// storage type now) directly.
Storage.StorageDirectory sd = storage.getStorageDir(idx);
final File dir = sd.getCurrentDir();
final StorageType storageType = getStorageTypeFromLocations(dataLocations, dir);
final StorageType storageType = getStorageTypeFromLocations(dataLocations, sd.getRoot());
volArray.add(new FsVolumeImpl(this, sd.getStorageUuid(), dir, conf,
storageType));
LOG.info("Added volume - " + dir + ", StorageType: " + storageType);
View File
@ -19,10 +19,10 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
@ -54,7 +54,7 @@ class FsVolumeImpl implements FsVolumeSpi {
private final String storageID;
private final StorageType storageType;
private final Map<String, BlockPoolSlice> bpSlices
= new HashMap<String, BlockPoolSlice>();
= new ConcurrentHashMap<String, BlockPoolSlice>();
private final File currentDir; // <StorageDirectory>/current
private final DF usage;
private final long reserved;
View File
@ -17,8 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES;
@ -40,6 +40,7 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
@ -62,7 +63,6 @@ import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
@ -85,7 +85,7 @@ import com.google.common.annotations.VisibleForTesting;
/**
* The Cache Manager handles caching on DataNodes.
*
* This class is instantiated by the FSNamesystem when caching is enabled.
* This class is instantiated by the FSNamesystem.
* It maintains the mapping of cached blocks to datanodes via processing
* datanode cache reports. Based on these reports and addition and removal of
* caching directives, we will schedule caching and uncaching work.
@ -94,6 +94,8 @@ import com.google.common.annotations.VisibleForTesting;
public final class CacheManager {
public static final Log LOG = LogFactory.getLog(CacheManager.class);
private static final float MIN_CACHED_BLOCKS_PERCENT = 0.001f;
// TODO: add pending / underCached / schedule cached blocks stats.
/**
@ -148,34 +150,16 @@ public final class CacheManager {
*/
private final long scanIntervalMs;
/**
* Whether caching is enabled.
*
* If caching is disabled, we will not process cache reports or store
* information about what is cached where. We also do not start the
* CacheReplicationMonitor thread. This will save resources, but provide
* less functionality.
*
* Even when caching is disabled, we still store path-based cache
* information. This information is stored in the edit log and fsimage. We
* don't want to lose it just because a configuration setting was turned off.
* However, we will not act on this information if caching is disabled.
*/
private final boolean enabled;
/**
* Whether the CacheManager is active.
*
* When the CacheManager is active, it tells the DataNodes what to cache
* and uncache. The CacheManager cannot become active if enabled = false.
*/
private boolean active = false;
/**
* All cached blocks.
*/
private final GSet<CachedBlock, CachedBlock> cachedBlocks;
/**
* Lock which protects the CacheReplicationMonitor.
*/
private final ReentrantLock crmLock = new ReentrantLock();
/**
* The CacheReplicationMonitor.
*/
@ -195,56 +179,53 @@ public final class CacheManager {
scanIntervalMs = conf.getLong(
DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS,
DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT);
this.enabled = conf.getBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY,
DFS_NAMENODE_CACHING_ENABLED_DEFAULT);
this.cachedBlocks = !enabled ? null :
new LightWeightGSet<CachedBlock, CachedBlock>(
LightWeightGSet.computeCapacity(0.25, "cachedBlocks"));
float cachedBlocksPercent = conf.getFloat(
DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT,
DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT_DEFAULT);
if (cachedBlocksPercent < MIN_CACHED_BLOCKS_PERCENT) {
LOG.info("Using minimum value " + MIN_CACHED_BLOCKS_PERCENT +
" for " + DFS_NAMENODE_PATH_BASED_CACHE_BLOCK_MAP_ALLOCATION_PERCENT);
cachedBlocksPercent = MIN_CACHED_BLOCKS_PERCENT;
}
this.cachedBlocks = new LightWeightGSet<CachedBlock, CachedBlock>(
LightWeightGSet.computeCapacity(cachedBlocksPercent,
"cachedBlocks"));
}
/**
* Activate the cache manager.
*
* When the cache manager is active, tell the datanodes where to cache files.
*/
public void activate() {
public void startMonitorThread() {
crmLock.lock();
try {
if (this.monitor == null) {
this.monitor = new CacheReplicationMonitor(namesystem, this,
scanIntervalMs, crmLock);
this.monitor.start();
}
} finally {
crmLock.unlock();
}
}
public void stopMonitorThread() {
crmLock.lock();
try {
if (this.monitor != null) {
CacheReplicationMonitor prevMonitor = this.monitor;
this.monitor = null;
IOUtils.closeQuietly(prevMonitor);
}
} finally {
crmLock.unlock();
}
}
public void clearDirectiveStats() {
assert namesystem.hasWriteLock();
if (enabled && (!active)) {
LOG.info("Activating CacheManager. " +
"Starting replication monitor thread...");
active = true;
monitor = new CacheReplicationMonitor(namesystem, this,
scanIntervalMs);
monitor.start();
for (CacheDirective directive : directivesById.values()) {
directive.resetStatistics();
}
}
/**
* Deactivate the cache manager.
*
* When the cache manager is inactive, it does not tell the datanodes where to
* cache files.
*/
public void deactivate() {
assert namesystem.hasWriteLock();
if (active) {
LOG.info("Deactivating CacheManager. " +
"stopping CacheReplicationMonitor thread...");
active = false;
IOUtils.closeQuietly(monitor);
monitor = null;
LOG.info("CacheReplicationMonitor thread stopped and deactivated.");
}
}
/**
* Return true only if the cache manager is active.
* Must be called under the FSN read or write lock.
*/
public boolean isActive() {
return active;
}
/**
* @return Unmodifiable view of the collection of CachePools.
*/
@ -481,9 +462,7 @@ public final class CacheManager {
directive.addBytesNeeded(stats.getBytesNeeded());
directive.addFilesNeeded(directive.getFilesNeeded());
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
}
/**
@ -515,10 +494,6 @@ public final class CacheManager {
long expiryTime = validateExpiryTime(info, pool.getMaxRelativeExpiryMs());
// Do quota validation if required
if (!flags.contains(CacheFlag.FORCE)) {
// Can't kick and wait if caching is disabled
if (monitor != null) {
monitor.waitForRescan();
}
checkLimit(pool, path, replication);
}
// All validation passed
@ -623,9 +598,7 @@ public final class CacheManager {
validateExpiryTime(infoWithDefaults, destPool.getMaxRelativeExpiryMs());
// Indicate changes to the CRM
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
// Validation passed
removeInternal(prevEntry);
@ -660,9 +633,7 @@ public final class CacheManager {
pool.getDirectiveList().remove(directive);
assert directive.getPool() == null;
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
}
public void removeDirective(long id, FSPermissionChecker pc)
@ -695,9 +666,6 @@ public final class CacheManager {
if (filter.getReplication() != null) {
throw new IOException("Filtering by replication is unsupported.");
}
if (monitor != null) {
monitor.waitForRescanIfNeeded();
}
ArrayList<CacheDirectiveEntry> replies =
new ArrayList<CacheDirectiveEntry>(NUM_PRE_ALLOCATED_ENTRIES);
int numReplies = 0;
@ -806,9 +774,7 @@ public final class CacheManager {
bld.append(prefix).append("set limit to " + info.getLimit());
prefix = "; ";
// New limit changes stats, need to set needs refresh
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
}
if (info.getMaxRelativeExpiryMs() != null) {
final Long maxRelativeExpiry = info.getMaxRelativeExpiryMs();
@ -854,9 +820,7 @@ public final class CacheManager {
directivesById.remove(directive.getId());
iter.remove();
}
if (monitor != null) {
monitor.setNeedsRescan();
}
setNeedsRescan();
} catch (IOException e) {
LOG.info("removeCachePool of " + poolName + " failed: ", e);
throw e;
@ -867,9 +831,6 @@ public final class CacheManager {
public BatchedListEntries<CachePoolEntry>
listCachePools(FSPermissionChecker pc, String prevKey) {
assert namesystem.hasReadLock();
if (monitor != null) {
monitor.waitForRescanIfNeeded();
}
final int NUM_PRE_ALLOCATED_ENTRIES = 16;
ArrayList<CachePoolEntry> results =
new ArrayList<CachePoolEntry>(NUM_PRE_ALLOCATED_ENTRIES);
@ -885,9 +846,6 @@ public final class CacheManager {
}
public void setCachedLocations(LocatedBlock block) {
if (!enabled) {
return;
}
CachedBlock cachedBlock =
new CachedBlock(block.getBlock().getBlockId(),
(short)0, false);
@ -903,12 +861,6 @@ public final class CacheManager {
public final void processCacheReport(final DatanodeID datanodeID,
final List<Long> blockIds) throws IOException {
if (!enabled) {
LOG.info("Ignoring cache report from " + datanodeID +
" because " + DFS_NAMENODE_CACHING_ENABLED_KEY + " = false. " +
"number of blocks: " + blockIds.size());
return;
}
namesystem.writeLock();
final long startTime = Time.monotonicNow();
final long endTime;
@ -940,39 +892,28 @@ public final class CacheManager {
final List<Long> blockIds) {
CachedBlocksList cached = datanode.getCached();
cached.clear();
CachedBlocksList cachedList = datanode.getCached();
CachedBlocksList pendingCachedList = datanode.getPendingCached();
for (Iterator<Long> iter = blockIds.iterator(); iter.hasNext(); ) {
Block block = new Block(iter.next());
BlockInfo blockInfo = blockManager.getStoredBlock(block);
if (!blockInfo.isComplete()) {
LOG.warn("Ignoring block id " + block.getBlockId() + ", because " +
"it is in not complete yet. It is in state " +
blockInfo.getBlockUCState());
continue;
}
Collection<DatanodeDescriptor> corruptReplicas =
blockManager.getCorruptReplicas(blockInfo);
if ((corruptReplicas != null) && corruptReplicas.contains(datanode)) {
// The NameNode will eventually remove or update the corrupt block.
// Until then, we pretend that it isn't cached.
LOG.warn("Ignoring cached replica on " + datanode + " of " + block +
" because it is corrupt.");
continue;
}
long blockId = iter.next();
CachedBlock cachedBlock =
new CachedBlock(block.getBlockId(), (short)0, false);
new CachedBlock(blockId, (short)0, false);
CachedBlock prevCachedBlock = cachedBlocks.get(cachedBlock);
// Use the existing CachedBlock if it's present; otherwise,
// insert a new one.
// Add the block ID from the cache report to the cachedBlocks map
// if it's not already there.
if (prevCachedBlock != null) {
cachedBlock = prevCachedBlock;
} else {
cachedBlocks.put(cachedBlock);
}
if (!cachedBlock.isPresent(datanode.getCached())) {
datanode.getCached().add(cachedBlock);
// Add the block to the datanode's implicit cached block list
// if it's not already there. Similarly, remove it from the pending
// cached block list if it exists there.
if (!cachedBlock.isPresent(cachedList)) {
cachedList.add(cachedBlock);
}
if (cachedBlock.isPresent(datanode.getPendingCached())) {
datanode.getPendingCached().remove(cachedBlock);
if (cachedBlock.isPresent(pendingCachedList)) {
pendingCachedList.remove(cachedBlock);
}
}
}
@ -1097,4 +1038,36 @@ public final class CacheManager {
}
prog.endStep(Phase.LOADING_FSIMAGE, step);
}
public void waitForRescanIfNeeded() {
crmLock.lock();
try {
if (monitor != null) {
monitor.waitForRescanIfNeeded();
}
} finally {
crmLock.unlock();
}
}
private void setNeedsRescan() {
crmLock.lock();
try {
if (monitor != null) {
monitor.setNeedsRescan();
}
} finally {
crmLock.unlock();
}
}
@VisibleForTesting
public Thread getCacheReplicationMonitor() {
crmLock.lock();
try {
return monitor;
} finally {
crmLock.unlock();
}
}
}
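
startMonitorThread, stopMonitorThread, setNeedsRescan and waitForRescanIfNeeded above all follow one pattern: every read or write of the nullable monitor field happens under crmLock, so lifecycle calls cannot race. A generic sketch of that guard, assuming hypothetical names:

    import java.util.concurrent.locks.ReentrantLock;

    class MonitorGuard {
      private final ReentrantLock lock = new ReentrantLock();
      private Thread monitor;                // null when stopped

      void start(Runnable task) {
        lock.lock();
        try {
          if (monitor == null) {             // idempotent start
            monitor = new Thread(task, "monitor");
            monitor.start();
          }
        } finally {
          lock.unlock();
        }
      }

      void stop() {
        lock.lock();
        try {
          if (monitor != null) {             // idempotent stop
            Thread prev = monitor;
            monitor = null;                  // clear before shutting down
            prev.interrupt();
          }
        } finally {
          lock.unlock();
        }
      }
    }
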
View File
@ -405,6 +405,7 @@ public class FSImage implements Closeable {
// Directories that don't have previous state do not rollback
boolean canRollback = false;
FSImage prevState = new FSImage(conf);
try {
prevState.getStorage().layoutVersion = HdfsConstants.LAYOUT_VERSION;
for (Iterator<StorageDirectory> it = storage.dirIterator(); it.hasNext();) {
StorageDirectory sd = it.next();
@ -459,6 +460,9 @@ public class FSImage implements Closeable {
LOG.info("Rollback of " + sd.getRoot()+ " is complete.");
}
isUpgradeFinalized = true;
} finally {
prevState.close();
}
}
private void doFinalize(StorageDirectory sd) throws IOException {
View File
@ -931,7 +931,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeLock();
try {
if (blockManager != null) blockManager.close();
cacheManager.deactivate();
} finally {
writeUnlock();
}
@ -1001,7 +1000,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
editLogRollerThreshold, editLogRollerInterval));
nnEditLogRoller.start();
cacheManager.activate();
cacheManager.startMonitorThread();
blockManager.getDatanodeManager().setShouldSendCachingCommands(true);
} finally {
writeUnlock();
@ -1052,7 +1051,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// so that the tailer starts from the right spot.
dir.fsImage.updateLastAppliedTxIdFromWritten();
}
cacheManager.deactivate();
cacheManager.stopMonitorThread();
cacheManager.clearDirectiveStats();
blockManager.getDatanodeManager().clearPendingCachingCommands();
blockManager.getDatanodeManager().setShouldSendCachingCommands(false);
} finally {
writeUnlock();
@ -7066,6 +7067,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return (Long) cacheEntry.getPayload();
}
boolean success = false;
if (!flags.contains(CacheFlag.FORCE)) {
cacheManager.waitForRescanIfNeeded();
}
writeLock();
Long result = null;
try {
@ -7107,6 +7111,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
if (cacheEntry != null && cacheEntry.isSuccess()) {
return;
}
if (!flags.contains(CacheFlag.FORCE)) {
cacheManager.waitForRescanIfNeeded();
}
writeLock();
try {
checkOperation(OperationCategory.WRITE);
@ -7166,6 +7173,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final FSPermissionChecker pc = isPermissionEnabled ?
getPermissionChecker() : null;
BatchedListEntries<CacheDirectiveEntry> results;
cacheManager.waitForRescanIfNeeded();
readLock();
boolean success = false;
try {
@ -7289,6 +7297,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
BatchedListEntries<CachePoolEntry> results;
checkOperation(OperationCategory.READ);
boolean success = false;
cacheManager.waitForRescanIfNeeded();
readLock();
try {
checkOperation(OperationCategory.READ);
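
Note the ordering these FSNamesystem hunks establish: cacheManager.waitForRescanIfNeeded() is always invoked before writeLock() or readLock() is taken, never inside it. A plausible reading (not stated in the diff) is that a rescan itself needs the namesystem lock, so waiting while holding it could stall the monitor. The resulting outline, using the names from the hunks above:

    // Sketch of the add/modify path; validation details elided.
    if (!flags.contains(CacheFlag.FORCE)) {
      cacheManager.waitForRescanIfNeeded();  // may block; FSN lock not held
    }
    writeLock();
    try {
      checkOperation(OperationCategory.WRITE);
      // validate and apply the cache directive change
    } finally {
      writeUnlock();
    }
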
View File
@ -480,6 +480,14 @@ public class NameNode implements NameNodeStatusMXBean {
* @param conf the configuration
*/
protected void initialize(Configuration conf) throws IOException {
if (conf.get(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS) == null) {
String intervals = conf.get(DFS_METRICS_PERCENTILES_INTERVALS_KEY);
if (intervals != null) {
conf.set(HADOOP_USER_GROUP_METRICS_PERCENTILES_INTERVALS,
intervals);
}
}
UserGroupInformation.setConfiguration(conf);
loginAsNameNodeUser(conf);
View File
@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.protocol;
* Utilization report for a Datanode storage
*/
public class StorageReport {
private final String storageID;
private final DatanodeStorage storage;
private final boolean failed;
private final long capacity;
private final long dfsUsed;
@ -30,9 +30,9 @@ public class StorageReport {
public static final StorageReport[] EMPTY_ARRAY = {};
public StorageReport(String sid, boolean failed, long capacity, long dfsUsed,
long remaining, long bpUsed) {
this.storageID = sid;
public StorageReport(DatanodeStorage storage, boolean failed,
long capacity, long dfsUsed, long remaining, long bpUsed) {
this.storage = storage;
this.failed = failed;
this.capacity = capacity;
this.dfsUsed = dfsUsed;
@ -40,8 +40,8 @@ public class StorageReport {
this.blockPoolUsed = bpUsed;
}
public String getStorageID() {
return storageID;
public DatanodeStorage getStorage() {
return storage;
}
public boolean isFailed() {
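
The constructor swap above touches every call site; a hedged example of building and reading a report under the new shape (the uuid string is made up):

    DatanodeStorage storage = new DatanodeStorage("DS-example-uuid");
    StorageReport report = new StorageReport(
        storage,       // was: a bare String storageID
        false,         // failed
        1024, 100,     // capacity, dfsUsed
        924, 100);     // remaining, blockPoolUsed
    // Callers that used report.getStorageID() now go through the storage:
    String id = report.getStorage().getStorageID();
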
View File
@ -84,7 +84,12 @@ public class CacheAdmin extends Configured implements Tool {
for (int j = 1; j < args.length; j++) {
argsList.add(args[j]);
}
try {
return command.run(getConf(), argsList);
} catch (IllegalArgumentException e) {
System.err.println(prettifyException(e));
return -1;
}
}
public static void main(String[] argsArray) throws IOException {
@ -135,6 +140,20 @@ public class CacheAdmin extends Configured implements Tool {
return maxTtl;
}
private static Expiration parseExpirationString(String ttlString)
throws IOException {
Expiration ex = null;
if (ttlString != null) {
if (ttlString.equalsIgnoreCase("never")) {
ex = CacheDirectiveInfo.Expiration.NEVER;
} else {
long ttl = DFSUtil.parseRelativeTime(ttlString);
ex = CacheDirectiveInfo.Expiration.newRelative(ttl);
}
}
return ex;
}
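
A usage sketch for the helper above, covering the three input shapes it distinguishes:

    Expiration e1 = parseExpirationString("30m");    // relative TTL, 30 minutes
    Expiration e2 = parseExpirationString("never");  // Expiration.NEVER
    Expiration e3 = parseExpirationString(null);     // null: caller skips
                                                     // setExpiration entirely
    // Any other string that DFSUtil.parseRelativeTime rejects raises
    // IOException, which the add/modify commands report as a ttl error.
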
interface Command {
String getName();
String getShortUsage();
@ -171,6 +190,7 @@ public class CacheAdmin extends Configured implements Tool {
listing.addRow("<time-to-live>", "How long the directive is " +
"valid. Can be specified in minutes, hours, and days, e.g. " +
"30m, 4h, 2d. Valid units are [smhd]." +
" \"never\" indicates a directive that never expires." +
" If unspecified, the directive never expires.");
return getShortUsage() + "\n" +
"Add a new cache directive.\n\n" +
@ -203,16 +223,16 @@ public class CacheAdmin extends Configured implements Tool {
}
String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
if (ttlString != null) {
try {
long ttl = DFSUtil.parseRelativeTime(ttlString);
builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
Expiration ex = parseExpirationString(ttlString);
if (ex != null) {
builder.setExpiration(ex);
}
} catch (IOException e) {
System.err.println(
"Error while parsing ttl value: " + e.getMessage());
return 1;
}
}
if (!args.isEmpty()) {
System.err.println("Can't understand argument: " + args.get(0));
@ -326,7 +346,7 @@ public class CacheAdmin extends Configured implements Tool {
listing.addRow("<time-to-live>", "How long the directive is " +
"valid. Can be specified in minutes, hours, and days, e.g. " +
"30m, 4h, 2d. Valid units are [smhd]." +
" If unspecified, the directive never expires.");
" \"never\" indicates a directive that never expires.");
return getShortUsage() + "\n" +
"Modify a cache directive.\n\n" +
listing.toString();
@ -362,18 +382,17 @@ public class CacheAdmin extends Configured implements Tool {
modified = true;
}
String ttlString = StringUtils.popOptionWithArgument("-ttl", args);
if (ttlString != null) {
long ttl;
try {
ttl = DFSUtil.parseRelativeTime(ttlString);
Expiration ex = parseExpirationString(ttlString);
if (ex != null) {
builder.setExpiration(ex);
modified = true;
}
} catch (IOException e) {
System.err.println(
"Error while parsing ttl value: " + e.getMessage());
return 1;
}
builder.setExpiration(CacheDirectiveInfo.Expiration.newRelative(ttl));
modified = true;
}
if (!args.isEmpty()) {
System.err.println("Can't understand argument: " + args.get(0));
System.err.println("Usage is " + getShortUsage());
@ -578,7 +597,7 @@ public class CacheAdmin extends Configured implements Tool {
public String getShortUsage() {
return "[" + NAME + " <name> [-owner <owner>] " +
"[-group <group>] [-mode <mode>] [-limit <limit>] " +
"[-maxttl <maxTtl>]\n";
"[-maxTtl <maxTtl>]\n";
}
@Override
View File
@ -29,6 +29,7 @@ import java.util.Map;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
@ -85,9 +86,9 @@ public class GetConf extends Configured implements Tool {
map.put(BACKUP.getName().toLowerCase(),
new BackupNodesCommandHandler());
map.put(INCLUDE_FILE.getName().toLowerCase(),
new CommandHandler("DFSConfigKeys.DFS_HOSTS"));
new CommandHandler(DFSConfigKeys.DFS_HOSTS));
map.put(EXCLUDE_FILE.getName().toLowerCase(),
new CommandHandler("DFSConfigKeys.DFS_HOSTS_EXCLUDE"));
new CommandHandler(DFSConfigKeys.DFS_HOSTS_EXCLUDE));
map.put(NNRPCADDRESSES.getName().toLowerCase(),
new NNRpcAddressesCommandHandler());
map.put(CONFKEY.getName().toLowerCase(),
View File
@ -196,12 +196,13 @@ message HeartbeatRequestProto {
}
message StorageReportProto {
required string storageUuid = 1;
required string storageUuid = 1 [ deprecated = true ];
optional bool failed = 2 [ default = false ];
optional uint64 capacity = 3 [ default = 0 ];
optional uint64 dfsUsed = 4 [ default = 0 ];
optional uint64 remaining = 5 [ default = 0 ];
optional uint64 blockPoolUsed = 6 [ default = 0 ];
optional DatanodeStorageProto storage = 7; // supersedes StorageUuid
}
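
Deprecating storageUuid while adding the optional storage field is the standard protobuf compatibility move: new readers prefer the structured field and fall back to the legacy uuid from old senders. A hedged Java sketch (the PBHelper.convert call is assumed here, not shown in this diff):

    DatanodeStorage storageFrom(StorageReportProto p) {
      if (p.hasStorage()) {
        return PBHelper.convert(p.getStorage());      // new-style sender
      }
      return new DatanodeStorage(p.getStorageUuid()); // legacy sender
    }
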
/**
View File
@ -1476,13 +1476,13 @@
</property>
<property>
<name>dfs.namenode.caching.enabled</name>
<value>false</value>
<name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
<value>0.25</value>
<description>
Set to true to enable block caching. This flag enables the NameNode to
maintain a mapping of cached blocks to DataNodes via processing DataNode
cache reports. Based on these reports and addition and removal of caching
directives, the NameNode will schedule caching and uncaching work.
The percentage of the Java heap which we will allocate to the cached blocks
map. The cached blocks map is a hash map which uses chained hashing.
Smaller maps may be accessed more slowly if the number of cached blocks is
large; larger maps will consume more memory.
</description>
</property>
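
For a test or tool that wants to override the new key in code rather than in hdfs-site.xml, a minimal hedged example (0.5f is arbitrary; values below 0.001 are clamped by the CacheManager hunk above):

    Configuration conf = new HdfsConfiguration();
    conf.setFloat(
        "dfs.namenode.path.based.cache.block.map.allocation.percent", 0.5f);
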
View File
@ -242,12 +242,6 @@ Centralized Cache Management in HDFS
Be sure to configure the following:
* dfs.namenode.caching.enabled
This must be set to true to enable caching. If this is false, the NameNode
will ignore cache reports, and will not ask DataNodes to cache
blocks.
* dfs.datanode.max.locked.memory
The DataNode will treat this as the maximum amount of memory it can use for
@ -281,6 +275,13 @@ Centralized Cache Management in HDFS
By default, this parameter is set to 10000, which is 10 seconds.
* dfs.namenode.path.based.cache.block.map.allocation.percent
The percentage of the Java heap which we will allocate to the cached blocks
map. The cached blocks map is a hash map which uses chained hashing.
Smaller maps may be accessed more slowly if the number of cached blocks is
large; larger maps will consume more memory. The default is 0.25 percent.
** {OS Limits}
If you get the error "Cannot start datanode because the configured max
View File
@ -140,6 +140,7 @@ public class MiniDFSCluster {
private int nameNodeHttpPort = 0;
private final Configuration conf;
private int numDataNodes = 1;
private StorageType storageType = StorageType.DEFAULT;
private boolean format = true;
private boolean manageNameDfsDirs = true;
private boolean manageNameDfsSharedDirs = true;
@ -185,6 +186,14 @@ public class MiniDFSCluster {
return this;
}
/**
* Default: StorageType.DEFAULT
*/
public Builder storageType(StorageType type) {
this.storageType = type;
return this;
}
/**
* Default: true
*/
@ -341,6 +350,7 @@ public class MiniDFSCluster {
initMiniDFSCluster(builder.conf,
builder.numDataNodes,
builder.storageType,
builder.format,
builder.manageNameDfsDirs,
builder.manageNameDfsSharedDirs,
@ -592,7 +602,7 @@ public class MiniDFSCluster {
String[] racks, String hosts[],
long[] simulatedCapacities) throws IOException {
this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster
initMiniDFSCluster(conf, numDataNodes, format,
initMiniDFSCluster(conf, numDataNodes, StorageType.DEFAULT, format,
manageNameDfsDirs, true, manageDataDfsDirs, manageDataDfsDirs,
operation, racks, hosts,
simulatedCapacities, null, true, false,
@ -601,7 +611,7 @@ public class MiniDFSCluster {
private void initMiniDFSCluster(
Configuration conf,
int numDataNodes, boolean format, boolean manageNameDfsDirs,
int numDataNodes, StorageType storageType, boolean format, boolean manageNameDfsDirs,
boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
boolean manageDataDfsDirs, StartupOption operation, String[] racks,
String[] hosts, long[] simulatedCapacities, String clusterId,
@ -670,7 +680,7 @@ public class MiniDFSCluster {
}
// Start the DataNodes
startDataNodes(conf, numDataNodes, manageDataDfsDirs, operation, racks,
startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs, operation, racks,
hosts, simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, checkDataNodeHostConfig);
waitClusterUp();
//make sure ProxyUsers uses the latest conf
@ -990,6 +1000,19 @@ public class MiniDFSCluster {
}
}
String makeDataNodeDirs(int dnIndex, StorageType storageType) throws IOException {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
File dir = getInstanceStorageDir(dnIndex, j);
dir.mkdirs();
if (!dir.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
}
sb.append((j > 0 ? "," : "") + "[" + storageType + "]" + fileAsURI(dir));
}
return sb.toString();
}
/**
* Modify the config and start up additional DataNodes. The info port for
* DataNodes is guaranteed to use a free port.
@ -1052,7 +1075,7 @@ public class MiniDFSCluster {
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
simulatedCapacities, setupHostsFile, false, false);
}
@ -1066,7 +1089,7 @@ public class MiniDFSCluster {
long[] simulatedCapacities,
boolean setupHostsFile,
boolean checkDataNodeAddrConfig) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, hosts,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, hosts,
simulatedCapacities, setupHostsFile, checkDataNodeAddrConfig, false);
}
@ -1098,7 +1121,7 @@ public class MiniDFSCluster {
* @throws IllegalStateException if NameNode has been shutdown
*/
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
@ -1154,16 +1177,7 @@ public class MiniDFSCluster {
// Set up datanode address
setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
if (manageDfsDirs) {
StringBuilder sb = new StringBuilder();
for (int j = 0; j < DIRS_PER_DATANODE; ++j) {
File dir = getInstanceStorageDir(i, j);
dir.mkdirs();
if (!dir.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode " + dir);
}
sb.append((j > 0 ? "," : "") + fileAsURI(dir));
}
String dirs = sb.toString();
String dirs = makeDataNodeDirs(i, storageType);
dnConf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
conf.set(DFS_DATANODE_DATA_DIR_KEY, dirs);
}
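
A hedged sketch of the new builder knob; with it, makeDataNodeDirs prefixes every generated data dir with the storage type, producing entries like "[SSD]file:/...". This mirrors its use in TestStorageReport later in this patch:

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .storageType(StorageType.SSD)    // default is StorageType.DEFAULT
        .build();
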
View File
@ -50,7 +50,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
}
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] nodeGroups, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
@ -112,15 +112,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
// Set up datanode address
setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig);
if (manageDfsDirs) {
File dir1 = getInstanceStorageDir(i, 0);
File dir2 = getInstanceStorageDir(i, 1);
dir1.mkdirs();
dir2.mkdirs();
if (!dir1.isDirectory() || !dir2.isDirectory()) {
throw new IOException("Mkdirs failed to create directory for DataNode "
+ i + ": " + dir1 + " or " + dir2);
}
String dirs = fileAsURI(dir1) + "," + fileAsURI(dir2);
String dirs = makeDataNodeDirs(i, storageType);
dnConf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dirs);
}
@ -198,7 +190,7 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
String[] racks, String[] nodeGroups, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks, nodeGroups,
startDataNodes(conf, numDataNodes, StorageType.DEFAULT, manageDfsDirs, operation, racks, nodeGroups,
hosts, simulatedCapacities, setupHostsFile, false, false);
}
@ -213,13 +205,13 @@ public class MiniDFSClusterWithNodeGroup extends MiniDFSCluster {
// This is for initialize from parent class.
@Override
public synchronized void startDataNodes(Configuration conf, int numDataNodes,
boolean manageDfsDirs, StartupOption operation,
StorageType storageType, boolean manageDfsDirs, StartupOption operation,
String[] racks, String[] hosts,
long[] simulatedCapacities,
boolean setupHostsFile,
boolean checkDataNodeAddrConfig,
boolean checkDataNodeHostConfig) throws IOException {
startDataNodes(conf, numDataNodes, manageDfsDirs, operation, racks,
startDataNodes(conf, numDataNodes, storageType, manageDfsDirs, operation, racks,
NODE_GROUPS, hosts, simulatedCapacities, setupHostsFile,
checkDataNodeAddrConfig, checkDataNodeHostConfig);
}
View File
@ -257,8 +257,10 @@ public class BlockManagerTestUtil {
DatanodeDescriptor dnd) {
ArrayList<StorageReport> reports = new ArrayList<StorageReport>();
for (DatanodeStorageInfo storage : dnd.getStorageInfos()) {
DatanodeStorage dns = new DatanodeStorage(
storage.getStorageID(), storage.getState(), storage.getStorageType());
StorageReport report = new StorageReport(
storage.getStorageID(), false, storage.getCapacity(),
dns, false, storage.getCapacity(),
storage.getDfsUsed(), storage.getRemaining(),
storage.getBlockPoolUsed());
reports.add(report);
View File
@ -470,11 +470,14 @@ public class TestJspHelper {
BlockManagerTestUtil.updateStorage(dnDesc1, new DatanodeStorage("dnStorage1"));
BlockManagerTestUtil.updateStorage(dnDesc2, new DatanodeStorage("dnStorage2"));
DatanodeStorage dns1 = new DatanodeStorage("dnStorage1");
DatanodeStorage dns2 = new DatanodeStorage("dnStorage2");
StorageReport[] report1 = new StorageReport[] {
new StorageReport("dnStorage1", false, 1024, 100, 924, 100)
new StorageReport(dns1, false, 1024, 100, 924, 100)
};
StorageReport[] report2 = new StorageReport[] {
new StorageReport("dnStorage2", false, 2500, 200, 1848, 200)
new StorageReport(dns2, false, 2500, 200, 1848, 200)
};
dnDesc1.updateHeartbeat(report1, 5l, 3l, 10, 2);
dnDesc2.updateHeartbeat(report2, 10l, 2l, 20, 1);
View File
@ -394,8 +394,9 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
}
synchronized StorageReport getStorageReport(String bpid) {
return new StorageReport(getStorageUuid(), false, getCapacity(),
getUsed(), getFree(), map.get(bpid).getUsed());
return new StorageReport(new DatanodeStorage(getStorageUuid()),
false, getCapacity(), getUsed(), getFree(),
map.get(bpid).getUsed());
}
}
View File
@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.util.DataChecksum;
import org.junit.After;
@ -186,9 +187,8 @@ public class TestDiskError {
// Check permissions on directories in 'dfs.datanode.data.dir'
FileSystem localFS = FileSystem.getLocal(conf);
for (DataNode dn : cluster.getDataNodes()) {
String[] dataDirs =
dn.getConf().getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
for (String dir : dataDirs) {
for (FsVolumeSpi v : dn.getFSDataset().getVolumes()) {
String dir = v.getBasePath();
Path dataDir = new Path(dir);
FsPermission actual = localFS.getFileStatus(dataDir).getPermission();
assertEquals("Permission for dir: " + dataDir + ", is " + actual +
View File
@ -36,16 +36,20 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.HdfsBlockLocation;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.LogVerificationAppender;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi;
@ -82,7 +86,11 @@ public class TestFsDatasetCache {
// Most Linux installs allow a default of 64KB locked memory
private static final long CACHE_CAPACITY = 64 * 1024;
private static final long BLOCK_SIZE = 4096;
// mlock always locks the entire page. So we don't need to deal with this
// rounding, use the OS page size for the block size.
private static final long PAGE_SIZE =
NativeIO.POSIX.getCacheManipulator().getOperatingSystemPageSize();
private static final long BLOCK_SIZE = PAGE_SIZE;
private static Configuration conf;
private static MiniDFSCluster cluster = null;
@ -104,14 +112,13 @@ public class TestFsDatasetCache {
public void setUp() throws Exception {
assumeTrue(!Path.WINDOWS);
conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_RETRY_INTERVAL_MS,
500);
conf.setLong(
DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 100);
conf.setLong(DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 500);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setLong(DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY,
CACHE_CAPACITY);
conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY, true);
prevCacheManipulator = NativeIO.POSIX.getCacheManipulator();
NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator());
@ -325,7 +332,7 @@ public class TestFsDatasetCache {
// Create some test files that will exceed total cache capacity
final int numFiles = 5;
final long fileSize = 15000;
final long fileSize = CACHE_CAPACITY / (numFiles-1);
final Path[] testFiles = new Path[numFiles];
final HdfsBlockLocation[][] fileLocs = new HdfsBlockLocation[numFiles][];
@ -451,4 +458,65 @@ public class TestFsDatasetCache {
}
}, 100, 10000);
}
@Test(timeout=60000)
public void testPageRounder() throws Exception {
// Write a small file
Path fileName = new Path("/testPageRounder");
final int smallBlocks = 512; // This should be smaller than the page size
assertTrue("Page size should be greater than smallBlocks!",
PAGE_SIZE > smallBlocks);
final int numBlocks = 5;
final int fileLen = smallBlocks * numBlocks;
FSDataOutputStream out =
fs.create(fileName, false, 4096, (short)1, smallBlocks);
out.write(new byte[fileLen]);
out.close();
HdfsBlockLocation[] locs = (HdfsBlockLocation[])fs.getFileBlockLocations(
fileName, 0, fileLen);
// Cache the file and check the sizes match the page size
setHeartbeatResponse(cacheBlocks(locs));
verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks);
// Uncache and check that it decrements by the page size too
setHeartbeatResponse(uncacheBlocks(locs));
verifyExpectedCacheUsage(0, 0);
}
@Test(timeout=60000)
public void testUncacheQuiesces() throws Exception {
// Create a file
Path fileName = new Path("/testUncacheQuiesces");
int fileLen = 4096;
DFSTestUtil.createFile(fs, fileName, fileLen, (short)1, 0xFDFD);
// Cache it
DistributedFileSystem dfs = cluster.getFileSystem();
dfs.addCachePool(new CachePoolInfo("pool"));
dfs.addCacheDirective(new CacheDirectiveInfo.Builder()
.setPool("pool").setPath(fileName).setReplication((short)3).build());
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
long blocksCached =
MetricsAsserts.getLongCounter("BlocksCached", dnMetrics);
return blocksCached > 0;
}
}, 1000, 30000);
// Uncache it
dfs.removeCacheDirective(1);
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
long blocksUncached =
MetricsAsserts.getLongCounter("BlocksUncached", dnMetrics);
return blocksUncached > 0;
}
}, 1000, 30000);
// Make sure that no additional messages were sent
Thread.sleep(10000);
MetricsRecordBuilder dnMetrics = getMetrics(dn.getMetrics().name());
MetricsAsserts.assertCounter("BlocksCached", 1l, dnMetrics);
MetricsAsserts.assertCounter("BlocksUncached", 1l, dnMetrics);
}
}
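
The page rounding that testPageRounder exercises reduces to rounding each replica length up to a page boundary; a small sketch (roundUpToPageSize is illustrative, not the datanode's actual helper):

    static long roundUpToPageSize(long len, long pageSize) {
      return ((len + pageSize - 1) / pageSize) * pageSize;
    }
    // roundUpToPageSize(512, 4096) == 4096, so five 512-byte blocks pin
    // 5 * PAGE_SIZE bytes of cache, which is exactly what
    // verifyExpectedCacheUsage(PAGE_SIZE * numBlocks, numBlocks) asserts.
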
View File
@ -0,0 +1,113 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.datanode;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.*;
import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertThat;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyInt;
import static org.mockito.Matchers.anyLong;
public class TestStorageReport {
public static final Log LOG = LogFactory.getLog(TestStorageReport.class);
private static short REPL_FACTOR = 1;
private static final StorageType storageType = StorageType.SSD; // pick non-default.
private static Configuration conf;
private MiniDFSCluster cluster;
private DistributedFileSystem fs;
static String bpid;
@Before
public void startUpCluster() throws IOException {
conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(REPL_FACTOR)
.storageType(storageType)
.build();
fs = cluster.getFileSystem();
bpid = cluster.getNamesystem().getBlockPoolId();
}
@After
public void shutDownCluster() throws IOException {
if (cluster != null) {
fs.close();
cluster.shutdown();
cluster = null;
}
}
/**
* Ensure that storage type and storage state are propagated
* in Storage Reports.
*/
@Test
public void testStorageReportHasStorageTypeAndState() throws IOException {
// Make sure we are not testing with the default type, that would not
// be a very good test.
assertNotSame(storageType, StorageType.DEFAULT);
NameNode nn = cluster.getNameNode();
DataNode dn = cluster.getDataNodes().get(0);
// Insert a spy object for the NN RPC.
DatanodeProtocolClientSideTranslatorPB nnSpy =
DataNodeTestUtils.spyOnBposToNN(dn, nn);
// Trigger a heartbeat so there is an interaction with the spy
// object.
DataNodeTestUtils.triggerHeartbeat(dn);
// Verify that the callback passed in the expected parameters.
ArgumentCaptor<StorageReport[]> captor =
ArgumentCaptor.forClass(StorageReport[].class);
Mockito.verify(nnSpy).sendHeartbeat(
any(DatanodeRegistration.class),
captor.capture(),
anyLong(), anyLong(), anyInt(), anyInt(), anyInt());
StorageReport[] reports = captor.getValue();
for (StorageReport report: reports) {
assertThat(report.getStorage().getStorageType(), is(storageType));
assertThat(report.getStorage().getState(), is(DatanodeStorage.State.NORMAL));
}
}
}
View File
@ -605,6 +605,98 @@ public class NNThroughputBenchmark implements Tool {
}
}
/**
* Directory creation statistics.
*
* Each thread creates the same (+ or -1) number of directories.
* Directory names are pre-generated during initialization.
*/
class MkdirsStats extends OperationStatsBase {
// Operation types
static final String OP_MKDIRS_NAME = "mkdirs";
static final String OP_MKDIRS_USAGE = "-op mkdirs [-threads T] [-dirs N] " +
"[-dirsPerDir P]";
protected FileNameGenerator nameGenerator;
protected String[][] dirPaths;
MkdirsStats(List<String> args) {
super();
parseArguments(args);
}
@Override
String getOpName() {
return OP_MKDIRS_NAME;
}
@Override
void parseArguments(List<String> args) {
boolean ignoreUnrelatedOptions = verifyOpArgument(args);
int nrDirsPerDir = 2;
for (int i = 2; i < args.size(); i++) { // parse command line
if(args.get(i).equals("-dirs")) {
if(i+1 == args.size()) printUsage();
numOpsRequired = Integer.parseInt(args.get(++i));
} else if(args.get(i).equals("-threads")) {
if(i+1 == args.size()) printUsage();
numThreads = Integer.parseInt(args.get(++i));
} else if(args.get(i).equals("-dirsPerDir")) {
if(i+1 == args.size()) printUsage();
nrDirsPerDir = Integer.parseInt(args.get(++i));
} else if(!ignoreUnrelatedOptions)
printUsage();
}
nameGenerator = new FileNameGenerator(getBaseDir(), nrDirsPerDir);
}
@Override
void generateInputs(int[] opsPerThread) throws IOException {
assert opsPerThread.length == numThreads : "Error opsPerThread.length";
nameNodeProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE,
false);
LOG.info("Generate " + numOpsRequired + " inputs for " + getOpName());
dirPaths = new String[numThreads][];
for(int idx=0; idx < numThreads; idx++) {
int threadOps = opsPerThread[idx];
dirPaths[idx] = new String[threadOps];
for(int jdx=0; jdx < threadOps; jdx++)
dirPaths[idx][jdx] = nameGenerator.
getNextFileName("ThroughputBench");
}
}
/**
* returns client name
*/
@Override
String getExecutionArgument(int daemonId) {
return getClientName(daemonId);
}
/**
* Do mkdirs operation.
*/
@Override
long executeOp(int daemonId, int inputIdx, String clientName)
throws IOException {
long start = Time.now();
nameNodeProto.mkdirs(dirPaths[daemonId][inputIdx],
FsPermission.getDefault(), true);
long end = Time.now();
return end-start;
}
@Override
void printResults() {
LOG.info("--- " + getOpName() + " inputs ---");
LOG.info("nrDirs = " + numOpsRequired);
LOG.info("nrThreads = " + numThreads);
LOG.info("nrDirsPerDir = " + nameGenerator.getFilesPerDirectory());
printStats();
}
}
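
Given OP_MKDIRS_USAGE above, a plausible invocation of the new op (flag names come from the usage string; the exact launcher wrapper may vary by install):

    hadoop org.apache.hadoop.hdfs.server.namenode.NNThroughputBenchmark \
        -op mkdirs -threads 4 -dirs 1024 -dirsPerDir 32
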
/**
* Open file statistics.
*
@ -846,7 +938,7 @@ public class NNThroughputBenchmark implements Tool {
// register datanode
dnRegistration = nameNodeProto.registerDatanode(dnRegistration);
//first block reports
storage = new DatanodeStorage(dnRegistration.getDatanodeUuid());
storage = new DatanodeStorage(DatanodeStorage.generateUuid());
final StorageBlockReport[] reports = {
new StorageBlockReport(storage,
new BlockListAsLongs(null, null).getBlockListAsLongs())
@ -862,8 +954,8 @@ public class NNThroughputBenchmark implements Tool {
void sendHeartbeat() throws IOException {
// register datanode
// TODO:FEDERATION currently a single block pool is supported
StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
StorageReport[] rep = { new StorageReport(storage, false,
DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, rep,
0L, 0L, 0, 0, 0).getCommands();
if(cmds != null) {
@ -909,7 +1001,7 @@ public class NNThroughputBenchmark implements Tool {
@SuppressWarnings("unused") // keep it for future blockReceived benchmark
int replicateBlocks() throws IOException {
// register datanode
StorageReport[] rep = { new StorageReport(dnRegistration.getDatanodeUuid(),
StorageReport[] rep = { new StorageReport(storage,
false, DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED) };
DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration,
rep, 0L, 0L, 0, 0, 0).getCommands();
@ -918,7 +1010,8 @@ public class NNThroughputBenchmark implements Tool {
if (cmd.getAction() == DatanodeProtocol.DNA_TRANSFER) {
// Send a copy of a block to another datanode
BlockCommand bcmd = (BlockCommand)cmd;
return transferBlocks(bcmd.getBlocks(), bcmd.getTargets());
return transferBlocks(bcmd.getBlocks(), bcmd.getTargets(),
bcmd.getTargetStorageIDs());
}
}
}
@ -931,12 +1024,14 @@ public class NNThroughputBenchmark implements Tool {
* that the blocks have been received.
*/
private int transferBlocks( Block blocks[],
DatanodeInfo xferTargets[][]
DatanodeInfo xferTargets[][],
String targetStorageIDs[][]
) throws IOException {
for(int i = 0; i < blocks.length; i++) {
DatanodeInfo blockTargets[] = xferTargets[i];
for(int t = 0; t < blockTargets.length; t++) {
DatanodeInfo dnInfo = blockTargets[t];
String targetStorageID = targetStorageIDs[i][t];
DatanodeRegistration receivedDNReg;
receivedDNReg = new DatanodeRegistration(dnInfo,
new DataStorage(nsInfo),
@ -946,7 +1041,7 @@ public class NNThroughputBenchmark implements Tool {
blocks[i], ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK,
null) };
StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
receivedDNReg.getDatanodeUuid(), rdBlocks) };
targetStorageID, rdBlocks) };
nameNodeProto.blockReceivedAndDeleted(receivedDNReg, nameNode
.getNamesystem().getBlockPoolId(), report);
}
@ -1035,7 +1130,7 @@ public class NNThroughputBenchmark implements Tool {
}
// create files
LOG.info("Creating " + nrFiles + " with " + blocksPerFile + " blocks each.");
LOG.info("Creating " + nrFiles + " files with " + blocksPerFile + " blocks each.");
FileNameGenerator nameGenerator;
nameGenerator = new FileNameGenerator(getBaseDir(), 100);
String clientName = getClientName(007);
@ -1069,7 +1164,7 @@ public class NNThroughputBenchmark implements Tool {
loc.getBlock().getLocalBlock(),
ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null) };
StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks(
datanodes[dnIdx].dnRegistration.getDatanodeUuid(), rdBlocks) };
datanodes[dnIdx].storage.getStorageID(), rdBlocks) };
nameNodeProto.blockReceivedAndDeleted(datanodes[dnIdx].dnRegistration, loc
.getBlock().getBlockPoolId(), report);
}
@ -1279,6 +1374,7 @@ public class NNThroughputBenchmark implements Tool {
System.err.println("Usage: NNThroughputBenchmark"
+ "\n\t" + OperationStatsBase.OP_ALL_USAGE
+ " | \n\t" + CreateFileStats.OP_CREATE_USAGE
+ " | \n\t" + MkdirsStats.OP_MKDIRS_USAGE
+ " | \n\t" + OpenFileStats.OP_OPEN_USAGE
+ " | \n\t" + DeleteFileStats.OP_DELETE_USAGE
+ " | \n\t" + FileStatusStats.OP_FILE_STATUS_USAGE
@ -1328,6 +1424,10 @@ public class NNThroughputBenchmark implements Tool {
opStat = new CreateFileStats(args);
ops.add(opStat);
}
if(runAll || MkdirsStats.OP_MKDIRS_NAME.equals(type)) {
opStat = new MkdirsStats(args);
ops.add(opStat);
}
if(runAll || OpenFileStats.OP_OPEN_NAME.equals(type)) {
opStat = new OpenFileStats(args);
ops.add(opStat);
View File
@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode;
import java.io.File;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.Iterator;
import org.apache.commons.logging.Log;
@ -29,25 +28,13 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSClientAdapter;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.Util;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
/**
* OfflineEditsViewerHelper is a helper class for TestOfflineEditsViewer,
@ -135,151 +122,11 @@ public class OfflineEditsViewerHelper {
* OP_CLEAR_NS_QUOTA (12)
*/
private CheckpointSignature runOperations() throws IOException {
LOG.info("Creating edits by performing fs operations");
// no check, if it's not it throws an exception which is what we want
DistributedFileSystem dfs =
(DistributedFileSystem)cluster.getFileSystem();
FileContext fc = FileContext.getFileContext(cluster.getURI(0), config);
// OP_ADD 0
Path pathFileCreate = new Path("/file_create_u\1F431");
FSDataOutputStream s = dfs.create(pathFileCreate);
// OP_CLOSE 9
s.close();
// OP_RENAME_OLD 1
Path pathFileMoved = new Path("/file_moved");
dfs.rename(pathFileCreate, pathFileMoved);
// OP_DELETE 2
dfs.delete(pathFileMoved, false);
// OP_MKDIR 3
Path pathDirectoryMkdir = new Path("/directory_mkdir");
dfs.mkdirs(pathDirectoryMkdir);
// OP_ALLOW_SNAPSHOT 29
dfs.allowSnapshot(pathDirectoryMkdir);
// OP_DISALLOW_SNAPSHOT 30
dfs.disallowSnapshot(pathDirectoryMkdir);
// OP_CREATE_SNAPSHOT 26
String ssName = "snapshot1";
dfs.allowSnapshot(pathDirectoryMkdir);
dfs.createSnapshot(pathDirectoryMkdir, ssName);
// OP_RENAME_SNAPSHOT 28
String ssNewName = "snapshot2";
dfs.renameSnapshot(pathDirectoryMkdir, ssName, ssNewName);
// OP_DELETE_SNAPSHOT 27
dfs.deleteSnapshot(pathDirectoryMkdir, ssNewName);
// OP_SET_REPLICATION 4
s = dfs.create(pathFileCreate);
s.close();
dfs.setReplication(pathFileCreate, (short)1);
// OP_SET_PERMISSIONS 7
Short permission = 0777;
dfs.setPermission(pathFileCreate, new FsPermission(permission));
// OP_SET_OWNER 8
dfs.setOwner(pathFileCreate, new String("newOwner"), null);
// OP_CLOSE 9 see above
// OP_SET_GENSTAMP 10 see above
// OP_SET_NS_QUOTA 11 obsolete
// OP_CLEAR_NS_QUOTA 12 obsolete
// OP_TIMES 13
long mtime = 1285195527000L; // Wed, 22 Sep 2010 22:45:27 GMT
long atime = mtime;
dfs.setTimes(pathFileCreate, mtime, atime);
// OP_SET_QUOTA 14
dfs.setQuota(pathDirectoryMkdir, 1000L, HdfsConstants.QUOTA_DONT_SET);
// OP_RENAME 15
fc.rename(pathFileCreate, pathFileMoved, Rename.NONE);
// OP_CONCAT_DELETE 16
Path pathConcatTarget = new Path("/file_concat_target");
Path[] pathConcatFiles = new Path[2];
pathConcatFiles[0] = new Path("/file_concat_0");
pathConcatFiles[1] = new Path("/file_concat_1");
long length = blockSize * 3; // multiple of blocksize for concat
short replication = 1;
long seed = 1;
DFSTestUtil.createFile(dfs, pathConcatTarget, length, replication, seed);
DFSTestUtil.createFile(dfs, pathConcatFiles[0], length, replication, seed);
DFSTestUtil.createFile(dfs, pathConcatFiles[1], length, replication, seed);
dfs.concat(pathConcatTarget, pathConcatFiles);
// OP_SYMLINK 17
Path pathSymlink = new Path("/file_symlink");
fc.createSymlink(pathConcatTarget, pathSymlink, false);
// OP_GET_DELEGATION_TOKEN 18
// OP_RENEW_DELEGATION_TOKEN 19
// OP_CANCEL_DELEGATION_TOKEN 20
// see TestDelegationToken.java
// fake the user to renew token for
final Token<?>[] tokens = dfs.addDelegationTokens("JobTracker", null);
UserGroupInformation longUgi = UserGroupInformation.createRemoteUser(
"JobTracker/foo.com@FOO.COM");
try {
longUgi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws IOException, InterruptedException {
for (Token<?> token : tokens) {
token.renew(config);
token.cancel(config);
}
return null;
}
});
} catch(InterruptedException e) {
throw new IOException(
"renewDelegationToken threw InterruptedException", e);
}
// OP_UPDATE_MASTER_KEY 21
// done by getDelegationTokenSecretManager().startThreads();
// OP_ADD_CACHE_POOL 35
final String pool = "poolparty";
dfs.addCachePool(new CachePoolInfo(pool));
// OP_MODIFY_CACHE_POOL 36
dfs.modifyCachePool(new CachePoolInfo(pool)
.setOwnerName("carlton")
.setGroupName("party")
.setMode(new FsPermission((short)0700))
.setLimit(1989l));
// OP_ADD_PATH_BASED_CACHE_DIRECTIVE 33
long id = dfs.addCacheDirective(
new CacheDirectiveInfo.Builder().
setPath(new Path("/bar")).
setReplication((short)1).
setPool(pool).
build());
// OP_MODIFY_PATH_BASED_CACHE_DIRECTIVE 38
dfs.modifyCacheDirective(
new CacheDirectiveInfo.Builder().
setId(id).
setPath(new Path("/bar2")).
build());
// OP_REMOVE_PATH_BASED_CACHE_DIRECTIVE 34
dfs.removeCacheDirective(id);
// OP_REMOVE_CACHE_POOL 37
dfs.removeCachePool(pool);
// sync to disk, otherwise we parse partial edits
cluster.getNameNode().getFSImage().getEditLog().logSync();
// OP_REASSIGN_LEASE 22
String filePath = "/hard-lease-recovery-test";
byte[] bytes = "foo-bar-baz".getBytes();
DFSClientAdapter.stopLeaseRenewer(dfs);
FSDataOutputStream leaseRecoveryPath = dfs.create(new Path(filePath));
leaseRecoveryPath.write(bytes);
leaseRecoveryPath.hflush();
// Set the hard lease timeout to 1 second.
cluster.setLeasePeriod(60 * 1000, 1000);
// wait for lease recovery to complete
LocatedBlocks locatedBlocks;
do {
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
LOG.info("Innocuous exception", e);
}
locatedBlocks = DFSClientAdapter.callGetBlockLocations(
cluster.getNameNodeRpc(), filePath, 0L, bytes.length);
} while (locatedBlocks.isUnderConstruction());
DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
dfs.getDefaultBlockSize(), 0);
// Force a roll so we get an OP_END_LOG_SEGMENT txn
return cluster.getNameNodeRpc().rollEditLog();

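The hand-rolled sequence of filesystem operations that used to live in runOperations is replaced by the shared DFSTestUtil.runOperations helper, so the opcode-generating logic is maintained in one place. Roughly, a caller now needs only the following (assuming a running MiniDFSCluster named cluster, as above):

    // Generate edits covering the full set of opcodes against the live cluster.
    DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem();
    DFSTestUtil.runOperations(cluster, dfs, cluster.getConfiguration(0),
        dfs.getDefaultBlockSize(), 0);
    // Roll the log so the segment ends with an OP_END_LOG_SEGMENT transaction.
    CheckpointSignature sig = cluster.getNameNodeRpc().rollEditLog();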

@ -21,7 +21,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_BLOCK_SIZE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CACHEREPORT_INTERVAL_MSEC_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_CACHING_ENABLED_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS;
import static org.apache.hadoop.hdfs.protocol.CachePoolInfo.RELATIVE_EXPIRY_NEVER;
import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains;
@ -58,17 +57,21 @@ import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.LogVerificationAppender;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo.Expiration;
import org.apache.hadoop.hdfs.protocol.CachePoolStats;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.CachedBlocksList.Type;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.io.nativeio.NativeIO.POSIX.CacheManipulator;
@ -79,6 +82,7 @@ import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.GSet;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@ -104,7 +108,7 @@ public class TestCacheDirectives {
EditLogFileOutputStream.setShouldSkipFsyncForTesting(false);
}
private static final long BLOCK_SIZE = 512;
private static final long BLOCK_SIZE = 4096;
private static final int NUM_DATANODES = 4;
// Most Linux installs will allow non-root users to lock 64KB.
// In this test though, we stub out mlock so this doesn't matter.
@ -115,7 +119,6 @@ public class TestCacheDirectives {
conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY);
conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, true);
conf.setLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 1000);
conf.setLong(DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1000);
// set low limits here for testing purposes
@ -602,8 +605,8 @@ public class TestCacheDirectives {
* Wait for the NameNode to have an expected number of cached blocks
* and replicas.
* @param nn NameNode
* @param expectedCachedBlocks
* @param expectedCachedReplicas
* @param expectedCachedBlocks if -1, treat as wildcard
* @param expectedCachedReplicas if -1, treat as wildcard
* @throws Exception
*/
private static void waitForCachedBlocks(NameNode nn,
@ -632,17 +635,19 @@ public class TestCacheDirectives {
} finally {
namesystem.readUnlock();
}
if ((numCachedBlocks == expectedCachedBlocks) &&
(numCachedReplicas == expectedCachedReplicas)) {
if (expectedCachedBlocks == -1 ||
numCachedBlocks == expectedCachedBlocks) {
if (expectedCachedReplicas == -1 ||
numCachedReplicas == expectedCachedReplicas) {
return true;
} else {
}
}
LOG.info(logString + " cached blocks: have " + numCachedBlocks +
" / " + expectedCachedBlocks + ". " +
"cached replicas: have " + numCachedReplicas +
" / " + expectedCachedReplicas);
return false;
}
}
}, 500, 60000);
}
@ -796,7 +801,15 @@ public class TestCacheDirectives {
}
}, 500, 60000);
// Send a cache report referring to a bogus block. It is important that
// the NameNode be robust against this.
NamenodeProtocols nnRpc = namenode.getRpcServer();
DataNode dn0 = cluster.getDataNodes().get(0);
String bpid = cluster.getNamesystem().getBlockPoolId();
LinkedList<Long> bogusBlockIds = new LinkedList<Long>();
bogusBlockIds.add(999999L);
nnRpc.cacheReport(dn0.getDNRegistrationForBP(bpid), bpid, bogusBlockIds);
Path rootDir = helper.getDefaultWorkingDirectory(dfs);
// Create the pool
final String pool = "friendlyPool";
@ -826,6 +839,24 @@ public class TestCacheDirectives {
waitForCachedBlocks(namenode, expected, expected,
"testWaitForCachedReplicas:1");
}
// Check that the datanodes have the right cache values
DatanodeInfo[] live = dfs.getDataNodeStats(DatanodeReportType.LIVE);
assertEquals("Unexpected number of live nodes", NUM_DATANODES, live.length);
long totalUsed = 0;
for (DatanodeInfo dn : live) {
final long cacheCapacity = dn.getCacheCapacity();
final long cacheUsed = dn.getCacheUsed();
final long cacheRemaining = dn.getCacheRemaining();
assertEquals("Unexpected cache capacity", CACHE_CAPACITY, cacheCapacity);
assertEquals("Capacity not equal to used + remaining",
cacheCapacity, cacheUsed + cacheRemaining);
assertEquals("Remaining not equal to capacity - used",
cacheCapacity - cacheUsed, cacheRemaining);
totalUsed += cacheUsed;
}
assertEquals(expected*BLOCK_SIZE, totalUsed);
// Uncache and check each path in sequence
RemoteIterator<CacheDirectiveEntry> entries =
new CacheDirectiveIterator(nnRpc, null);
@ -838,55 +869,6 @@ public class TestCacheDirectives {
}
}
@Test(timeout=120000)
public void testAddingCacheDirectiveInfosWhenCachingIsDisabled()
throws Exception {
cluster.shutdown();
HdfsConfiguration conf = createCachingConf();
conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, false);
MiniDFSCluster cluster =
new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build();
try {
cluster.waitActive();
DistributedFileSystem dfs = cluster.getFileSystem();
NameNode namenode = cluster.getNameNode();
// Create the pool
String pool = "pool1";
namenode.getRpcServer().addCachePool(new CachePoolInfo(pool));
// Create some test files
final int numFiles = 2;
final int numBlocksPerFile = 2;
final List<String> paths = new ArrayList<String>(numFiles);
for (int i=0; i<numFiles; i++) {
Path p = new Path("/testCachePaths-" + i);
FileSystemTestHelper.createFile(dfs, p, numBlocksPerFile,
(int)BLOCK_SIZE);
paths.add(p.toUri().getPath());
}
// Check the initial statistics at the namenode
waitForCachedBlocks(namenode, 0, 0,
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:0");
// Cache and check each path in sequence
int expected = 0;
for (int i=0; i<numFiles; i++) {
CacheDirectiveInfo directive =
new CacheDirectiveInfo.Builder().
setPath(new Path(paths.get(i))).
setPool(pool).
build();
dfs.addCacheDirective(directive);
waitForCachedBlocks(namenode, expected, 0,
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:1");
}
Thread.sleep(20000);
waitForCachedBlocks(namenode, expected, 0,
"testAddingCacheDirectiveInfosWhenCachingIsDisabled:2");
} finally {
cluster.shutdown();
}
}
@Test(timeout=120000)
public void testWaitForCachedReplicasInDirectory() throws Exception {
// Create the pool
@ -965,7 +947,6 @@ public class TestCacheDirectives {
(4+3) * numBlocksPerFile * BLOCK_SIZE,
3, 2,
poolInfo, "testWaitForCachedReplicasInDirectory:2:pool");
// remove and watch numCached go to 0
dfs.removeCacheDirective(id);
dfs.removeCacheDirective(id2);
@ -1374,4 +1355,39 @@ public class TestCacheDirectives {
.setExpiration(Expiration.newRelative(RELATIVE_EXPIRY_NEVER - 1))
.build());
}
@Test(timeout=60000)
public void testExceedsCapacity() throws Exception {
// Create a giant file
final Path fileName = new Path("/exceeds");
final long fileLen = CACHE_CAPACITY * (NUM_DATANODES*2);
int numCachedReplicas = (int) ((CACHE_CAPACITY*NUM_DATANODES)/BLOCK_SIZE);
DFSTestUtil.createFile(dfs, fileName, fileLen, (short) NUM_DATANODES,
0xFADED);
// Set up a log appender watcher
final LogVerificationAppender appender = new LogVerificationAppender();
final Logger logger = Logger.getRootLogger();
logger.addAppender(appender);
dfs.addCachePool(new CachePoolInfo("pool"));
dfs.addCacheDirective(new CacheDirectiveInfo.Builder().setPool("pool")
.setPath(fileName).setReplication((short) 1).build());
waitForCachedBlocks(namenode, -1, numCachedReplicas,
"testExceeds:1");
// Check that no DNs saw an excess CACHE message
int lines = appender.countLinesWithMessage(
"more bytes in the cache: " +
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
assertEquals("Namenode should not send extra CACHE commands", 0, lines);
// Try creating a file with giant-sized blocks that exceed cache capacity
dfs.delete(fileName, false);
DFSTestUtil.createFile(dfs, fileName, 4096, fileLen, CACHE_CAPACITY * 2,
(short) 1, 0xFADED);
// Nothing will get cached, so just force sleep for a bit
Thread.sleep(4000);
// Still should not see any excess commands
lines = appender.countLinesWithMessage(
"more bytes in the cache: " +
DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY);
assertEquals("Namenode should not send extra CACHE commands", 0, lines);
}
}

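Two conventions introduced above are easy to miss: waitForCachedBlocks now treats -1 as a wildcard for either expectation, and the live-node loop pins down the DataNode cache accounting invariant. A hedged recap, with namenode, numCachedReplicas and dn standing in for the values used in the tests:

    // -1 skips the cached-block expectation; only replicas are checked.
    waitForCachedBlocks(namenode, -1, numCachedReplicas, "testExceeds:1");
    // Invariant asserted for every live DataNode:
    assertEquals(dn.getCacheCapacity(), dn.getCacheUsed() + dn.getCacheRemaining());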

@ -140,8 +140,9 @@ public class TestDeadDatanode {
// Ensure heartbeat from dead datanode is rejected with a command
// that asks datanode to register again
StorageReport[] rep = { new StorageReport(reg.getDatanodeUuid(), false, 0, 0,
0, 0) };
StorageReport[] rep = { new StorageReport(
new DatanodeStorage(reg.getDatanodeUuid()),
false, 0, 0, 0, 0) };
DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, rep, 0L, 0L, 0, 0, 0)
.getCommands();
assertEquals(1, cmd.length);


@ -27,6 +27,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.commons.logging.Log;
@ -59,6 +60,8 @@ import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
import com.google.common.util.concurrent.Uninterruptibles;
/**
* Tests state transition from active->standby, and manual failover
* and failback between two namenodes.
@ -124,6 +127,17 @@ public class TestHAStateTransitions {
}
}
private void addCrmThreads(MiniDFSCluster cluster,
LinkedList<Thread> crmThreads) {
for (int nn = 0; nn <= 1; nn++) {
Thread thread = cluster.getNameNode(nn).getNamesystem().
getCacheManager().getCacheReplicationMonitor();
if (thread != null) {
crmThreads.add(thread);
}
}
}
/**
* Test that transitioning a service to the state that it is already
* in is a nop, specifically, an exception is not thrown.
@ -131,19 +145,30 @@ public class TestHAStateTransitions {
@Test
public void testTransitionToCurrentStateIsANop() throws Exception {
Configuration conf = new Configuration();
conf.setLong(DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1L);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
.nnTopology(MiniDFSNNTopology.simpleHATopology())
.numDataNodes(1)
.build();
LinkedList<Thread> crmThreads = new LinkedList<Thread>();
try {
cluster.waitActive();
addCrmThreads(cluster, crmThreads);
cluster.transitionToActive(0);
addCrmThreads(cluster, crmThreads);
cluster.transitionToActive(0);
addCrmThreads(cluster, crmThreads);
cluster.transitionToStandby(0);
addCrmThreads(cluster, crmThreads);
cluster.transitionToStandby(0);
addCrmThreads(cluster, crmThreads);
} finally {
cluster.shutdown();
}
// Verify that all cacheReplicationMonitor threads shut down
for (Thread thread : crmThreads) {
Uninterruptibles.joinUninterruptibly(thread);
}
}
/**

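The new bookkeeping collects the CacheReplicationMonitor thread of each NameNode after every transition, then joins them all once the cluster is down; Guava's joinUninterruptibly is used so a stray interrupt cannot skip the verification. The core of the pattern:

    for (Thread thread : crmThreads) {
      // Blocks until the thread dies; any InterruptedException is swallowed
      // and the interrupt flag is restored before returning.
      Uninterruptibles.joinUninterruptibly(thread);
    }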

@ -47,6 +47,8 @@ import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.test.MetricsAsserts;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Level;
@ -108,6 +110,12 @@ public class TestNameNodeMetrics {
@After
public void tearDown() throws Exception {
MetricsSource source = DefaultMetricsSystem.instance().getSource("UgiMetrics");
if (source != null) {
// Run only once since the UGI metrics is cleaned up during teardown
MetricsRecordBuilder rb = getMetrics(source);
assertQuantileGauges("GetGroups1s", rb);
}
cluster.shutdown();
}


@ -33,10 +33,15 @@ import java.io.PrintStream;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@ -55,7 +60,7 @@ public class TestGetConf {
enum TestType {
NAMENODE, BACKUP, SECONDARY, NNRPCADDRESSES
}
FileSystem localFileSys;
/** Setup federation nameServiceIds in the configuration */
private void setupNameServices(HdfsConfiguration conf, int nameServiceIdCount) {
StringBuilder nsList = new StringBuilder();
@ -379,4 +384,70 @@ public class TestGetConf {
}
}
}
@Test
public void testGetConfExcludeCommand() throws Exception {
HdfsConfiguration conf = new HdfsConfiguration();
// Set up the hosts/exclude files.
localFileSys = FileSystem.getLocal(conf);
Path workingDir = localFileSys.getWorkingDirectory();
Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
Path hostsFile = new Path(dir, "hosts");
Path excludeFile = new Path(dir, "exclude");
// Setup conf
conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
writeConfigFile(hostsFile, null);
writeConfigFile(excludeFile, null);
String[] args = {"-excludeFile"};
String ret = runTool(conf, args, true);
assertEquals(excludeFile.toUri().getPath(), ret.trim());
cleanupFile(localFileSys, excludeFile.getParent());
}
@Test
public void testGetConfIncludeCommand() throws Exception {
HdfsConfiguration conf = new HdfsConfiguration();
// Set up the hosts/exclude files.
localFileSys = FileSystem.getLocal(conf);
Path workingDir = localFileSys.getWorkingDirectory();
Path dir = new Path(workingDir, System.getProperty("test.build.data", "target/test/data") + "/Getconf/");
Path hostsFile = new Path(dir, "hosts");
Path excludeFile = new Path(dir, "exclude");
// Setup conf
conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath());
conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
writeConfigFile(hostsFile, null);
writeConfigFile(excludeFile, null);
String[] args = {"-includeFile"};
String ret = runTool(conf, args, true);
assertEquals(hostsFile.toUri().getPath(), ret.trim());
cleanupFile(localFileSys, excludeFile.getParent());
}
private void writeConfigFile(Path name, ArrayList<String> nodes)
throws IOException {
// delete if it already exists
if (localFileSys.exists(name)) {
localFileSys.delete(name, true);
}
FSDataOutputStream stm = localFileSys.create(name);
if (nodes != null) {
for (Iterator<String> it = nodes.iterator(); it.hasNext();) {
String node = it.next();
stm.writeBytes(node);
stm.writeBytes("\n");
}
}
stm.close();
}
private void cleanupFile(FileSystem fileSys, Path name) throws IOException {
assertTrue(fileSys.exists(name));
fileSys.delete(name, true);
assertTrue(!fileSys.exists(name));
}
}


@ -26,8 +26,6 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -36,64 +34,58 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes;
import org.apache.hadoop.hdfs.server.namenode.OfflineEditsViewerHelper;
import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer.Flags;
import org.apache.hadoop.test.PathUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import com.google.common.collect.ImmutableSet;
public class TestOfflineEditsViewer {
private static final Log LOG = LogFactory.getLog(TestOfflineEditsViewer.class);
private static final Log LOG = LogFactory
.getLog(TestOfflineEditsViewer.class);
private static final Map<FSEditLogOpCodes, Boolean> obsoleteOpCodes =
new HashMap<FSEditLogOpCodes, Boolean>();
private static final Map<FSEditLogOpCodes, Boolean> missingOpCodes =
new HashMap<FSEditLogOpCodes, Boolean>();
static {
initializeObsoleteOpCodes();
initializeMissingOpCodes();
}
private static String buildDir =
PathUtils.getTestDirName(TestOfflineEditsViewer.class);
private static String cacheDir =
System.getProperty("test.cache.data", "build/test/cache");
private static String buildDir = PathUtils
.getTestDirName(TestOfflineEditsViewer.class);
// to create edits and get edits filename
private static final OfflineEditsViewerHelper nnHelper
= new OfflineEditsViewerHelper();
private static final OfflineEditsViewerHelper nnHelper = new OfflineEditsViewerHelper();
private static final ImmutableSet<FSEditLogOpCodes> skippedOps = skippedOps();
/**
* Initialize obsoleteOpCodes
*
* Reason for suppressing "deprecation" warnings:
*
* These are the opcodes that are not used anymore, some
* are marked deprecated, we need to include them here to make
* sure we exclude them when checking for completeness of testing,
* that's why the "deprecation" warnings are suppressed.
*/
@SuppressWarnings("deprecation")
private static void initializeObsoleteOpCodes() {
obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_ADD, true);
obsoleteOpCodes.put(FSEditLogOpCodes.OP_DATANODE_REMOVE, true);
obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_NS_QUOTA, true);
obsoleteOpCodes.put(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA, true);
private static ImmutableSet<FSEditLogOpCodes> skippedOps() {
ImmutableSet.Builder<FSEditLogOpCodes> b = ImmutableSet
.<FSEditLogOpCodes> builder();
// Deprecated opcodes
b.add(FSEditLogOpCodes.OP_DATANODE_ADD)
.add(FSEditLogOpCodes.OP_DATANODE_REMOVE)
.add(FSEditLogOpCodes.OP_SET_NS_QUOTA)
.add(FSEditLogOpCodes.OP_CLEAR_NS_QUOTA)
.add(FSEditLogOpCodes.OP_SET_GENSTAMP_V1);
// Cannot test delegation token related code in insecure set up
b.add(FSEditLogOpCodes.OP_GET_DELEGATION_TOKEN)
.add(FSEditLogOpCodes.OP_RENEW_DELEGATION_TOKEN)
.add(FSEditLogOpCodes.OP_CANCEL_DELEGATION_TOKEN);
// Skip invalid opcode
b.add(FSEditLogOpCodes.OP_INVALID);
return b.build();
}
/**
* Initialize missingOpcodes
*
* Opcodes that are not available except after upgrade from
* an older version. We don't test these here.
*/
private static void initializeMissingOpCodes() {
obsoleteOpCodes.put(FSEditLogOpCodes.OP_SET_GENSTAMP_V1, true);
}
@Rule
public final TemporaryFolder folder = new TemporaryFolder();
@Before
public void setup() {
new File(cacheDir).mkdirs();
public void setUp() throws IOException {
nnHelper.startCluster(buildDir + "/dfs/");
}
@After
public void tearDown() throws IOException {
nnHelper.shutdownCluster();
}
/**
@ -101,54 +93,42 @@ public class TestOfflineEditsViewer {
*/
@Test
public void testGenerated() throws IOException {
LOG.info("START - testing with generated edits");
nnHelper.startCluster(buildDir + "/dfs/");
// edits generated by nnHelper (MiniDFSCluster), should have all op codes
// binary, XML, reparsed binary
String edits = nnHelper.generateEdits();
String editsParsedXml = cacheDir + "/editsParsed.xml";
String editsReparsed = cacheDir + "/editsReparsed";
String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
String editsReparsed = folder.newFile("editsParsed").getAbsolutePath();
// parse to XML then back to binary
assertEquals(0, runOev(edits, editsParsedXml, "xml", false));
assertEquals(0, runOev(editsParsedXml, editsReparsed, "binary", false));
// judgment time
assertTrue(
"Edits " + edits + " should have all op codes",
assertTrue("Edits " + edits + " should have all op codes",
hasAllOpCodes(edits));
LOG.info("Comparing generated file " + editsReparsed +
" with reference file " + edits);
LOG.info("Comparing generated file " + editsReparsed
+ " with reference file " + edits);
assertTrue(
"Generated edits and reparsed (bin to XML to bin) should be same",
filesEqualIgnoreTrailingZeros(edits, editsReparsed));
// removes edits so do this at the end
nnHelper.shutdownCluster();
LOG.info("END");
}
@Test
public void testRecoveryMode() throws IOException {
LOG.info("START - testing with generated edits");
nnHelper.startCluster(buildDir + "/dfs/");
// edits generated by nnHelper (MiniDFSCluster), should have all op codes
// binary, XML, reparsed binary
String edits = nnHelper.generateEdits();
FileOutputStream os = new FileOutputStream(edits, true);
// Corrupt the file by truncating the end
FileChannel editsFile = new FileOutputStream(edits, true).getChannel();
FileChannel editsFile = os.getChannel();
editsFile.truncate(editsFile.size() - 5);
String editsParsedXml = cacheDir + "/editsRecoveredParsed.xml";
String editsReparsed = cacheDir + "/editsRecoveredReparsed";
String editsParsedXml2 = cacheDir + "/editsRecoveredParsed2.xml";
String editsParsedXml = folder.newFile("editsRecoveredParsed.xml")
.getAbsolutePath();
String editsReparsed = folder.newFile("editsRecoveredReparsed")
.getAbsolutePath();
String editsParsedXml2 = folder.newFile("editsRecoveredParsed2.xml")
.getAbsolutePath();
// Can't read the corrupted file without recovery mode
assertEquals(-1, runOev(edits, editsParsedXml, "xml", false));
@ -162,18 +142,14 @@ public class TestOfflineEditsViewer {
assertTrue("Test round trip",
filesEqualIgnoreTrailingZeros(editsParsedXml, editsParsedXml2));
// removes edits so do this at the end
nnHelper.shutdownCluster();
LOG.info("END");
os.close();
}
@Test
public void testStored() throws IOException {
LOG.info("START - testing with stored reference edits");
// reference edits stored with source code (see build.xml)
final String cacheDir = System.getProperty("test.cache.data",
"build/test/cache");
// binary, XML, reparsed binary
String editsStored = cacheDir + "/editsStored";
String editsStoredParsedXml = cacheDir + "/editsStoredParsed.xml";
@ -183,21 +159,17 @@ public class TestOfflineEditsViewer {
// parse to XML then back to binary
assertEquals(0, runOev(editsStored, editsStoredParsedXml, "xml", false));
assertEquals(0, runOev(editsStoredParsedXml, editsStoredReparsed,
"binary", false));
assertEquals(0,
runOev(editsStoredParsedXml, editsStoredReparsed, "binary", false));
// judgment time
assertTrue(
"Edits " + editsStored + " should have all op codes",
assertTrue("Edits " + editsStored + " should have all op codes",
hasAllOpCodes(editsStored));
assertTrue(
"Reference XML edits and parsed to XML should be same",
assertTrue("Reference XML edits and parsed to XML should be same",
filesEqual(editsStoredXml, editsStoredParsedXml));
assertTrue(
"Reference edits and reparsed (bin to XML to bin) should be same",
filesEqualIgnoreTrailingZeros(editsStored, editsStoredReparsed));
LOG.info("END");
}
/**
@ -233,19 +205,14 @@ public class TestOfflineEditsViewer {
OfflineEditsViewer oev = new OfflineEditsViewer();
if (oev.go(inFilename, outFilename, "stats", new Flags(), visitor) != 0)
return false;
LOG.info("Statistics for " + inFilename + "\n" +
visitor.getStatisticsString());
LOG.info("Statistics for " + inFilename + "\n"
+ visitor.getStatisticsString());
boolean hasAllOpCodes = true;
for (FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) {
// don't need to test obsolete opCodes
if(obsoleteOpCodes.containsKey(opCode)) {
if (skippedOps.contains(opCode))
continue;
} else if (missingOpCodes.containsKey(opCode)) {
continue;
} else if (opCode == FSEditLogOpCodes.OP_INVALID) {
continue;
}
Long count = visitor.getStatistics().get(opCode);
if ((count == null) || (count == 0)) {
@ -257,9 +224,9 @@ public class TestOfflineEditsViewer {
}
/**
* Compare two files, ignore trailing zeros at the end,
* for edits log the trailing zeros do not make any difference,
* throw exception is the files are not same
* Compare two files, ignoring trailing zeros at the end; for an edits log the
* trailing zeros do not make any difference. Throws an exception if the files
* are not the same.
*
* @param filenameSmall first file to compare (doesn't have to be smaller)
* @param filenameLarge second file to compare (doesn't have to be larger)
@ -288,7 +255,9 @@ public class TestOfflineEditsViewer {
large.limit(small.capacity());
// compares position to limit
if(!small.equals(large)) { return false; }
if (!small.equals(large)) {
return false;
}
// everything after limit should be 0xFF
int i = large.limit();

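Scratch files now come from JUnit's TemporaryFolder rule instead of the shared test.cache.data directory, which isolates each test run and cleans up automatically. A minimal sketch of the rule as adopted here:

    @Rule
    public final TemporaryFolder folder = new TemporaryFolder();

    @Test
    public void example() throws IOException {
      // Created under a fresh per-test directory; removed when the test ends.
      String editsParsedXml = folder.newFile("editsParsed.xml").getAbsolutePath();
    }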

@ -13,8 +13,8 @@
<TXID>2</TXID>
<DELEGATION_KEY>
<KEY_ID>1</KEY_ID>
<EXPIRY_DATE>1388171826188</EXPIRY_DATE>
<KEY>c7d869c22c8afce1</KEY>
<EXPIRY_DATE>1389121087930</EXPIRY_DATE>
<KEY>d48b4b3e6a43707b</KEY>
</DELEGATION_KEY>
</DATA>
</RECORD>
@ -24,8 +24,8 @@
<TXID>3</TXID>
<DELEGATION_KEY>
<KEY_ID>2</KEY_ID>
<EXPIRY_DATE>1388171826191</EXPIRY_DATE>
<KEY>a3c41446507dfca9</KEY>
<EXPIRY_DATE>1389121087937</EXPIRY_DATE>
<KEY>62b6fae6bff918a9</KEY>
</DELEGATION_KEY>
</DATA>
</RECORD>
@ -37,17 +37,17 @@
<INODEID>16386</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626844</MTIME>
<ATIME>1387480626844</ATIME>
<MTIME>1388429889312</MTIME>
<ATIME>1388429889312</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>7</RPC_CALLID>
</DATA>
</RECORD>
@ -59,8 +59,8 @@
<INODEID>0</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626885</MTIME>
<ATIME>1387480626844</ATIME>
<MTIME>1388429889328</MTIME>
<ATIME>1388429889312</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -78,8 +78,8 @@
<LENGTH>0</LENGTH>
<SRC>/file_create_u\0001;F431</SRC>
<DST>/file_moved</DST>
<TIMESTAMP>1387480626894</TIMESTAMP>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<TIMESTAMP>1388429889336</TIMESTAMP>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>9</RPC_CALLID>
</DATA>
</RECORD>
@ -89,8 +89,8 @@
<TXID>7</TXID>
<LENGTH>0</LENGTH>
<PATH>/file_moved</PATH>
<TIMESTAMP>1387480626905</TIMESTAMP>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<TIMESTAMP>1388429889346</TIMESTAMP>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>10</RPC_CALLID>
</DATA>
</RECORD>
@ -101,7 +101,7 @@
<LENGTH>0</LENGTH>
<INODEID>16387</INODEID>
<PATH>/directory_mkdir</PATH>
<TIMESTAMP>1387480626917</TIMESTAMP>
<TIMESTAMP>1388429889357</TIMESTAMP>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
@ -136,7 +136,7 @@
<TXID>12</TXID>
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
<SNAPSHOTNAME>snapshot1</SNAPSHOTNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>15</RPC_CALLID>
</DATA>
</RECORD>
@ -147,7 +147,7 @@
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
<SNAPSHOTOLDNAME>snapshot1</SNAPSHOTOLDNAME>
<SNAPSHOTNEWNAME>snapshot2</SNAPSHOTNEWNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>16</RPC_CALLID>
</DATA>
</RECORD>
@ -157,7 +157,7 @@
<TXID>14</TXID>
<SNAPSHOTROOT>/directory_mkdir</SNAPSHOTROOT>
<SNAPSHOTNAME>snapshot2</SNAPSHOTNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>17</RPC_CALLID>
</DATA>
</RECORD>
@ -169,17 +169,17 @@
<INODEID>16388</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626978</MTIME>
<ATIME>1387480626978</ATIME>
<MTIME>1388429889412</MTIME>
<ATIME>1388429889412</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>18</RPC_CALLID>
</DATA>
</RECORD>
@ -191,8 +191,8 @@
<INODEID>0</INODEID>
<PATH>/file_create_u\0001;F431</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480626985</MTIME>
<ATIME>1387480626978</ATIME>
<MTIME>1388429889420</MTIME>
<ATIME>1388429889412</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -253,9 +253,9 @@
<LENGTH>0</LENGTH>
<SRC>/file_create_u\0001;F431</SRC>
<DST>/file_moved</DST>
<TIMESTAMP>1387480627035</TIMESTAMP>
<TIMESTAMP>1388429889495</TIMESTAMP>
<OPTIONS>NONE</OPTIONS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>25</RPC_CALLID>
</DATA>
</RECORD>
@ -267,17 +267,17 @@
<INODEID>16389</INODEID>
<PATH>/file_concat_target</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627043</MTIME>
<ATIME>1387480627043</ATIME>
<MTIME>1388429889511</MTIME>
<ATIME>1388429889511</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>27</RPC_CALLID>
</DATA>
</RECORD>
@ -388,8 +388,8 @@
<INODEID>0</INODEID>
<PATH>/file_concat_target</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627148</MTIME>
<ATIME>1387480627043</ATIME>
<MTIME>1388429889812</MTIME>
<ATIME>1388429889511</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -423,17 +423,17 @@
<INODEID>16390</INODEID>
<PATH>/file_concat_0</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627155</MTIME>
<ATIME>1387480627155</ATIME>
<MTIME>1388429889825</MTIME>
<ATIME>1388429889825</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>40</RPC_CALLID>
</DATA>
</RECORD>
@ -544,8 +544,8 @@
<INODEID>0</INODEID>
<PATH>/file_concat_0</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627193</MTIME>
<ATIME>1387480627155</ATIME>
<MTIME>1388429889909</MTIME>
<ATIME>1388429889825</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -579,17 +579,17 @@
<INODEID>16391</INODEID>
<PATH>/file_concat_1</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627200</MTIME>
<ATIME>1387480627200</ATIME>
<MTIME>1388429889920</MTIME>
<ATIME>1388429889920</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>52</RPC_CALLID>
</DATA>
</RECORD>
@ -700,8 +700,8 @@
<INODEID>0</INODEID>
<PATH>/file_concat_1</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627238</MTIME>
<ATIME>1387480627200</ATIME>
<MTIME>1388429890016</MTIME>
<ATIME>1388429889920</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
@ -733,12 +733,12 @@
<TXID>56</TXID>
<LENGTH>0</LENGTH>
<TRG>/file_concat_target</TRG>
<TIMESTAMP>1387480627246</TIMESTAMP>
<TIMESTAMP>1388429890031</TIMESTAMP>
<SOURCES>
<SOURCE1>/file_concat_0</SOURCE1>
<SOURCE2>/file_concat_1</SOURCE2>
</SOURCES>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>63</RPC_CALLID>
</DATA>
</RECORD>
@ -750,14 +750,14 @@
<INODEID>16392</INODEID>
<PATH>/file_symlink</PATH>
<VALUE>/file_concat_target</VALUE>
<MTIME>1387480627255</MTIME>
<ATIME>1387480627255</ATIME>
<MTIME>1388429890046</MTIME>
<ATIME>1388429890046</ATIME>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>511</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>64</RPC_CALLID>
</DATA>
</RECORD>
@ -771,11 +771,11 @@
<OWNER>andrew</OWNER>
<RENEWER>JobTracker</RENEWER>
<REALUSER></REALUSER>
<ISSUE_DATE>1387480627262</ISSUE_DATE>
<MAX_DATE>1388085427262</MAX_DATE>
<ISSUE_DATE>1388429890059</ISSUE_DATE>
<MAX_DATE>1389034690059</MAX_DATE>
<MASTER_KEY_ID>2</MASTER_KEY_ID>
</DELEGATION_TOKEN_IDENTIFIER>
<EXPIRY_TIME>1387567027262</EXPIRY_TIME>
<EXPIRY_TIME>1388516290059</EXPIRY_TIME>
</DATA>
</RECORD>
<RECORD>
@ -788,11 +788,11 @@
<OWNER>andrew</OWNER>
<RENEWER>JobTracker</RENEWER>
<REALUSER></REALUSER>
<ISSUE_DATE>1387480627262</ISSUE_DATE>
<MAX_DATE>1388085427262</MAX_DATE>
<ISSUE_DATE>1388429890059</ISSUE_DATE>
<MAX_DATE>1389034690059</MAX_DATE>
<MASTER_KEY_ID>2</MASTER_KEY_ID>
</DELEGATION_TOKEN_IDENTIFIER>
<EXPIRY_TIME>1387567027281</EXPIRY_TIME>
<EXPIRY_TIME>1388516290109</EXPIRY_TIME>
</DATA>
</RECORD>
<RECORD>
@ -805,8 +805,8 @@
<OWNER>andrew</OWNER>
<RENEWER>JobTracker</RENEWER>
<REALUSER></REALUSER>
<ISSUE_DATE>1387480627262</ISSUE_DATE>
<MAX_DATE>1388085427262</MAX_DATE>
<ISSUE_DATE>1388429890059</ISSUE_DATE>
<MAX_DATE>1389034690059</MAX_DATE>
<MASTER_KEY_ID>2</MASTER_KEY_ID>
</DELEGATION_TOKEN_IDENTIFIER>
</DATA>
@ -821,7 +821,7 @@
<MODE>493</MODE>
<LIMIT>9223372036854775807</LIMIT>
<MAXRELATIVEEXPIRY>2305843009213693951</MAXRELATIVEEXPIRY>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>68</RPC_CALLID>
</DATA>
</RECORD>
@ -834,7 +834,7 @@
<GROUPNAME>party</GROUPNAME>
<MODE>448</MODE>
<LIMIT>1989</LIMIT>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>69</RPC_CALLID>
</DATA>
</RECORD>
@ -846,8 +846,8 @@
<PATH>/bar</PATH>
<REPLICATION>1</REPLICATION>
<POOL>poolparty</POOL>
<EXPIRATION>2305844396694321272</EXPIRATION>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<EXPIRATION>2305844397643584141</EXPIRATION>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>70</RPC_CALLID>
</DATA>
</RECORD>
@ -857,7 +857,7 @@
<TXID>64</TXID>
<ID>1</ID>
<PATH>/bar2</PATH>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>71</RPC_CALLID>
</DATA>
</RECORD>
@ -866,7 +866,7 @@
<DATA>
<TXID>65</TXID>
<ID>1</ID>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>72</RPC_CALLID>
</DATA>
</RECORD>
@ -875,7 +875,7 @@
<DATA>
<TXID>66</TXID>
<POOLNAME>poolparty</POOLNAME>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>73</RPC_CALLID>
</DATA>
</RECORD>
@ -887,17 +887,17 @@
<INODEID>16393</INODEID>
<PATH>/hard-lease-recovery-test</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480627356</MTIME>
<ATIME>1387480627356</ATIME>
<MTIME>1388429890261</MTIME>
<ATIME>1388429890261</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_1147796111_1</CLIENT_NAME>
<CLIENT_NAME>DFSClient_NONMAPREDUCE_-1396063717_1</CLIENT_NAME>
<CLIENT_MACHINE>127.0.0.1</CLIENT_MACHINE>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
<GROUPNAME>supergroup</GROUPNAME>
<MODE>420</MODE>
</PERMISSION_STATUS>
<RPC_CLIENTID>a90261a0-3759-4480-ba80-e10c9ae331e6</RPC_CLIENTID>
<RPC_CLIENTID>bfe81b9e-5c10-4f90-a5e1-b707da7bb781</RPC_CLIENTID>
<RPC_CALLID>74</RPC_CALLID>
</DATA>
</RECORD>
@ -954,7 +954,23 @@
<OPCODE>OP_REASSIGN_LEASE</OPCODE>
<DATA>
<TXID>73</TXID>
<LEASEHOLDER>DFSClient_NONMAPREDUCE_1147796111_1</LEASEHOLDER>
<LEASEHOLDER>DFSClient_NONMAPREDUCE_-1396063717_1</LEASEHOLDER>
<PATH>/hard-lease-recovery-test</PATH>
<NEWHOLDER>HDFS_NameNode</NEWHOLDER>
</DATA>
</RECORD>
<RECORD>
<OPCODE>OP_SET_GENSTAMP_V2</OPCODE>
<DATA>
<TXID>74</TXID>
<GENSTAMPV2>1012</GENSTAMPV2>
</DATA>
</RECORD>
<RECORD>
<OPCODE>OP_REASSIGN_LEASE</OPCODE>
<DATA>
<TXID>75</TXID>
<LEASEHOLDER>HDFS_NameNode</LEASEHOLDER>
<PATH>/hard-lease-recovery-test</PATH>
<NEWHOLDER>HDFS_NameNode</NEWHOLDER>
</DATA>
@ -962,20 +978,20 @@
<RECORD>
<OPCODE>OP_CLOSE</OPCODE>
<DATA>
<TXID>74</TXID>
<TXID>76</TXID>
<LENGTH>0</LENGTH>
<INODEID>0</INODEID>
<PATH>/hard-lease-recovery-test</PATH>
<REPLICATION>1</REPLICATION>
<MTIME>1387480629729</MTIME>
<ATIME>1387480627356</ATIME>
<MTIME>1388429895216</MTIME>
<ATIME>1388429890261</ATIME>
<BLOCKSIZE>512</BLOCKSIZE>
<CLIENT_NAME></CLIENT_NAME>
<CLIENT_MACHINE></CLIENT_MACHINE>
<BLOCK>
<BLOCK_ID>1073741834</BLOCK_ID>
<NUM_BYTES>11</NUM_BYTES>
<GENSTAMP>1011</GENSTAMP>
<GENSTAMP>1012</GENSTAMP>
</BLOCK>
<PERMISSION_STATUS>
<USERNAME>andrew</USERNAME>
@ -987,7 +1003,7 @@
<RECORD>
<OPCODE>OP_END_LOG_SEGMENT</OPCODE>
<DATA>
<TXID>75</TXID>
<TXID>77</TXID>
</DATA>
</RECORD>
</EDITS>


@ -77,6 +77,9 @@ Trunk (Unreleased)
MAPREDUCE-5189. Add policies and wiring to respond to preemption requests
from YARN. (Carlo Curino via cdouglas)
MAPREDUCE-5196. Add bookkeeping for managing checkpoints of task state.
(Carlo Curino via cdouglas)
BUG FIXES
MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@ -193,6 +196,8 @@ Release 2.4.0 - UNRELEASED
MAPREDUCE-5550. Task Status message (reporter.setStatus) not shown in UI
with Hadoop 2.0 (Gera Shegalov via Sandy Ryza)
MAPREDUCE-3310. Custom grouping comparator cannot be set for Combiners (tucu)
OPTIMIZATIONS
MAPREDUCE-5484. YarnChild unnecessarily loads job conf twice (Sandy Ryza)
@ -258,6 +263,15 @@ Release 2.4.0 - UNRELEASED
MAPREDUCE-5687. Fixed failure in TestYARNRunner caused by YARN-1446. (Jian He
via vinodkv)
MAPREDUCE-5694. Fixed MR AppMaster to shutdown the LogManager so as to avoid
losing syslog in some conditions. (Mohammad Kamrul Islam via vinodkv)
MAPREDUCE-5685. Fixed a bug with JobContext getCacheFiles API inside the
WrappedReducer class. (Yi Song via vinodkv)
MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps
cannot be fulfilled. (lohit via kasha)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES


@ -36,7 +36,9 @@ import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
@ -45,8 +47,8 @@ import org.apache.hadoop.mapreduce.v2.app.job.Task;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider;
@ -228,6 +230,22 @@ public class TaskAttemptListenerImpl extends CompositeService
TaskAttemptEventType.TA_COMMIT_PENDING));
}
@Override
public void preempted(TaskAttemptID taskAttemptID, TaskStatus taskStatus)
throws IOException, InterruptedException {
LOG.info("Preempted state update from " + taskAttemptID.toString());
// An attempt is telling us that it got preempted.
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
TypeConverter.toYarn(taskAttemptID);
preemptionPolicy.reportSuccessfulPreemption(attemptID);
taskHeartbeatHandler.progressing(attemptID);
context.getEventHandler().handle(
new TaskAttemptEvent(attemptID,
TaskAttemptEventType.TA_PREEMPTED));
}
@Override
public void done(TaskAttemptID taskAttemptID) throws IOException {
LOG.info("Done acknowledgement from " + taskAttemptID.toString());
@ -250,6 +268,10 @@ public class TaskAttemptListenerImpl extends CompositeService
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
TypeConverter.toYarn(taskAttemptID);
// handling checkpoints
preemptionPolicy.handleFailedContainer(attemptID);
context.getEventHandler().handle(
new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
}
@ -264,6 +286,10 @@ public class TaskAttemptListenerImpl extends CompositeService
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID =
TypeConverter.toYarn(taskAttemptID);
// handling checkpoints
preemptionPolicy.handleFailedContainer(attemptID);
context.getEventHandler().handle(
new TaskAttemptEvent(attemptID, TaskAttemptEventType.TA_FAILMSG));
}
@ -293,12 +319,6 @@ public class TaskAttemptListenerImpl extends CompositeService
return new MapTaskCompletionEventsUpdate(events, shouldReset);
}
@Override
public boolean ping(TaskAttemptID taskAttemptID) throws IOException {
LOG.info("Ping from " + taskAttemptID.toString());
return true;
}
@Override
public void reportDiagnosticInfo(TaskAttemptID taskAttemptID, String diagnosticInfo)
throws IOException {
@ -321,11 +341,33 @@ public class TaskAttemptListenerImpl extends CompositeService
}
@Override
public boolean statusUpdate(TaskAttemptID taskAttemptID,
public AMFeedback statusUpdate(TaskAttemptID taskAttemptID,
TaskStatus taskStatus) throws IOException, InterruptedException {
LOG.info("Status update from " + taskAttemptID.toString());
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId yarnAttemptID =
TypeConverter.toYarn(taskAttemptID);
AMFeedback feedback = new AMFeedback();
feedback.setTaskFound(true);
// Propagating preemption to the task if TASK_PREEMPTION is enabled
if (getConfig().getBoolean(MRJobConfig.TASK_PREEMPTION, false)
&& preemptionPolicy.isPreempted(yarnAttemptID)) {
feedback.setPreemption(true);
LOG.info("Setting preemption bit for task: "+ yarnAttemptID
+ " of type " + yarnAttemptID.getTaskId().getTaskType());
}
if (taskStatus == null) {
//We are using statusUpdate only as a simple ping
LOG.info("Ping from " + taskAttemptID.toString());
taskHeartbeatHandler.progressing(yarnAttemptID);
return feedback;
}
// if we are here there is an actual status update to be processed
LOG.info("Status update from " + taskAttemptID.toString());
taskHeartbeatHandler.progressing(yarnAttemptID);
TaskAttemptStatus taskAttemptStatus =
new TaskAttemptStatus();
@ -386,7 +428,7 @@ public class TaskAttemptListenerImpl extends CompositeService
context.getEventHandler().handle(
new TaskAttemptStatusUpdateEvent(taskAttemptStatus.id,
taskAttemptStatus));
return true;
return feedback;
}
@Override
@ -494,4 +536,18 @@ public class TaskAttemptListenerImpl extends CompositeService
return ProtocolSignature.getProtocolSignature(this,
protocol, clientVersion, clientMethodsHash);
}
// task checkpoint bookkeeping
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
TaskId tid = TypeConverter.toYarn(taskId);
return preemptionPolicy.getCheckpointID(tid);
}
@Override
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
TaskId tid = TypeConverter.toYarn(taskId);
preemptionPolicy.setCheckpointID(tid, cid);
}
}

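The standalone ping() umbilical call is gone: statusUpdate() now doubles as the ping (a null TaskStatus) and returns an AMFeedback carrying liveness plus a preemption bit. A rough task-side view of the new contract; umbilical, attemptId and the getter names are assumptions inferred from the setters used above:

    // A heartbeat with no status payload acts as a ping.
    AMFeedback feedback = umbilical.statusUpdate(attemptId, null);
    if (!feedback.getTaskFound()) {
      // The AM no longer knows this attempt; the task should exit.
    }
    if (feedback.getPreemption()) {
      // Checkpoint state, then acknowledge via umbilical.preempted(...).
    }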

@ -139,6 +139,7 @@ import org.apache.hadoop.yarn.security.client.ClientToAMTokenSecretManager;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.log4j.LogManager;
import com.google.common.annotations.VisibleForTesting;
@ -1395,6 +1396,8 @@ public class MRAppMaster extends CompositeService {
} catch (Throwable t) {
LOG.fatal("Error starting MRAppMaster", t);
System.exit(1);
} finally {
LogManager.shutdown();
}
}


@ -47,6 +47,7 @@ public enum TaskAttemptEventType {
TA_FAILMSG,
TA_UPDATE,
TA_TIMED_OUT,
TA_PREEMPTED,
//Producer:TaskCleaner
TA_CLEANUP_DONE,


@ -304,6 +304,9 @@ public abstract class TaskAttemptImpl implements
.addTransition(TaskAttemptStateInternal.RUNNING,
TaskAttemptStateInternal.KILL_CONTAINER_CLEANUP, TaskAttemptEventType.TA_KILL,
CLEANUP_CONTAINER_TRANSITION)
.addTransition(TaskAttemptStateInternal.RUNNING,
TaskAttemptStateInternal.KILLED,
TaskAttemptEventType.TA_PREEMPTED, new PreemptedTransition())
// Transitions from COMMIT_PENDING state
.addTransition(TaskAttemptStateInternal.COMMIT_PENDING,
@ -437,6 +440,7 @@ public abstract class TaskAttemptImpl implements
TaskAttemptEventType.TA_DONE,
TaskAttemptEventType.TA_FAILMSG,
TaskAttemptEventType.TA_CONTAINER_CLEANED,
TaskAttemptEventType.TA_PREEMPTED,
// Container launch events can arrive late
TaskAttemptEventType.TA_CONTAINER_LAUNCHED,
TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED))
@ -1874,6 +1878,27 @@ public abstract class TaskAttemptImpl implements
}
}
private static class PreemptedTransition implements
SingleArcTransition<TaskAttemptImpl,TaskAttemptEvent> {
@SuppressWarnings("unchecked")
@Override
public void transition(TaskAttemptImpl taskAttempt,
TaskAttemptEvent event) {
taskAttempt.setFinishTime();
taskAttempt.taskAttemptListener.unregister(
taskAttempt.attemptId, taskAttempt.jvmID);
taskAttempt.eventHandler.handle(new ContainerLauncherEvent(
taskAttempt.attemptId,
taskAttempt.getAssignedContainerID(), taskAttempt.getAssignedContainerMgrAddress(),
taskAttempt.container.getContainerToken(),
ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP));
taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
taskAttempt.attemptId,
TaskEventType.T_ATTEMPT_KILLED));
}
}
private static class CleanupContainerTransition implements
SingleArcTransition<TaskAttemptImpl, TaskAttemptEvent> {
@SuppressWarnings("unchecked")


@ -229,7 +229,8 @@ public class RMContainerAllocator extends RMContainerRequestor
int completedMaps = getJob().getCompletedMaps();
int completedTasks = completedMaps + getJob().getCompletedReduces();
if (lastCompletedTasks != completedTasks) {
if ((lastCompletedTasks != completedTasks) ||
(scheduledRequests.maps.size() > 0)) {
lastCompletedTasks = completedTasks;
recalculateReduceSchedule = true;
}


@ -19,10 +19,9 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import java.util.List;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.yarn.api.records.Container;
@ -81,7 +80,7 @@ public interface AMPreemptionPolicy {
* successfully preempted (for bookkeeping, counters, etc.)
* @param attemptID Task attempt that preempted
*/
public void reportSuccessfulPreemption(TaskAttemptID attemptID);
public void reportSuccessfulPreemption(TaskAttemptId attemptID);
/**
* Callback informing the policy of containers exiting with a failure. This
@ -98,20 +97,20 @@ public interface AMPreemptionPolicy {
public void handleCompletedContainer(TaskAttemptId attemptID);
/**
* Method to retrieve the latest checkpoint for a given {@link TaskID}
* Method to retrieve the latest checkpoint for a given {@link TaskId}
* @param taskId TaskID
* @return CheckpointID associated with this task or null
*/
public TaskCheckpointID getCheckpointID(TaskID taskId);
public TaskCheckpointID getCheckpointID(TaskId taskId);
/**
* Method to store the latest {@link
* org.apache.hadoop.mapreduce.checkpoint.CheckpointID} for a given {@link
* TaskID}. Assigning a null is akin to remove all previous checkpoints for
* TaskId}. Assigning null is akin to removing all previous checkpoints for
* this task.
* @param taskId TaskID
* @param cid Checkpoint to assign or <tt>null</tt> to remove it.
*/
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid);
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid);
}

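With the checkpoint methods moved from the mapred TaskID to the v2 TaskId record, callers convert once and then use the policy for bookkeeping, mirroring TaskAttemptListenerImpl above (policy and taskId are placeholders):

    TaskId tid = TypeConverter.toYarn(taskId);
    TaskCheckpointID cid = policy.getCheckpointID(tid); // null if none recorded
    policy.setCheckpointID(tid, cid); // assigning null clears prior checkpoints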

@ -0,0 +1,290 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
import org.apache.hadoop.yarn.api.records.PreemptionContract;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
import org.apache.hadoop.yarn.event.EventHandler;
/**
* This policy works in combination with an implementation of task
* checkpointing. It computes the tasks to be preempted in response to the RM
* request for preemption. For strict requests, it maps containers to
* corresponding tasks; for fungible requests, it attempts to pick the best
* containers to preempt (reducers in reverse allocation order). The
* TaskAttemptListener will interrogate this policy when handling a task
* heartbeat to check whether the task should be preempted or not. When
* handling fungible requests, the policy discounts the RM ask by the amount
* of currently in-flight preemptions (i.e., tasks that are checkpointing).
*
* This class is also used to maintain the list of checkpoints for existing
* tasks. Centralizing this functionality here gives us visibility on
* preemption and checkpoints in a single location, thus coordinating
* preemption and checkpoint management decisions in a single policy.
*/
public class CheckpointAMPreemptionPolicy implements AMPreemptionPolicy {
// task attempts flagged for preemption
private final Set<TaskAttemptId> toBePreempted;
private final Set<TaskAttemptId> countedPreemptions;
private final Map<TaskId,TaskCheckpointID> checkpoints;
private final Map<TaskAttemptId,Resource> pendingFlexiblePreemptions;
@SuppressWarnings("rawtypes")
private EventHandler eventHandler;
static final Log LOG = LogFactory
.getLog(CheckpointAMPreemptionPolicy.class);
public CheckpointAMPreemptionPolicy() {
this(Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
Collections.synchronizedSet(new HashSet<TaskAttemptId>()),
Collections.synchronizedMap(new HashMap<TaskId,TaskCheckpointID>()),
Collections.synchronizedMap(new HashMap<TaskAttemptId,Resource>()));
}
CheckpointAMPreemptionPolicy(Set<TaskAttemptId> toBePreempted,
Set<TaskAttemptId> countedPreemptions,
Map<TaskId,TaskCheckpointID> checkpoints,
Map<TaskAttemptId,Resource> pendingFlexiblePreemptions) {
this.toBePreempted = toBePreempted;
this.countedPreemptions = countedPreemptions;
this.checkpoints = checkpoints;
this.pendingFlexiblePreemptions = pendingFlexiblePreemptions;
}
@Override
public void init(AppContext context) {
this.eventHandler = context.getEventHandler();
}
@Override
public void preempt(Context ctxt, PreemptionMessage preemptionRequests) {
if (preemptionRequests != null) {
// handling non-negotiable preemption
StrictPreemptionContract cStrict = preemptionRequests.getStrictContract();
if (cStrict != null
&& cStrict.getContainers() != null
&& cStrict.getContainers().size() > 0) {
LOG.info("strict preemption :" +
preemptionRequests.getStrictContract().getContainers().size() +
" containers to kill");
// handle strict preemptions. These containers are non-negotiable
for (PreemptionContainer c :
preemptionRequests.getStrictContract().getContainers()) {
ContainerId reqCont = c.getId();
TaskAttemptId reqTask = ctxt.getTaskAttempt(reqCont);
if (reqTask != null) {
// ignore requests for preempting containers running maps
if (org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE
.equals(reqTask.getTaskId().getTaskType())) {
toBePreempted.add(reqTask);
LOG.info("preempting " + reqCont + " running task:" + reqTask);
} else {
LOG.info("NOT preempting " + reqCont + " running task:" + reqTask);
}
}
}
}
// handling negotiable preemption
PreemptionContract cNegot = preemptionRequests.getContract();
if (cNegot != null
&& cNegot.getResourceRequest() != null
&& cNegot.getResourceRequest().size() > 0
&& cNegot.getContainers() != null
&& cNegot.getContainers().size() > 0) {
LOG.info("negotiable preemption :" +
preemptionRequests.getContract().getResourceRequest().size() +
" resourceReq, " +
preemptionRequests.getContract().getContainers().size() +
" containers");
// handle fungible preemption. Here we only look at the total amount of
// resources to be preempted and pick enough of our containers to
// satisfy that. We only support checkpointing for reducers for now.
List<PreemptionResourceRequest> reqResources =
preemptionRequests.getContract().getResourceRequest();
// compute the total amount of pending preemptions (to be discounted
// from current request)
int pendingPreemptionRam = 0;
int pendingPreemptionCores = 0;
for (Resource r : pendingFlexiblePreemptions.values()) {
pendingPreemptionRam += r.getMemory();
pendingPreemptionCores += r.getVirtualCores();
}
// discount preemption request based on currently pending preemption
for (PreemptionResourceRequest rr : reqResources) {
ResourceRequest reqRsrc = rr.getResourceRequest();
if (!ResourceRequest.ANY.equals(reqRsrc.getResourceName())) {
// For now, only respond to aggregate requests and ignore locality
continue;
}
LOG.info("ResourceRequest:" + reqRsrc);
int reqCont = reqRsrc.getNumContainers();
int reqMem = reqRsrc.getCapability().getMemory();
int totalMemoryToRelease = reqCont * reqMem;
int reqCores = reqRsrc.getCapability().getVirtualCores();
int totalCoresToRelease = reqCont * reqCores;
// discount the ask by preemptions already in flight
if (pendingPreemptionRam > 0) {
// if this goes negative, the pending preemptions already cover the ask
totalMemoryToRelease -= pendingPreemptionRam;
// decrement pending resources; if zero or negative we will
// ignore it while processing the next PreemptionResourceRequest
pendingPreemptionRam -= totalMemoryToRelease;
}
if (pendingPreemptionCores > 0) {
totalCoresToRelease -= pendingPreemptionCores;
pendingPreemptionCores -= totalCoresToRelease;
}
// reverse order of allocation (for now)
List<Container> listOfCont = ctxt.getContainers(TaskType.REDUCE);
Collections.sort(listOfCont, new Comparator<Container>() {
@Override
public int compare(final Container o1, final Container o2) {
return o2.getId().getId() - o1.getId().getId();
}
});
// preempt reducers first
for (Container cont : listOfCont) {
if (totalMemoryToRelease <= 0 && totalCoresToRelease<=0) {
break;
}
TaskAttemptId reduceId = ctxt.getTaskAttempt(cont.getId());
int cMem = cont.getResource().getMemory();
int cCores = cont.getResource().getVirtualCores();
if (!toBePreempted.contains(reduceId)) {
totalMemoryToRelease -= cMem;
totalCoresToRelease -= cCores;
toBePreempted.add(reduceId);
pendingFlexiblePreemptions.put(reduceId, cont.getResource());
}
LOG.info("ResourceRequest:" + reqRsrc + " satisfied preempting "
+ reduceId);
}
// if maps were preemptable we would add them to toBePreempted here
}
}
}
}
@Override
public void handleFailedContainer(TaskAttemptId attemptID) {
toBePreempted.remove(attemptID);
checkpoints.remove(attemptID.getTaskId());
}
@Override
public void handleCompletedContainer(TaskAttemptId attemptID){
LOG.info(" task completed:" + attemptID);
toBePreempted.remove(attemptID);
pendingFlexiblePreemptions.remove(attemptID);
}
@Override
public boolean isPreempted(TaskAttemptId yarnAttemptID) {
if (toBePreempted.contains(yarnAttemptID)) {
updatePreemptionCounters(yarnAttemptID);
return true;
}
return false;
}
@Override
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskId taskId) {
return checkpoints.get(taskId);
}
@Override
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
checkpoints.put(taskId, cid);
if (cid != null) {
updateCheckpointCounters(taskId, cid);
}
}
@SuppressWarnings({ "unchecked" })
private void updateCheckpointCounters(TaskId taskId, TaskCheckpointID cid) {
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(taskId.getJobId());
jce.addCounterUpdate(JobCounter.CHECKPOINTS, 1);
eventHandler.handle(jce);
jce = new JobCounterUpdateEvent(taskId.getJobId());
jce.addCounterUpdate(JobCounter.CHECKPOINT_BYTES, cid.getCheckpointBytes());
eventHandler.handle(jce);
jce = new JobCounterUpdateEvent(taskId.getJobId());
jce.addCounterUpdate(JobCounter.CHECKPOINT_TIME, cid.getCheckpointTime());
eventHandler.handle(jce);
}
@SuppressWarnings({ "unchecked" })
private void updatePreemptionCounters(TaskAttemptId yarnAttemptID) {
if (!countedPreemptions.contains(yarnAttemptID)) {
countedPreemptions.add(yarnAttemptID);
JobCounterUpdateEvent jce = new JobCounterUpdateEvent(yarnAttemptID
.getTaskId().getJobId());
jce.addCounterUpdate(JobCounter.TASKS_REQ_PREEMPT, 1);
eventHandler.handle(jce);
}
}
}
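
A minimal standalone sketch of the discounting step above (not part of the patch; all values hypothetical): a pending 2GB preemption halves a 4GB fungible ask and is itself consumed.

// Illustrative only: how an RM ask is reduced by preemptions already
// in flight, mirroring the arithmetic in preempt() above.
public class DiscountSketch {
  public static void main(String[] args) {
    int pendingPreemptionRam = 2048;              // 2GB already checkpointing
    int reqCont = 4, reqMem = 1024;               // RM asks for 4 x 1GB
    int totalMemoryToRelease = reqCont * reqMem;  // 4096 MB
    if (pendingPreemptionRam > 0) {
      totalMemoryToRelease -= pendingPreemptionRam; // 2048 MB left to find
      pendingPreemptionRam -= totalMemoryToRelease; // 0: fully consumed
    }
    System.out.println("still to preempt: " + totalMemoryToRelease + "MB");
  }
}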

View File

@ -19,11 +19,10 @@ package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.JobCounter;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent;
@ -89,17 +88,17 @@ public class KillAMPreemptionPolicy implements AMPreemptionPolicy {
}
@Override
public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
public TaskCheckpointID getCheckpointID(TaskId taskId) {
return null;
}
@Override
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
// ignore
}

View File

@ -17,10 +17,9 @@
*/
package org.apache.hadoop.mapreduce.v2.app.rm.preemption;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
@ -50,17 +49,17 @@ public class NoopAMPreemptionPolicy implements AMPreemptionPolicy {
}
@Override
public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) {
public void reportSuccessfulPreemption(TaskAttemptId taskAttemptID) {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
public TaskCheckpointID getCheckpointID(TaskId taskId) {
return null;
}
@Override
public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) {
public void setCheckpointID(TaskId taskId, TaskCheckpointID cid) {
// ignore
}

View File

@ -17,26 +17,23 @@
*/
package org.apache.hadoop.mapred;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapreduce.checkpoint.EnumCounter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import junit.framework.Assert;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
@ -46,21 +43,31 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.TaskHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.util.SystemClock;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
public class TestTaskAttemptListenerImpl {
public static class MockTaskAttemptListenerImpl extends TaskAttemptListenerImpl {
public static class MockTaskAttemptListenerImpl
extends TaskAttemptListenerImpl {
public MockTaskAttemptListenerImpl(AppContext context,
JobTokenSecretManager jobTokenSecretManager,
RMHeartbeatHandler rmHeartbeatHandler,
TaskHeartbeatHandler hbHandler) {
super(context, jobTokenSecretManager, rmHeartbeatHandler, null);
TaskHeartbeatHandler hbHandler,
AMPreemptionPolicy policy) {
super(context, jobTokenSecretManager, rmHeartbeatHandler, policy);
this.taskHeartbeatHandler = hbHandler;
}
@ -87,9 +94,16 @@ public class TestTaskAttemptListenerImpl {
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
MockTaskAttemptListenerImpl listener =
new MockTaskAttemptListenerImpl(appCtx, secret,
rmHeartbeatHandler, hbHandler);
rmHeartbeatHandler, hbHandler, policy);
Configuration conf = new Configuration();
listener.init(conf);
listener.start();
@ -144,7 +158,7 @@ public class TestTaskAttemptListenerImpl {
assertNotNull(jvmid);
try {
JVMId.forName("jvm_001_002_m_004_006");
Assert.fail();
fail();
} catch (IllegalArgumentException e) {
assertEquals(e.getMessage(),
"TaskId string : jvm_001_002_m_004_006 is not properly formed");
@ -190,8 +204,14 @@ public class TestTaskAttemptListenerImpl {
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
TaskAttemptListenerImpl listener =
new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
appCtx, secret, rmHeartbeatHandler, policy) {
@Override
protected void registerHeartbeatHandler(Configuration conf) {
taskHeartbeatHandler = hbHandler;
@ -219,7 +239,8 @@ public class TestTaskAttemptListenerImpl {
isMap ? org.apache.hadoop.mapreduce.v2.api.records.TaskType.MAP
: org.apache.hadoop.mapreduce.v2.api.records.TaskType.REDUCE);
TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(tid, 0);
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
TaskAttemptCompletionEvent tce = recordFactory
.newRecordInstance(TaskAttemptCompletionEvent.class);
tce.setEventId(eventId);
@ -244,8 +265,14 @@ public class TestTaskAttemptListenerImpl {
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
TaskAttemptListenerImpl listener =
new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) {
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
appCtx, secret, rmHeartbeatHandler, policy) {
@Override
protected void registerHeartbeatHandler(Configuration conf) {
taskHeartbeatHandler = hbHandler;
@ -270,4 +297,88 @@ public class TestTaskAttemptListenerImpl {
listener.stop();
}
@Test
public void testCheckpointIDTracking()
throws IOException, InterruptedException{
SystemClock clock = new SystemClock();
org.apache.hadoop.mapreduce.v2.app.job.Task mockTask =
mock(org.apache.hadoop.mapreduce.v2.app.job.Task.class);
when(mockTask.canCommit(any(TaskAttemptId.class))).thenReturn(true);
Job mockJob = mock(Job.class);
when(mockJob.getTask(any(TaskId.class))).thenReturn(mockTask);
Dispatcher dispatcher = mock(Dispatcher.class);
EventHandler ea = mock(EventHandler.class);
when(dispatcher.getEventHandler()).thenReturn(ea);
RMHeartbeatHandler rmHeartbeatHandler =
mock(RMHeartbeatHandler.class);
AppContext appCtx = mock(AppContext.class);
when(appCtx.getJob(any(JobId.class))).thenReturn(mockJob);
when(appCtx.getClock()).thenReturn(clock);
when(appCtx.getEventHandler()).thenReturn(ea);
JobTokenSecretManager secret = mock(JobTokenSecretManager.class);
final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class);
when(appCtx.getEventHandler()).thenReturn(ea);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(appCtx);
TaskAttemptListenerImpl listener = new TaskAttemptListenerImpl(
appCtx, secret, rmHeartbeatHandler, policy) {
@Override
protected void registerHeartbeatHandler(Configuration conf) {
taskHeartbeatHandler = hbHandler;
}
};
Configuration conf = new Configuration();
conf.setBoolean(MRJobConfig.TASK_PREEMPTION, true);
//conf.setBoolean("preemption.reduce", true);
listener.init(conf);
listener.start();
TaskAttemptID tid = new TaskAttemptID("12345", 1, TaskType.REDUCE, 1, 0);
List<Path> partialOut = new ArrayList<Path>();
partialOut.add(new Path("/prev1"));
partialOut.add(new Path("/prev2"));
Counters counters = mock(Counters.class);
final long CBYTES = 64L * 1024 * 1024;
final long CTIME = 4344L;
final Path CLOC = new Path("/test/1");
Counter cbytes = mock(Counter.class);
when(cbytes.getValue()).thenReturn(CBYTES);
Counter ctime = mock(Counter.class);
when(ctime.getValue()).thenReturn(CTIME);
when(counters.findCounter(eq(EnumCounter.CHECKPOINT_BYTES)))
.thenReturn(cbytes);
when(counters.findCounter(eq(EnumCounter.CHECKPOINT_MS)))
.thenReturn(ctime);
// propagating a TaskStatus that contains a checkpoint id
TaskCheckpointID incid = new TaskCheckpointID(new FSCheckpointID(
CLOC), partialOut, counters);
listener.setCheckpointID(
org.apache.hadoop.mapred.TaskID.downgrade(tid.getTaskID()), incid);
// and try to get it back
CheckpointID outcid = listener.getCheckpointID(tid.getTaskID());
TaskCheckpointID tcid = (TaskCheckpointID) outcid;
assertEquals(CBYTES, tcid.getCheckpointBytes());
assertEquals(CTIME, tcid.getCheckpointTime());
assertTrue(partialOut.containsAll(tcid.getPartialCommittedOutput()));
assertTrue(tcid.getPartialCommittedOutput().containsAll(partialOut));
//assert it worked
assert outcid == incid;
listener.stop();
}
}

View File

@ -0,0 +1,329 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.app;
import org.apache.hadoop.yarn.api.records.PreemptionContract;
import org.apache.hadoop.yarn.api.records.PreemptionMessage;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.util.resource.Resources;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.mapred.TaskAttemptListenerImpl;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
import org.apache.hadoop.mapreduce.v2.app.MRAppMaster.RunningAppContext;
import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.app.rm.preemption.CheckpointAMPreemptionPolicy;
import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils;
import org.apache.hadoop.yarn.api.records.PreemptionContainer;
import org.apache.hadoop.yarn.api.records.PreemptionResourceRequest;
import org.apache.hadoop.yarn.api.records.StrictPreemptionContract;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
import org.junit.Before;
import org.junit.Test;
public class TestCheckpointPreemptionPolicy {
TaskAttemptListenerImpl pel = null;
RMContainerAllocator r;
JobId jid;
RunningAppContext mActxt;
Set<ContainerId> preemptedContainers = new HashSet<ContainerId>();
Map<ContainerId,TaskAttemptId> assignedContainers =
new HashMap<ContainerId, TaskAttemptId>();
private final RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
HashMap<ContainerId,Resource> contToResourceMap =
new HashMap<ContainerId, Resource>();
private int minAlloc = 1024;
@Before
@SuppressWarnings("rawtypes") // mocked generics
public void setup() {
ApplicationId appId = ApplicationId.newInstance(200, 1);
ApplicationAttemptId appAttemptId =
ApplicationAttemptId.newInstance(appId, 1);
jid = MRBuilderUtils.newJobId(appId, 1);
mActxt = mock(RunningAppContext.class);
EventHandler ea = mock(EventHandler.class);
when(mActxt.getEventHandler()).thenReturn(ea);
for (int i = 0; i < 40; ++i) {
ContainerId cId = ContainerId.newInstance(appAttemptId, i);
if (0 == i % 7) {
preemptedContainers.add(cId);
}
TaskId tId = 0 == i % 2
? MRBuilderUtils.newTaskId(jid, i / 2, TaskType.MAP)
: MRBuilderUtils.newTaskId(jid, i / 2 + 1, TaskType.REDUCE);
assignedContainers.put(cId, MRBuilderUtils.newTaskAttemptId(tId, 0));
contToResourceMap.put(cId, Resource.newInstance(2 * minAlloc, 2));
}
for (Map.Entry<ContainerId,TaskAttemptId> ent :
assignedContainers.entrySet()) {
System.out.println("cont:" + ent.getKey().getId() +
" type:" + ent.getValue().getTaskId().getTaskType() +
" res:" + contToResourceMap.get(ent.getKey()).getMemory() + "MB" );
}
}
@Test
public void testStrictPreemptionContract() {
final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
@Override
public TaskAttemptId getTaskAttempt(ContainerId cId) {
return containers.get(cId);
}
@Override
public List<Container> getContainers(TaskType t) {
List<Container> p = new ArrayList<Container>();
for (Map.Entry<ContainerId,TaskAttemptId> ent :
assignedContainers.entrySet()) {
if (ent.getValue().getTaskId().getTaskType().equals(t)) {
p.add(Container.newInstance(ent.getKey(), null, null,
contToResourceMap.get(ent.getKey()),
Priority.newInstance(0), null));
}
}
return p;
}
};
PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
contToResourceMap, Resource.newInstance(1024, 1), true);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(mActxt);
policy.preempt(mPctxt, pM);
for (ContainerId c : preemptedContainers) {
TaskAttemptId t = assignedContainers.get(c);
if (TaskType.MAP.equals(t.getTaskId().getTaskType())) {
assert policy.isPreempted(t) == false;
} else {
assert policy.isPreempted(t);
}
}
}
@Test
public void testPreemptionContract() {
final Map<ContainerId,TaskAttemptId> containers = assignedContainers;
AMPreemptionPolicy.Context mPctxt = new AMPreemptionPolicy.Context() {
@Override
public TaskAttemptId getTaskAttempt(ContainerId cId) {
return containers.get(cId);
}
@Override
public List<Container> getContainers(TaskType t) {
List<Container> p = new ArrayList<Container>();
for (Map.Entry<ContainerId,TaskAttemptId> ent :
assignedContainers.entrySet()) {
if (ent.getValue().getTaskId().getTaskType().equals(t)) {
p.add(Container.newInstance(ent.getKey(), null, null,
contToResourceMap.get(ent.getKey()),
Priority.newInstance(0), null));
}
}
return p;
}
};
PreemptionMessage pM = generatePreemptionMessage(preemptedContainers,
contToResourceMap, Resource.newInstance(minAlloc, 1), false);
CheckpointAMPreemptionPolicy policy = new CheckpointAMPreemptionPolicy();
policy.init(mActxt);
int supposedMemPreemption = pM.getContract().getResourceRequest()
.get(0).getResourceRequest().getCapability().getMemory()
* pM.getContract().getResourceRequest().get(0).getResourceRequest()
.getNumContainers();
// first round of preemption
policy.preempt(mPctxt, pM);
List<TaskAttemptId> preempting =
validatePreemption(pM, policy, supposedMemPreemption);
// redundant message
policy.preempt(mPctxt, pM);
List<TaskAttemptId> preempting2 =
validatePreemption(pM, policy, supposedMemPreemption);
// check that nothing got added
assert preempting2.equals(preempting);
// simulate 2 task completions/successful preemption
policy.handleCompletedContainer(preempting.get(0));
policy.handleCompletedContainer(preempting.get(1));
// remove from assignedContainers
Iterator<Map.Entry<ContainerId,TaskAttemptId>> it =
assignedContainers.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<ContainerId,TaskAttemptId> ent = it.next();
if (ent.getValue().equals(preempting.get(0)) ||
ent.getValue().equals(preempting.get(1)))
it.remove();
}
// one more message asking for preemption
policy.preempt(mPctxt, pM);
// triggers preemption of 2 more containers (i.e., the preemption set changes)
List<TaskAttemptId> preempting3 =
validatePreemption(pM, policy, supposedMemPreemption);
assert preempting3.equals(preempting2) == false;
}
private List<TaskAttemptId> validatePreemption(PreemptionMessage pM,
CheckpointAMPreemptionPolicy policy, int supposedMemPreemption) {
Resource effectivelyPreempted = Resource.newInstance(0, 0);
List<TaskAttemptId> preempting = new ArrayList<TaskAttemptId>();
for (Map.Entry<ContainerId, TaskAttemptId> ent :
assignedContainers.entrySet()) {
if (policy.isPreempted(ent.getValue())) {
Resources.addTo(effectivelyPreempted, contToResourceMap.get(ent.getKey()));
// preempt only reducers
if (policy.isPreempted(ent.getValue())){
assertEquals(TaskType.REDUCE, ent.getValue().getTaskId().getTaskType());
preempting.add(ent.getValue());
}
}
}
// check that we preempted enough
assert (effectivelyPreempted.getMemory() >= supposedMemPreemption)
: " preempted: " + effectivelyPreempted.getMemory();
// but not too much (at most one minimum allocation over the ask)
assert effectivelyPreempted.getMemory() <= supposedMemPreemption + minAlloc;
return preempting;
}
private PreemptionMessage generatePreemptionMessage(
Set<ContainerId> containerToPreempt,
HashMap<ContainerId, Resource> resPerCont,
Resource minimumAllocation, boolean strict) {
Set<ContainerId> currentContPreemption = Collections.unmodifiableSet(
new HashSet<ContainerId>(containerToPreempt));
containerToPreempt.clear();
Resource tot = Resource.newInstance(0, 0);
for (ContainerId c : currentContPreemption) {
Resources.addTo(tot, resPerCont.get(c));
}
int numCont = (int) Math.ceil(tot.getMemory() /
(double) minimumAllocation.getMemory());
ResourceRequest rr = ResourceRequest.newInstance(
Priority.newInstance(0), ResourceRequest.ANY,
minimumAllocation, numCont);
if (strict) {
return generatePreemptionMessage(new Allocation(null, null,
currentContPreemption, null, null));
}
return generatePreemptionMessage(new Allocation(null, null,
null, currentContPreemption,
Collections.singletonList(rr)));
}
private PreemptionMessage generatePreemptionMessage(Allocation allocation) {
PreemptionMessage pMsg = null;
// assemble strict preemption request
if (allocation.getStrictContainerPreemptions() != null) {
pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
StrictPreemptionContract pStrict =
recordFactory.newRecordInstance(StrictPreemptionContract.class);
Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
for (ContainerId cId : allocation.getStrictContainerPreemptions()) {
PreemptionContainer pc =
recordFactory.newRecordInstance(PreemptionContainer.class);
pc.setId(cId);
pCont.add(pc);
}
pStrict.setContainers(pCont);
pMsg.setStrictContract(pStrict);
}
// assemble negotiable preemption request
if (allocation.getResourcePreemptions() != null &&
allocation.getResourcePreemptions().size() > 0 &&
allocation.getContainerPreemptions() != null &&
allocation.getContainerPreemptions().size() > 0) {
if (pMsg == null) {
pMsg = recordFactory.newRecordInstance(PreemptionMessage.class);
}
PreemptionContract contract =
recordFactory.newRecordInstance(PreemptionContract.class);
Set<PreemptionContainer> pCont = new HashSet<PreemptionContainer>();
for (ContainerId cId : allocation.getContainerPreemptions()) {
PreemptionContainer pc =
recordFactory.newRecordInstance(PreemptionContainer.class);
pc.setId(cId);
pCont.add(pc);
}
List<PreemptionResourceRequest> pRes =
new ArrayList<PreemptionResourceRequest>();
for (ResourceRequest crr : allocation.getResourcePreemptions()) {
PreemptionResourceRequest prr =
recordFactory.newRecordInstance(PreemptionResourceRequest.class);
prr.setResourceRequest(crr);
pRes.add(prr);
}
contract.setContainers(pCont);
contract.setResourceRequest(pRes);
pMsg.setContract(contract);
}
return pMsg;
}
}

View File

@ -1604,6 +1604,21 @@ public class TestRMContainerAllocator {
numPendingReduces,
maxReduceRampupLimit, reduceSlowStart);
verify(allocator).rampDownReduces(anyInt());
// Test reduce ramp-down for when there are still scheduled maps.
// After this second call to scheduleReduces, rampDownReduces
// should have been invoked twice in total.
scheduledMaps = 2;
assignedReduces = 2;
doReturn(10 * 1024).when(allocator).getMemLimit();
allocator.scheduleReduces(
totalMaps, succeededMaps,
scheduledMaps, scheduledReduces,
assignedMaps, assignedReduces,
mapResourceReqt, reduceResourceReqt,
numPendingReduces,
maxReduceRampupLimit, reduceSlowStart);
verify(allocator, times(2)).rampDownReduces(anyInt());
}
private static class RecalculateContainerAllocator extends MyContainerAllocator {

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.mapreduce.QueueInfo;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.apache.hadoop.mapreduce.TaskTrackerInfo;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.protocol.ClientProtocol;
import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
@ -575,10 +576,17 @@ public class LocalJobRunner implements ClientProtocol {
// TaskUmbilicalProtocol methods
@Override
public JvmTask getTask(JvmContext context) { return null; }
public synchronized boolean statusUpdate(TaskAttemptID taskId,
@Override
public synchronized AMFeedback statusUpdate(TaskAttemptID taskId,
TaskStatus taskStatus) throws IOException, InterruptedException {
AMFeedback feedback = new AMFeedback();
feedback.setTaskFound(true);
if (null == taskStatus) {
return feedback;
}
// Serialize as we would if distributed in order to make deep copy
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
@ -618,7 +626,7 @@ public class LocalJobRunner implements ClientProtocol {
}
// ignore phase
return true;
return feedback;
}
/** Return the current values of the counters for this job,
@ -654,24 +662,24 @@ public class LocalJobRunner implements ClientProtocol {
statusUpdate(taskid, taskStatus);
}
@Override
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) {
// Ignore for now
}
@Override
public void reportNextRecordRange(TaskAttemptID taskid,
SortedRanges.Range range) throws IOException {
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
}
public boolean ping(TaskAttemptID taskid) throws IOException {
return true;
}
@Override
public boolean canCommit(TaskAttemptID taskid)
throws IOException {
return true;
}
@Override
public void done(TaskAttemptID taskId) throws IOException {
int taskIndex = mapIds.indexOf(taskId);
if (taskIndex >= 0) { // mapping
@ -681,11 +689,13 @@ public class LocalJobRunner implements ClientProtocol {
}
}
@Override
public synchronized void fsError(TaskAttemptID taskId, String message)
throws IOException {
LOG.fatal("FSError: " + message + " from task: " + taskId);
}
@Override
public void shuffleError(TaskAttemptID taskId, String message) throws IOException {
LOG.fatal("shuffleError: " + message + " from task: " + taskId);
}
@ -695,12 +705,30 @@ public class LocalJobRunner implements ClientProtocol {
LOG.fatal("Fatal: " + msg + " from task: " + taskId);
}
@Override
public MapTaskCompletionEventsUpdate getMapCompletionEvents(JobID jobId,
int fromEventId, int maxLocs, TaskAttemptID id) throws IOException {
return new MapTaskCompletionEventsUpdate(
org.apache.hadoop.mapred.TaskCompletionEvent.EMPTY_ARRAY, false);
}
@Override
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
// ignore
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
// ignore
return null;
}
@Override
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
// ignore
}
}
public LocalJobRunner(Configuration conf) throws IOException {

View File

@ -44,6 +44,8 @@ import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
@ -83,11 +85,10 @@ public class TestMRWithDistributedCache extends TestCase {
private static final Log LOG =
LogFactory.getLog(TestMRWithDistributedCache.class);
public static class DistributedCacheChecker extends
Mapper<LongWritable, Text, NullWritable, NullWritable> {
private static class DistributedCacheChecker {
@Override
public void setup(Context context) throws IOException {
public void setup(TaskInputOutputContext<?, ?, ?, ?> context)
throws IOException {
Configuration conf = context.getConfiguration();
Path[] localFiles = context.getLocalCacheFiles();
URI[] files = context.getCacheFiles();
@ -101,6 +102,10 @@ public class TestMRWithDistributedCache extends TestCase {
TestCase.assertEquals(2, files.length);
TestCase.assertEquals(2, archives.length);
// Check the file name
TestCase.assertTrue(files[0].getPath().endsWith("distributed.first"));
TestCase.assertTrue(files[1].getPath().endsWith("distributed.second.jar"));
// Check lengths of the files
TestCase.assertEquals(1, fs.getFileStatus(localFiles[0]).getLen());
TestCase.assertTrue(fs.getFileStatus(localFiles[1]).getLen() > 1);
@ -130,6 +135,26 @@ public class TestMRWithDistributedCache extends TestCase {
TestCase.assertTrue("second file should be symlinked too",
expectedAbsentSymlinkFile.exists());
}
}
public static class DistributedCacheCheckerMapper extends
Mapper<LongWritable, Text, NullWritable, NullWritable> {
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
new DistributedCacheChecker().setup(context);
}
}
public static class DistributedCacheCheckerReducer extends
Reducer<LongWritable, Text, NullWritable, NullWritable> {
@Override
public void setup(Context context) throws IOException {
new DistributedCacheChecker().setup(context);
}
}
private void testWithConf(Configuration conf) throws IOException,
@ -146,7 +171,8 @@ public class TestMRWithDistributedCache extends TestCase {
Job job = Job.getInstance(conf);
job.setMapperClass(DistributedCacheChecker.class);
job.setMapperClass(DistributedCacheCheckerMapper.class);
job.setReducerClass(DistributedCacheCheckerReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration

View File

@ -0,0 +1,63 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
* This class is a simple struct to include both the taskFound information and
* a possible preemption request coming from the AM.
*/
public class AMFeedback implements Writable {
boolean taskFound;
boolean preemption;
public void setTaskFound(boolean t){
taskFound=t;
}
public boolean getTaskFound(){
return taskFound;
}
public void setPreemption(boolean preemption) {
this.preemption=preemption;
}
public boolean getPreemption() {
return preemption;
}
@Override
public void write(DataOutput out) throws IOException {
out.writeBoolean(taskFound);
out.writeBoolean(preemption);
}
@Override
public void readFields(DataInput in) throws IOException {
taskFound = in.readBoolean();
preemption = in.readBoolean();
}
}
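
Since AMFeedback travels over the umbilical RPC as a Writable, here is a round-trip sketch of the two flags (illustrative, not part of the patch):

import java.io.*;
import org.apache.hadoop.mapred.AMFeedback;

// Write the two flags out and read them back, as the RPC layer would.
public class AMFeedbackRoundTrip {
  public static void main(String[] args) throws IOException {
    AMFeedback in = new AMFeedback();
    in.setTaskFound(true);
    in.setPreemption(true);

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    in.write(new DataOutputStream(bytes));

    AMFeedback out = new AMFeedback();
    out.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));
    // out.getTaskFound() and out.getPreemption() are both true again
  }
}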

View File

@ -949,6 +949,23 @@ public class JobConf extends Configuration {
return get(KeyFieldBasedPartitioner.PARTITIONER_OPTIONS);
}
/**
* Get the user defined {@link WritableComparable} comparator for
* grouping keys of inputs to the combiner.
*
* @return comparator set by the user for grouping values.
* @see #setCombinerKeyGroupingComparator(Class) for details.
*/
public RawComparator getCombinerKeyGroupingComparator() {
Class<? extends RawComparator> theClass = getClass(
JobContext.COMBINER_GROUP_COMPARATOR_CLASS, null, RawComparator.class);
if (theClass == null) {
return getOutputKeyComparator();
}
return ReflectionUtils.newInstance(theClass, this);
}
/**
* Get the user defined {@link WritableComparable} comparator for
* grouping keys of inputs to the reduce.
@ -966,6 +983,37 @@ public class JobConf extends Configuration {
return ReflectionUtils.newInstance(theClass, this);
}
/**
* Set the user defined {@link RawComparator} comparator for
* grouping keys in the input to the combiner.
* <p/>
* <p>This comparator should be provided if the equivalence rules for keys
* for sorting the intermediates are different from those for grouping keys
* before each call to
* {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
* <p/>
* <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
* in a single call to the reduce function if K1 and K2 compare as equal.</p>
* <p/>
* <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
* how keys are sorted, this can be used in conjunction to simulate
* <i>secondary sort on values</i>.</p>
* <p/>
* <p><i>Note</i>: This is not a guarantee of the combiner sort being
* <i>stable</i> in any sense. (In any case, with the order of available
* map-outputs to the combiner being non-deterministic, it wouldn't make
* that much sense.)</p>
*
* @param theClass the comparator class to be used for grouping keys for the
* combiner. It should implement <code>RawComparator</code>.
* @see #setOutputKeyComparatorClass(Class)
*/
public void setCombinerKeyGroupingComparator(
Class<? extends RawComparator> theClass) {
setClass(JobContext.COMBINER_GROUP_COMPARATOR_CLASS,
theClass, RawComparator.class);
}
/**
* Set the user defined {@link RawComparator} comparator for
* grouping keys in the input to the reduce.
@ -990,6 +1038,7 @@ public class JobConf extends Configuration {
* @param theClass the comparator class to be used for grouping keys.
* It should implement <code>RawComparator</code>.
* @see #setOutputKeyComparatorClass(Class)
* @see #setCombinerKeyGroupingComparator(Class)
*/
public void setOutputValueGroupingComparator(
Class<? extends RawComparator> theClass) {

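A hypothetical usage sketch of the new combiner grouping knob (the comparator below is illustrative, not part of the patch): map output keys of the form primary#secondary stay fully sorted, while the combiner and the reduce group values by the primary part only.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.JobConf;

public class CombinerGroupingExample {

  // Hypothetical comparator: groups "primary#secondary" Text keys by the
  // primary part only, while the sort still sees the full key.
  public static class PrimaryKeyGroupingComparator extends WritableComparator {
    public PrimaryKeyGroupingComparator() {
      super(Text.class, true);
    }
    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
      String pa = a.toString().split("#", 2)[0];
      String pb = b.toString().split("#", 2)[0];
      return pa.compareTo(pb);
    }
  }

  public static JobConf configure(JobConf conf) {
    // default Text ordering sorts on the full "primary#secondary" key;
    // the combiner and the reduce both group on the primary part only
    conf.setCombinerKeyGroupingComparator(PrimaryKeyGroupingComparator.class);
    conf.setOutputValueGroupingComparator(PrimaryKeyGroupingComparator.class);
    return conf;
  }
}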
View File

@ -187,6 +187,7 @@ abstract public class Task implements Writable, Configurable {
protected SecretKey tokenSecret;
protected SecretKey shuffleSecret;
protected GcTimeUpdater gcUpdater;
final AtomicBoolean mustPreempt = new AtomicBoolean(false);
////////////////////////////////////////////
// Constructors
@ -711,6 +712,7 @@ abstract public class Task implements Writable, Configurable {
}
try {
boolean taskFound = true; // whether TT knows about this task
AMFeedback amFeedback = null;
// sleep for a bit
synchronized(lock) {
if (taskDone.get()) {
@ -728,12 +730,14 @@ abstract public class Task implements Writable, Configurable {
taskStatus.statusUpdate(taskProgress.get(),
taskProgress.toString(),
counters);
taskFound = umbilical.statusUpdate(taskId, taskStatus);
amFeedback = umbilical.statusUpdate(taskId, taskStatus);
taskFound = amFeedback.getTaskFound();
taskStatus.clearStatus();
}
else {
// send ping
taskFound = umbilical.ping(taskId);
amFeedback = umbilical.statusUpdate(taskId, null);
taskFound = amFeedback.getTaskFound();
}
// if Task Tracker is not aware of our task ID (probably because it died and
@ -744,6 +748,17 @@ abstract public class Task implements Writable, Configurable {
System.exit(66);
}
// Set a flag that says we should preempt; this is read by
// ReduceTasks at points in the execution where it is
// safe/easy to preempt
boolean lastPreempt = mustPreempt.get();
mustPreempt.set(mustPreempt.get() || amFeedback.getPreemption());
if (lastPreempt ^ mustPreempt.get()) {
LOG.info("PREEMPTION TASK: setting mustPreempt to " +
mustPreempt.get() + " given " + amFeedback.getPreemption() +
" for " + taskId + " task status: " + taskStatus.getPhase());
}
sendProgress = resetProgressFlag();
remainingRetries = MAX_RETRIES;
}
@ -992,10 +1007,17 @@ abstract public class Task implements Writable, Configurable {
public void done(TaskUmbilicalProtocol umbilical,
TaskReporter reporter
) throws IOException, InterruptedException {
updateCounters();
if (taskStatus.getRunState() == TaskStatus.State.PREEMPTED ) {
// If we are preempted, do no output promotion; signal done and exit
committer.commitTask(taskContext);
umbilical.preempted(taskId, taskStatus);
taskDone.set(true);
reporter.stopCommunicationThread();
return;
}
LOG.info("Task:" + taskId + " is done."
+ " And is in the process of committing");
updateCounters();
boolean commitRequired = isCommitRequired();
if (commitRequired) {
int retries = MAX_RETRIES;
@ -1054,7 +1076,7 @@ abstract public class Task implements Writable, Configurable {
int retries = MAX_RETRIES;
while (true) {
try {
if (!umbilical.statusUpdate(getTaskID(), taskStatus)) {
if (!umbilical.statusUpdate(getTaskID(), taskStatus).getTaskFound()) {
LOG.warn("Parent died. Exiting "+taskId);
System.exit(66);
}
@ -1098,8 +1120,8 @@ abstract public class Task implements Writable, Configurable {
if (isMapTask() && conf.getNumReduceTasks() > 0) {
try {
Path mapOutput = mapOutputFile.getOutputFile();
FileSystem localFS = FileSystem.getLocal(conf);
return localFS.getFileStatus(mapOutput).getLen();
FileSystem fs = mapOutput.getFileSystem(conf);
return fs.getFileStatus(mapOutput).getLen();
} catch (IOException e) {
LOG.warn ("Could not find output size " , e);
}
@ -1553,7 +1575,8 @@ abstract public class Task implements Writable, Configurable {
combinerClass = cls;
keyClass = (Class<K>) job.getMapOutputKeyClass();
valueClass = (Class<V>) job.getMapOutputValueClass();
comparator = (RawComparator<K>) job.getOutputKeyComparator();
comparator = (RawComparator<K>)
job.getCombinerKeyGroupingComparator();
}
@SuppressWarnings("unchecked")
@ -1602,7 +1625,7 @@ abstract public class Task implements Writable, Configurable {
this.taskId = taskId;
keyClass = (Class<K>) context.getMapOutputKeyClass();
valueClass = (Class<V>) context.getMapOutputValueClass();
comparator = (RawComparator<K>) context.getSortComparator();
comparator = (RawComparator<K>) context.getCombinerKeyGroupingComparator();
this.committer = committer;
}
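
The mustPreempt flag set in the reporter loop above is only a signal; acting on it is left to code at a point where stopping is safe. An illustrative shape of such a consumer inside Task (a sketch, not from this patch):

// Illustrative only: a preemption-safe point consuming the flag that
// the reporter thread sets from AMFeedback.
if (mustPreempt.get()) {
  taskStatus.setRunState(TaskStatus.State.PREEMPTED);
  // done() then takes the preemption branch shown above: commit the
  // partial output, call umbilical.preempted(), and return early
  done(umbilical, reporter);
  return;
}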

View File

@ -51,7 +51,7 @@ public abstract class TaskStatus implements Writable, Cloneable {
@InterfaceAudience.Private
@InterfaceStability.Unstable
public static enum State {RUNNING, SUCCEEDED, FAILED, UNASSIGNED, KILLED,
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN}
COMMIT_PENDING, FAILED_UNCLEAN, KILLED_UNCLEAN, PREEMPTED}
private final TaskAttemptID taskid;
private float progress;

View File

@ -24,6 +24,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.ipc.VersionedProtocol;
import org.apache.hadoop.mapred.JvmTask;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.security.token.JobTokenSelector;
import org.apache.hadoop.security.token.TokenInfo;
@ -64,9 +67,10 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
* Version 17 Modified TaskID to be aware of the new TaskTypes
* Version 18 Added numRequiredSlots to TaskStatus for MAPREDUCE-516
* Version 19 Added fatalError for child to communicate fatal errors to TT
* Version 20 Added methods to manage checkpoints
* */
public static final long versionID = 19L;
public static final long versionID = 20L;
/**
* Called when a child task process starts, to get its task.
@ -78,7 +82,8 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
JvmTask getTask(JvmContext context) throws IOException;
/**
* Report child's progress to parent.
* Report child's progress to parent. Also invoked to report that the child
* is still alive (this used to be done via ping). Returns an AMFeedback used
* to propagate preemption requests.
*
* @param taskId task-id of the child
* @param taskStatus status of the child
@ -86,7 +91,7 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
* @throws InterruptedException
* @return an AMFeedback, which includes whether the task is known
*/
boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException;
/** Report error messages back to parent. Calls should be sparing, since all
@ -105,11 +110,6 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
void reportNextRecordRange(TaskAttemptID taskid, SortedRanges.Range range)
throws IOException;
/** Periodically called by child to check if parent is still alive.
* @return True if the task is known
*/
boolean ping(TaskAttemptID taskid) throws IOException;
/** Report that the task is successfully completed. Failure is assumed if
* the task process exits without calling this.
* @param taskid task's id
@ -161,4 +161,33 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
TaskAttemptID id)
throws IOException;
/**
* Report to the AM that the task has been successfully preempted.
*
* @param taskId task's id
* @param taskStatus status of the child
* @throws IOException
*/
void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException;
/**
* Return the latest CheckpointID for the given TaskID. This provides
* the task with a way to locate the checkpointed data and restart from
* that point in the computation.
*
* @param taskID task's id
* @return the most recent checkpoint (if any) for this task
* @throws IOException
*/
TaskCheckpointID getCheckpointID(TaskID taskID);
/**
* Send a CheckpointID for a given TaskID to be stored in the AM,
* to later restart a task from this checkpoint.
* @param tid task's id
* @param cid the checkpoint to store (null clears previous checkpoints)
*/
void setCheckpointID(TaskID tid, TaskCheckpointID cid);
}
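
Taken together, the two new checkpoint methods support a save/restore handshake. An illustrative restart flow (resumeFrom and newCid are hypothetical names; error handling elided):

// On (re)start: ask the AM for the latest checkpoint, if any.
TaskCheckpointID cid = umbilical.getCheckpointID(taskId.getTaskID());
if (cid != null) {
  resumeFrom(cid);  // hypothetical helper: reopen partial output, skip work
}
// ... later, once the task materializes a new checkpoint:
umbilical.setCheckpointID(taskId.getTaskID(), newCid);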

View File

@ -948,11 +948,27 @@ public class Job extends JobContextImpl implements JobContext {
conf.setOutputValueClass(theClass);
}
/**
* Define the comparator that controls which keys are grouped together
* for a single call to the combiner,
* {@link Reducer#reduce(Object, Iterable,
* org.apache.hadoop.mapreduce.Reducer.Context)}
*
* @param cls the raw comparator to use
* @throws IllegalStateException if the job is submitted
*/
public void setCombinerKeyGroupingComparatorClass(
Class<? extends RawComparator> cls) throws IllegalStateException {
ensureState(JobState.DEFINE);
conf.setCombinerKeyGroupingComparator(cls);
}
/**
* Define the comparator that controls how the keys are sorted before they
* are passed to the {@link Reducer}.
* @param cls the raw comparator
* @throws IllegalStateException if the job is submitted
* @see #setCombinerKeyGroupingComparatorClass(Class)
*/
public void setSortComparatorClass(Class<? extends RawComparator> cls
) throws IllegalStateException {
@ -967,6 +983,7 @@ public class Job extends JobContextImpl implements JobContext {
* org.apache.hadoop.mapreduce.Reducer.Context)}
* @param cls the raw comparator to use
* @throws IllegalStateException if the job is submitted
* @see #setCombinerKeyGroupingComparatorClass(Class)
*/
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
) throws IllegalStateException {

View File

@ -167,12 +167,22 @@ public interface JobContext extends MRJobConfig {
*/
public String getJar();
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the combiner.
*
* @return comparator set by the user for grouping values.
* @see Job#setCombinerKeyGroupingComparatorClass(Class)
*/
public RawComparator<?> getCombinerKeyGroupingComparator();
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the reduce.
*
* @return comparator set by the user for grouping values.
* @see Job#setGroupingComparatorClass(Class) for details.
* @see Job#setGroupingComparatorClass(Class)
* @see #getCombinerKeyGroupingComparator()
*/
public RawComparator<?> getGroupingComparator();

View File

@ -93,6 +93,8 @@ public interface MRJobConfig {
public static final String KEY_COMPARATOR = "mapreduce.job.output.key.comparator.class";
public static final String COMBINER_GROUP_COMPARATOR_CLASS = "mapreduce.job.combiner.group.comparator.class";
public static final String GROUP_COMPARATOR_CLASS = "mapreduce.job.output.group.comparator.class";
public static final String WORKING_DIR = "mapreduce.job.working.dir";

View File

@ -36,36 +36,30 @@ import org.apache.hadoop.mapred.Counters;
*/
public class TaskCheckpointID implements CheckpointID {
FSCheckpointID rawId;
private List<Path> partialOutput;
private Counters counters;
final FSCheckpointID rawId;
private final List<Path> partialOutput;
private final Counters counters;
public TaskCheckpointID() {
this.rawId = new FSCheckpointID();
this.partialOutput = new ArrayList<Path>();
this(new FSCheckpointID(), new ArrayList<Path>(), new Counters());
}
public TaskCheckpointID(FSCheckpointID rawId, List<Path> partialOutput,
Counters counters) {
this.rawId = rawId;
this.counters = counters;
if(partialOutput == null)
this.partialOutput = new ArrayList<Path>();
else
this.partialOutput = partialOutput;
this.partialOutput = null == partialOutput
? new ArrayList<Path>()
: partialOutput;
}
@Override
public void write(DataOutput out) throws IOException {
counters.write(out);
if (partialOutput == null) {
WritableUtils.writeVLong(out, 0L);
} else {
WritableUtils.writeVLong(out, partialOutput.size());
for (Path p : partialOutput) {
Text.writeString(out, p.toString());
}
}
rawId.write(out);
}
@ -74,21 +68,22 @@ public class TaskCheckpointID implements CheckpointID{
partialOutput.clear();
counters.readFields(in);
long numPout = WritableUtils.readVLong(in);
for(int i=0;i<numPout;i++)
for (int i = 0; i < numPout; i++) {
partialOutput.add(new Path(Text.readString(in)));
}
rawId.readFields(in);
}
@Override
public boolean equals(Object other) {
if (other instanceof TaskCheckpointID){
return this.rawId.equals(((TaskCheckpointID)other).rawId) &&
this.counters.equals(((TaskCheckpointID) other).counters) &&
this.partialOutput.containsAll(((TaskCheckpointID) other).partialOutput) &&
((TaskCheckpointID) other).partialOutput.containsAll(this.partialOutput);
} else {
return false;
TaskCheckpointID o = (TaskCheckpointID) other;
return rawId.equals(o.rawId) &&
counters.equals(o.counters) &&
partialOutput.containsAll(o.partialOutput) &&
o.partialOutput.containsAll(partialOutput);
}
return false;
}
@Override

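The equals() above compares partialOutput with containsAll in both directions, i.e. as an order-insensitive set. A small illustration (hypothetical paths):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;

public class CheckpointEqualityExample {
  public static void main(String[] args) {
    List<Path> ab = Arrays.asList(new Path("/part1"), new Path("/part2"));
    List<Path> ba = Arrays.asList(new Path("/part2"), new Path("/part1"));
    TaskCheckpointID x = new TaskCheckpointID(
        new FSCheckpointID(new Path("/ckpt")), ab, new Counters());
    TaskCheckpointID y = new TaskCheckpointID(
        new FSCheckpointID(new Path("/ckpt")), ba, new Counters());
    // ordering of the partial-output paths does not matter:
    System.out.println(x.equals(y));   // true
  }
}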
View File

@ -166,6 +166,11 @@ class ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
return base.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return base.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return base.getGroupingComparator();

View File

@ -159,6 +159,11 @@ class ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT> implements
return base.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return base.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return base.getGroupingComparator();

View File

@ -168,6 +168,11 @@ public class WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
return mapContext.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return mapContext.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return mapContext.getGroupingComparator();

View File

@ -137,7 +137,7 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
@Override
public URI[] getCacheFiles() throws IOException {
return reduceContext.getCacheArchives();
return reduceContext.getCacheFiles();
}
@Override
@ -161,6 +161,11 @@ public class WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
return reduceContext.getFileTimestamps();
}
@Override
public RawComparator<?> getCombinerKeyGroupingComparator() {
return reduceContext.getCombinerKeyGroupingComparator();
}
@Override
public RawComparator<?> getGroupingComparator() {
return reduceContext.getGroupingComparator();

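The one-line change above fixes a copy-paste bug where getCacheFiles() delegated to getCacheArchives(). A Mockito-based sketch of the corrected delegation (raw types for brevity):

import java.net.URI;
import org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.mockito.Mockito;

public class WrappedReducerDelegation {
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public static void main(String[] args) throws Exception {
    ReduceContext ctx = Mockito.mock(ReduceContext.class);
    Mockito.when(ctx.getCacheFiles())
        .thenReturn(new URI[] { new URI("file:///cache/one") });
    Reducer.Context wrapped = new WrappedReducer().getReducerContext(ctx);
    // Before this fix the wrapper returned getCacheArchives() here.
    System.out.println(wrapped.getCacheFiles()[0]); // expect: file:///cache/one
  }
}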
View File

@ -252,6 +252,17 @@ public class JobContextImpl implements JobContext {
return conf.getJar();
}
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the combiner.
*
* @return comparator set by the user for grouping values.
* @see Job#setCombinerKeyGroupingComparatorClass(Class) for details.
*/
public RawComparator<?> getCombinerKeyGroupingComparator() {
return conf.getCombinerKeyGroupingComparator();
}
/**
* Get the user defined {@link RawComparator} comparator for
* grouping keys of inputs to the reduce.

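Because the wrapped mapper and reducer contexts above all delegate these getters, tasks can inspect both comparators at runtime; a small sketch:

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class InspectComparators extends Mapper<LongWritable, Text, Text, LongWritable> {
  @Override
  protected void setup(Context context)
      throws IOException, InterruptedException {
    // Context extends JobContext, so both grouping comparators are visible.
    RawComparator<?> combinerGrouping = context.getCombinerKeyGroupingComparator();
    RawComparator<?> reduceGrouping = context.getGroupingComparator();
  }
}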
View File

@ -582,7 +582,7 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
Class<V> valClass = (Class<V>) job.getMapOutputValueClass();
RawComparator<K> comparator =
(RawComparator<K>)job.getOutputKeyComparator();
(RawComparator<K>)job.getCombinerKeyGroupingComparator();
try {
CombineValuesIterator values = new CombineValuesIterator(
kvIter, comparator, keyClass, valClass, job, Reporter.NULL,

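With this change, the combiner run during merges groups values with the combiner key grouping comparator rather than the raw output key comparator. A sketch of the matching old-API configuration (when the setter is never called, behavior presumably falls back to the previous default shown in the removed line above):

import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class CombinerGroupingConf {
  static RawComparator<?> combinerComparator() {
    JobConf job = new JobConf();
    // Old-API setter introduced alongside this change
    // (see TestOldCombinerGrouping below); Text.Comparator is a stand-in.
    job.setCombinerKeyGroupingComparator(Text.Comparator.class);
    return job.getCombinerKeyGroupingComparator();
  }
}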
View File

@ -88,6 +88,8 @@ import org.apache.hadoop.yarn.util.Records;
import org.apache.hadoop.yarn.webapp.WebApp;
import org.apache.hadoop.yarn.webapp.WebApps;
import com.google.common.annotations.VisibleForTesting;
/**
* This module is responsible for talking to the
* JobClient (user facing).
@ -142,7 +144,8 @@ public class HistoryClientService extends AbstractService {
super.serviceStart();
}
private void initializeWebApp(Configuration conf) {
@VisibleForTesting
protected void initializeWebApp(Configuration conf) {
webApp = new HsWebApp(history);
InetSocketAddress bindAddress = MRWebAppUtil.getJHSWebBindAddress(conf);
// NOTE: there should be a .at(InetSocketAddress)

View File

@ -45,6 +45,8 @@ import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.logaggregation.AggregatedLogDeletionService;
import com.google.common.annotations.VisibleForTesting;
/******************************************************************
* {@link JobHistoryServer} is responsible for servicing all job history
* related requests from client.
@ -60,10 +62,10 @@ public class JobHistoryServer extends CompositeService {
public static final long historyServerTimeStamp = System.currentTimeMillis();
private static final Log LOG = LogFactory.getLog(JobHistoryServer.class);
private HistoryContext historyContext;
protected HistoryContext historyContext;
private HistoryClientService clientService;
private JobHistory jobHistoryService;
private JHSDelegationTokenSecretManager jhsDTSecretManager;
protected JHSDelegationTokenSecretManager jhsDTSecretManager;
private AggregatedLogDeletionService aggLogDelService;
private HSAdminServer hsAdminServer;
private HistoryServerStateStoreService stateStore;
@ -129,8 +131,7 @@ public class JobHistoryServer extends CompositeService {
historyContext = (HistoryContext)jobHistoryService;
stateStore = createStateStore(conf);
this.jhsDTSecretManager = createJHSSecretManager(conf, stateStore);
clientService = new HistoryClientService(historyContext,
this.jhsDTSecretManager);
clientService = createHistoryClientService();
aggLogDelService = new AggregatedLogDeletionService();
hsAdminServer = new HSAdminServer(aggLogDelService, jobHistoryService);
addService(stateStore);
@ -142,6 +143,12 @@ public class JobHistoryServer extends CompositeService {
super.serviceInit(config);
}
@VisibleForTesting
protected HistoryClientService createHistoryClientService() {
return new HistoryClientService(historyContext,
this.jhsDTSecretManager);
}
protected JHSDelegationTokenSecretManager createJHSSecretManager(
Configuration conf, HistoryServerStateStoreService store) {
long secretKeyInterval =

View File

@ -20,7 +20,6 @@ package org.apache.hadoop.mapred;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import junit.framework.TestCase;
@ -29,20 +28,17 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.mapreduce.split.JobSplit.SplitMetaInfo;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitIndex;
import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
import org.apache.hadoop.mapreduce.split.JobSplitWriter;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.util.ReflectionUtils;
/**
@ -110,11 +106,16 @@ public class TestMapProgress extends TestCase {
statusUpdate(taskId, taskStatus);
}
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
statusUpdate(taskId, taskStatus);
}
public boolean canCommit(TaskAttemptID taskid) throws IOException {
return true;
}
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
StringBuffer buf = new StringBuffer("Task ");
buf.append(taskId);
@ -128,7 +129,9 @@ public class TestMapProgress extends TestCase {
LOG.info(buf.toString());
// ignore phase
// ignore counters
return true;
AMFeedback a = new AMFeedback();
a.setTaskFound(true);
return a;
}
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException {
@ -145,6 +148,17 @@ public class TestMapProgress extends TestCase {
SortedRanges.Range range) throws IOException {
LOG.info("Task " + taskid + " reportedNextRecordRange " + range);
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
// do nothing
return null;
}
@Override
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
// do nothing
}
}
private FileSystem fs = null;

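statusUpdate now returns an AMFeedback record instead of a boolean. A sketch of the task-side reading, assuming AMFeedback exposes getTaskFound() to match the setter used above:

import java.io.IOException;
import org.apache.hadoop.mapred.AMFeedback;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;

public class HeartbeatSketch {
  // Returns false when the AM no longer knows this attempt,
  // in which case the task should exit.
  static boolean heartbeat(TaskUmbilicalProtocol umbilical, TaskAttemptID id,
      TaskStatus status) throws IOException, InterruptedException {
    AMFeedback feedback = umbilical.statusUpdate(id, status);
    return feedback.getTaskFound(); // assumed getter paired with setTaskFound
  }
}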
View File

@ -0,0 +1,191 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import junit.framework.Assert;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.UUID;
public class TestOldCombinerGrouping {
private static String TEST_ROOT_DIR =
new File("build", UUID.randomUUID().toString()).getAbsolutePath();
public static class Map implements
Mapper<LongWritable, Text, Text, LongWritable> {
@Override
public void map(LongWritable key, Text value,
OutputCollector<Text, LongWritable> output, Reporter reporter)
throws IOException {
String v = value.toString();
String k = v.substring(0, v.indexOf(","));
v = v.substring(v.indexOf(",") + 1);
output.collect(new Text(k), new LongWritable(Long.parseLong(v)));
}
@Override
public void close() throws IOException {
}
@Override
public void configure(JobConf job) {
}
}
public static class Reduce implements
Reducer<Text, LongWritable, Text, LongWritable> {
@Override
public void reduce(Text key, Iterator<LongWritable> values,
OutputCollector<Text, LongWritable> output, Reporter reporter)
throws IOException {
LongWritable maxValue = null;
while (values.hasNext()) {
LongWritable value = values.next();
if (maxValue == null) {
maxValue = value;
} else if (value.compareTo(maxValue) > 0) {
maxValue = value;
}
}
output.collect(key, maxValue);
}
@Override
public void close() throws IOException {
}
@Override
public void configure(JobConf job) {
}
}
public static class Combiner extends Reduce {
}
public static class GroupComparator implements RawComparator<Text> {
@Override
public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
int i4) {
byte[] b1 = new byte[i2];
System.arraycopy(bytes, i, b1, 0, i2);
byte[] b2 = new byte[i4];
System.arraycopy(bytes2, i3, b2, 0, i4);
return compare(new Text(new String(b1)), new Text(new String(b2)));
}
@Override
public int compare(Text o1, Text o2) {
String s1 = o1.toString();
String s2 = o2.toString();
s1 = s1.substring(0, s1.indexOf("|"));
s2 = s2.substring(0, s2.indexOf("|"));
return s1.compareTo(s2);
}
}
@Test
public void testCombiner() throws Exception {
if (!new File(TEST_ROOT_DIR).mkdirs()) {
throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
}
File in = new File(TEST_ROOT_DIR, "input");
if (!in.mkdirs()) {
throw new RuntimeException("Could not create test dir: " + in);
}
File out = new File(TEST_ROOT_DIR, "output");
PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
pw.println("A|a,1");
pw.println("A|b,2");
pw.println("B|a,3");
pw.println("B|b,4");
pw.println("B|c,5");
pw.close();
JobConf job = new JobConf();
job.set("mapreduce.framework.name", "local");
TextInputFormat.setInputPaths(job, new Path(in.getPath()));
TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormat(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputFormat(TextOutputFormat.class);
job.setOutputValueGroupingComparator(GroupComparator.class);
job.setCombinerClass(Combiner.class);
job.setCombinerKeyGroupingComparator(GroupComparator.class);
job.setInt("min.num.spills.for.combine", 0);
JobClient client = new JobClient(job);
RunningJob runningJob = client.submitJob(job);
runningJob.waitForCompletion();
if (runningJob.isSuccessful()) {
Counters counters = runningJob.getCounters();
long combinerInputRecords = counters.getGroup(
"org.apache.hadoop.mapreduce.TaskCounter").
getCounter("COMBINE_INPUT_RECORDS");
long combinerOutputRecords = counters.getGroup(
"org.apache.hadoop.mapreduce.TaskCounter").
getCounter("COMBINE_OUTPUT_RECORDS");
Assert.assertTrue(combinerInputRecords > 0);
Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
BufferedReader br = new BufferedReader(new FileReader(
new File(out, "part-00000")));
Set<String> output = new HashSet<String>();
String line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNull(line);
br.close();
Set<String> expected = new HashSet<String>();
expected.add("A2");
expected.add("B5");
Assert.assertEquals(expected, output);
} else {
Assert.fail("Job failed");
}
}
}

View File

@ -27,6 +27,10 @@ import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.ProtocolSignature;
import org.apache.hadoop.mapred.SortedRanges.Range;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID;
public class TestTaskCommit extends HadoopTestCase {
Path rootDir =
@ -131,11 +135,6 @@ public class TestTaskCommit extends HadoopTestCase {
return null;
}
@Override
public boolean ping(TaskAttemptID taskid) throws IOException {
return true;
}
@Override
public void reportDiagnosticInfo(TaskAttemptID taskid, String trace)
throws IOException {
@ -152,9 +151,11 @@ public class TestTaskCommit extends HadoopTestCase {
}
@Override
public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
return true;
AMFeedback a = new AMFeedback();
a.setTaskFound(true);
return a;
}
@Override
@ -168,6 +169,22 @@ public class TestTaskCommit extends HadoopTestCase {
long clientVersion, int clientMethodsHash) throws IOException {
return null;
}
@Override
public void preempted(TaskAttemptID taskId, TaskStatus taskStatus)
throws IOException, InterruptedException {
fail("Task should not go to commit-pending");
}
@Override
public TaskCheckpointID getCheckpointID(TaskID taskId) {
return null;
}
@Override
public void setCheckpointID(TaskID downgrade, TaskCheckpointID cid) {
// ignore
}
}
/**

View File

@ -0,0 +1,178 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import junit.framework.Assert;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
public class TestNewCombinerGrouping {
private static String TEST_ROOT_DIR =
new File("build", UUID.randomUUID().toString()).getAbsolutePath();
public static class Map extends
Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LongWritable key, Text value,
Context context)
throws IOException, InterruptedException {
String v = value.toString();
String k = v.substring(0, v.indexOf(","));
v = v.substring(v.indexOf(",") + 1);
context.write(new Text(k), new LongWritable(Long.parseLong(v)));
}
}
public static class Reduce extends
Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text key, Iterable<LongWritable> values,
Context context)
throws IOException, InterruptedException {
LongWritable maxValue = null;
for (LongWritable value : values) {
if (maxValue == null) {
maxValue = value;
} else if (value.compareTo(maxValue) > 0) {
maxValue = value;
}
}
context.write(key, maxValue);
}
}
public static class Combiner extends Reduce {
}
public static class GroupComparator implements RawComparator<Text> {
@Override
public int compare(byte[] bytes, int i, int i2, byte[] bytes2, int i3,
int i4) {
byte[] b1 = new byte[i2];
System.arraycopy(bytes, i, b1, 0, i2);
byte[] b2 = new byte[i4];
System.arraycopy(bytes2, i3, b2, 0, i4);
return compare(new Text(new String(b1)), new Text(new String(b2)));
}
@Override
public int compare(Text o1, Text o2) {
String s1 = o1.toString();
String s2 = o2.toString();
s1 = s1.substring(0, s1.indexOf("|"));
s2 = s2.substring(0, s2.indexOf("|"));
return s1.compareTo(s2);
}
}
@Test
public void testCombiner() throws Exception {
if (!new File(TEST_ROOT_DIR).mkdirs()) {
throw new RuntimeException("Could not create test dir: " + TEST_ROOT_DIR);
}
File in = new File(TEST_ROOT_DIR, "input");
if (!in.mkdirs()) {
throw new RuntimeException("Could not create test dir: " + in);
}
File out = new File(TEST_ROOT_DIR, "output");
PrintWriter pw = new PrintWriter(new FileWriter(new File(in, "data.txt")));
pw.println("A|a,1");
pw.println("A|b,2");
pw.println("B|a,3");
pw.println("B|b,4");
pw.println("B|c,5");
pw.close();
JobConf conf = new JobConf();
conf.set("mapreduce.framework.name", "local");
Job job = new Job(conf);
TextInputFormat.setInputPaths(job, new Path(in.getPath()));
TextOutputFormat.setOutputPath(job, new Path(out.getPath()));
job.setMapperClass(Map.class);
job.setReducerClass(Reduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setGroupingComparatorClass(GroupComparator.class);
job.setCombinerKeyGroupingComparatorClass(GroupComparator.class);
job.setCombinerClass(Combiner.class);
job.getConfiguration().setInt("min.num.spills.for.combine", 0);
job.submit();
job.waitForCompletion(false);
if (job.isSuccessful()) {
Counters counters = job.getCounters();
long combinerInputRecords = counters.findCounter(
"org.apache.hadoop.mapreduce.TaskCounter",
"COMBINE_INPUT_RECORDS").getValue();
long combinerOutputRecords = counters.findCounter(
"org.apache.hadoop.mapreduce.TaskCounter",
"COMBINE_OUTPUT_RECORDS").getValue();
Assert.assertTrue(combinerInputRecords > 0);
Assert.assertTrue(combinerInputRecords > combinerOutputRecords);
BufferedReader br = new BufferedReader(new FileReader(
new File(out, "part-r-00000")));
Set<String> output = new HashSet<String>();
String line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNotNull(line);
output.add(line.substring(0, 1) + line.substring(4, 5));
line = br.readLine();
Assert.assertNull(line);
br.close();
Set<String> expected = new HashSet<String>();
expected.add("A2");
expected.add("B5");
Assert.assertEquals(expected, output);
} else {
Assert.fail("Job failed");
}
}
}

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.v2.api.protocolrecords.CancelDelegationTokenR
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.RenewDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.hs.HistoryClientService;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService;
import org.apache.hadoop.mapreduce.v2.hs.JHSDelegationTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
@ -94,6 +95,17 @@ public class TestJHSSecurity {
return new JHSDelegationTokenSecretManager(initialInterval,
maxLifetime, renewInterval, 3600000, store);
}
@Override
protected HistoryClientService createHistoryClientService() {
return new HistoryClientService(historyContext,
this.jhsDTSecretManager) {
@Override
protected void initializeWebApp(Configuration conf) {
// Don't need it, skip.
}
};
}
};
// final JobHistoryServer jobHistoryServer = jhServer;
jobHistoryServer.init(conf);

View File

@ -115,7 +115,7 @@ public class TestUmbilicalProtocolWithJobToken {
proxy = (TaskUmbilicalProtocol) RPC.getProxy(
TaskUmbilicalProtocol.class, TaskUmbilicalProtocol.versionID,
addr, conf);
proxy.ping(null);
proxy.statusUpdate(null, null);
} finally {
server.stop();
if (proxy != null) {

View File

@ -519,7 +519,7 @@
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.1</version>
<version>1.1.3</version>
<exclusions>
<exclusion>
<groupId>avalon-framework</groupId>

View File

@ -64,8 +64,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@ -105,8 +106,8 @@ public class ResourceSchedulerWrapper implements
private Configuration conf;
private ResourceScheduler scheduler;
private Map<ApplicationAttemptId, String> appQueueMap =
new ConcurrentHashMap<ApplicationAttemptId, String>();
private Map<ApplicationId, String> appQueueMap =
new ConcurrentHashMap<ApplicationId, String>();
private BufferedWriter jobRuntimeLogBW;
// Priority of the ResourceSchedulerWrapper shutdown hook.
@ -240,7 +241,7 @@ public class ResourceSchedulerWrapper implements
(AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId =
appRemoveEvent.getApplicationAttemptID();
String queue = appQueueMap.get(appAttemptId);
String queue = appQueueMap.get(appAttemptId.getApplicationId());
SchedulerAppReport app = scheduler.getSchedulerAppInfo(appAttemptId);
if (! app.getLiveContainers().isEmpty()) { // have 0 or 1
// should have one container which is AM container
@ -262,20 +263,18 @@ public class ResourceSchedulerWrapper implements
schedulerHandleCounter.inc();
schedulerHandleCounterMap.get(schedulerEvent.getType()).inc();
if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED
&& schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) {
if (schedulerEvent.getType() == SchedulerEventType.APP_REMOVED
&& schedulerEvent instanceof AppRemovedSchedulerEvent) {
SLSRunner.decreaseRemainingApps();
AppAttemptRemovedSchedulerEvent appRemoveEvent =
(AppAttemptRemovedSchedulerEvent) schedulerEvent;
ApplicationAttemptId appAttemptId =
appRemoveEvent.getApplicationAttemptID();
appQueueMap.remove(appRemoveEvent.getApplicationAttemptID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED
&& schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
AppAttemptAddedSchedulerEvent appAddEvent =
(AppAttemptAddedSchedulerEvent) schedulerEvent;
AppRemovedSchedulerEvent appRemoveEvent =
(AppRemovedSchedulerEvent) schedulerEvent;
appQueueMap.remove(appRemoveEvent.getApplicationID());
} else if (schedulerEvent.getType() == SchedulerEventType.APP_ADDED
&& schedulerEvent instanceof AppAddedSchedulerEvent) {
AppAddedSchedulerEvent appAddEvent =
(AppAddedSchedulerEvent) schedulerEvent;
String queueName = appAddEvent.getQueue();
appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
appQueueMap.put(appAddEvent.getApplicationId(), queueName);
}
}
}
@ -297,7 +296,9 @@ public class ResourceSchedulerWrapper implements
continue;
}
String queue = appQueueMap.get(containerId.getApplicationAttemptId());
String queue =
appQueueMap.get(containerId.getApplicationAttemptId()
.getApplicationId());
int releasedMemory = 0, releasedVCores = 0;
if (status.getExitStatus() == ContainerExitStatus.SUCCESS) {
for (RMContainer rmc : app.getLiveContainers()) {
@ -329,7 +330,7 @@ public class ResourceSchedulerWrapper implements
// update queue information
Resource pendingResource = Resources.createResource(0, 0);
Resource allocatedResource = Resources.createResource(0, 0);
String queueName = appQueueMap.get(attemptId);
String queueName = appQueueMap.get(attemptId.getApplicationId());
// container requested
for (ResourceRequest request : resourceRequests) {
if (request.getResourceName().equals(ResourceRequest.ANY)) {

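The wrapper now keys its queue map by ApplicationId, so all attempts of an app share one entry and lookups survive AM restarts. A condensed sketch of the lookup path:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;

public class QueueLookup {
  private final Map<ApplicationId, String> appQueueMap =
      new ConcurrentHashMap<ApplicationId, String>();

  // Every attempt and container of an app resolves to the same queue entry.
  String queueOf(ContainerId containerId) {
    return appQueueMap.get(
        containerId.getApplicationAttemptId().getApplicationId());
  }
}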
View File

@ -285,8 +285,11 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
&& schedulerEvent instanceof AppAttemptAddedSchedulerEvent) {
AppAttemptAddedSchedulerEvent appAddEvent =
(AppAttemptAddedSchedulerEvent) schedulerEvent;
String queueName = appAddEvent.getQueue();
appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName);
SchedulerApplication app =
applications.get(appAddEvent.getApplicationAttemptId()
.getApplicationId());
appQueueMap.put(appAddEvent.getApplicationAttemptId(), app.getQueue()
.getQueueName());
}
}
}

View File

@ -187,6 +187,16 @@ Release 2.4.0 - UNRELEASED
YARN-1307. Redesign znode structure for Zookeeper based RM state-store for
better organization and scalability. (Tsuyoshi OZAWA via vinodkv)
YARN-1172. Convert SecretManagers in RM to services (Tsuyoshi OZAWA via kasha)
YARN-1523. Use StandbyException instead of RMNotYetActiveException (kasha)
YARN-1541. Changed ResourceManager to invalidate ApplicationMaster host/port
information once an AM crashes. (Jian He via vinodkv)
YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize
app-attempts separately from apps. (Jian He via vinodkv)
OPTIMIZATIONS
BUG FIXES
@ -267,6 +277,21 @@ Release 2.4.0 - UNRELEASED
YARN-1451. TestResourceManager relies on the scheduler assigning multiple
containers in a single node update. (Sandy Ryza via kasha)
YARN-1527. Fix yarn rmadmin command to print the correct usage info.
(Akira AJISAKA via jianhe)
YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was
causing it to randomly fail. (Liyin Liang via vinodkv)
YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that
was causing unmanaged AMs to not finish correctly. (haosdent via vinodkv)
YARN-1559. Race between ServerRMProxy and ClientRMProxy setting
RMProxy#INSTANCE. (kasha and vinodkv via kasha)
YARN-1560. Fixed TestYarnClient#testAMMRTokens failure with null AMRM token.
(Ted Yu via jianhe)
Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES
@ -473,6 +498,9 @@ Release 2.2.0 - 2013-10-13
YARN-1278. Fixed NodeManager to not delete local resources for apps on resync
command from RM - a bug caused by YARN-1149. (Hitesh Shah via vinodkv)
YARN-1463. Tests should avoid starting http-server where possible or creating
spnego keytab/principals (vinodkv via kasha)
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES

View File

@ -309,13 +309,4 @@
<Class name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
<!-- Ignore INSTANCE not being final as it is created in sub-classes -->
<Match>
<Class name="org.apache.hadoop.yarn.client.RMProxy" />
<Field name="INSTANCE" />
<Bug pattern="MS_SHOULD_BE_FINAL"/>
</Match>
</FindBugsFilter>

View File

@ -24,10 +24,10 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.classification.InterfaceStability.Stable;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.exceptions.RMNotYetActiveException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse;
@ -51,25 +51,25 @@ public interface ResourceManagerAdministrationProtocol extends GetUserMappingsPr
@Public
@Stable
public RefreshQueuesResponse refreshQueues(RefreshQueuesRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable
public RefreshNodesResponse refreshNodes(RefreshNodesRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable
public RefreshSuperUserGroupsConfigurationResponse
refreshSuperUserGroupsConfiguration(
RefreshSuperUserGroupsConfigurationRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable
public RefreshUserToGroupsMappingsResponse refreshUserToGroupsMappings(
RefreshUserToGroupsMappingsRequest request)
throws RMNotYetActiveException, YarnException, IOException;
throws StandbyException, YarnException, IOException;
@Public
@Stable

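Replacing RMNotYetActiveException with the generic org.apache.hadoop.ipc.StandbyException lets RM admin clients reuse Hadoop's standard HA handling instead of a YARN-specific type. A hedged caller-side sketch:

import java.io.IOException;
import org.apache.hadoop.ipc.StandbyException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol;
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesRequest;
import org.apache.hadoop.yarn.util.Records;

public class RefreshQueuesSketch {
  static void refresh(ResourceManagerAdministrationProtocol admin)
      throws YarnException, IOException {
    try {
      admin.refreshQueues(Records.newRecord(RefreshQueuesRequest.class));
    } catch (StandbyException e) {
      // Generic HA signal: this RM is standby, so the caller (or the RPC
      // retry policy) can fail over to the active RM.
    }
  }
}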
Some files were not shown because too many files have changed in this diff.