Merge r1415804 through r1419190 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1419193 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2012-12-10 03:37:32 +00:00
commit f4a723249e
94 changed files with 3240 additions and 899 deletions

View File

@ -110,6 +110,7 @@
<exclude>**/${test.exclude}.java</exclude>
<exclude>${test.exclude.pattern}</exclude>
<exclude>**/TestKerberosAuth*.java</exclude>
<exclude>**/TestAltKerberosAuth*.java</exclude>
<exclude>**/Test*$*.java</exclude>
</excludes>
</configuration>

View File

@ -0,0 +1,150 @@
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package org.apache.hadoop.security.authentication.server;
import java.io.IOException;
import java.util.Properties;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
/**
* The {@link AltKerberosAuthenticationHandler} behaves exactly the same way as
* the {@link KerberosAuthenticationHandler}, except that it allows for an
* alternative form of authentication for browsers while still using Kerberos
* for Java access. This is an abstract class that should be subclassed
* to allow a developer to implement their own custom authentication for browser
* access. The alternateAuthenticate method will be called whenever a request
* comes from a browser.
* <p/>
*/
public abstract class AltKerberosAuthenticationHandler
extends KerberosAuthenticationHandler {
/**
* Constant that identifies the authentication mechanism.
*/
public static final String TYPE = "alt-kerberos";
/**
* Constant for the configuration property that indicates which user agents
* are not considered browsers (comma separated)
*/
public static final String NON_BROWSER_USER_AGENTS =
TYPE + ".non-browser.user-agents";
private static final String NON_BROWSER_USER_AGENTS_DEFAULT =
"java,curl,wget,perl";
private String[] nonBrowserUserAgents;
/**
* Returns the authentication type of the authentication handler,
* 'alt-kerberos'.
* <p/>
*
* @return the authentication type of the authentication handler,
* 'alt-kerberos'.
*/
@Override
public String getType() {
return TYPE;
}
@Override
public void init(Properties config) throws ServletException {
super.init(config);
nonBrowserUserAgents = config.getProperty(
NON_BROWSER_USER_AGENTS, NON_BROWSER_USER_AGENTS_DEFAULT)
.split("\\W*,\\W*");
for (int i = 0; i < nonBrowserUserAgents.length; i++) {
nonBrowserUserAgents[i] = nonBrowserUserAgents[i].toLowerCase();
}
}
/**
* It enforces the Kerberos SPNEGO authentication sequence, returning an
* {@link AuthenticationToken} only after the Kerberos SPNEGO sequence has
* completed successfully (in the case of Java access) and only after the
* custom authentication implemented by the subclass in alternateAuthenticate
* has completed successfully (in the case of browser access).
* <p/>
*
* @param request the HTTP client request.
* @param response the HTTP client response.
*
* @return an authentication token if the request is authorized or null
*
* @throws IOException thrown if an IO error occurred
* @throws AuthenticationException thrown if an authentication error occurred
*/
@Override
public AuthenticationToken authenticate(HttpServletRequest request,
HttpServletResponse response)
throws IOException, AuthenticationException {
AuthenticationToken token;
if (isBrowser(request.getHeader("User-Agent"))) {
token = alternateAuthenticate(request, response);
}
else {
token = super.authenticate(request, response);
}
return token;
}
/**
* This method parses the User-Agent String and returns whether or not it
* refers to a browser. If it's not a browser, then Kerberos authentication
* will be used; if it is a browser, alternateAuthenticate from the subclass
* will be used.
* <p/>
* A User-Agent String is considered to be a browser if it does not contain
* any of the values from alt-kerberos.non-browser.user-agents; the default
* behavior is to consider everything a browser unless it contains one of:
* "java", "curl", "wget", or "perl". Subclasses can optionally override
* this method to use different behavior.
*
* @param userAgent The User-Agent String, or null if there isn't one
* @return true if the User-Agent String refers to a browser, false if not
*/
protected boolean isBrowser(String userAgent) {
if (userAgent == null) {
return false;
}
userAgent = userAgent.toLowerCase();
boolean isBrowser = true;
for (String nonBrowserUserAgent : nonBrowserUserAgents) {
if (userAgent.contains(nonBrowserUserAgent)) {
isBrowser = false;
break;
}
}
return isBrowser;
}
/**
* Subclasses should implement this method to provide the custom
* authentication to be used for browsers.
*
* @param request the HTTP client request.
* @param response the HTTP client response.
* @return an authentication token if the request is authorized, or null
* @throws IOException thrown if an IO error occurs
* @throws AuthenticationException thrown if an authentication error occurs
*/
public abstract AuthenticationToken alternateAuthenticate(
HttpServletRequest request, HttpServletResponse response)
throws IOException, AuthenticationException;
}
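For illustration, a minimal sketch of such a subclass is shown below. The package, the class name ExampleAltKerberosHandler, and the "exampleUser" request attribute are assumptions for the example only, not part of this change; a real implementation would perform an actual alternate authentication (for example a form-based login) inside alternateAuthenticate().
package org.example.auth; // hypothetical package for the sketch
import java.io.IOException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.security.authentication.server.AltKerberosAuthenticationHandler;
import org.apache.hadoop.security.authentication.server.AuthenticationToken;
public class ExampleAltKerberosHandler extends AltKerberosAuthenticationHandler {
  @Override
  public AuthenticationToken alternateAuthenticate(HttpServletRequest request,
      HttpServletResponse response)
      throws IOException, AuthenticationException {
    // Assume some front-end has already validated the browser user and stored
    // it in a request attribute; anything else gets a 401 and no token.
    Object user = request.getAttribute("exampleUser");
    if (user == null) {
      response.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
      return null;
    }
    // TYPE ("alt-kerberos") is inherited from AltKerberosAuthenticationHandler.
    return new AuthenticationToken(user.toString(), user.toString(), getType());
  }
}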

View File

@ -176,6 +176,73 @@ Configuration
...
</web-app>
+---+
** AltKerberos Configuration
<<IMPORTANT>>: A KDC must be configured and running.
The AltKerberos authentication mechanism is a partially implemented derivative
of the Kerberos SPNEGO authentication mechanism which allows a "mixed" form of
authentication where Kerberos SPNEGO is used by non-browsers while an
alternate form of authentication (to be implemented by the user) is used for
browsers. To use AltKerberos as the authentication mechanism (besides
providing an implementation), the authentication filter must be configured
with the following init parameters, in addition to the previously mentioned
Kerberos SPNEGO ones:
* <<<[PREFIX.]type>>>: the full class name of the implementation of
AltKerberosAuthenticationHandler to use.
* <<<[PREFIX.]alt-kerberos.non-browser.user-agents>>>: a comma-separated
list of which user-agents should be considered non-browsers.
<<Example>>:
+---+
<web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee">
...
<filter>
<filter-name>kerberosFilter</filter-name>
<filter-class>org.apache.hadoop.security.authentication.server.AuthenticationFilter</filter-class>
<init-param>
<param-name>type</param-name>
<param-value>org.my.subclass.of.AltKerberosAuthenticationHandler</param-value>
</init-param>
<init-param>
<param-name>alt-kerberos.non-browser.user-agents</param-name>
<param-value>java,curl,wget,perl</param-value>
</init-param>
<init-param>
<param-name>token.validity</param-name>
<param-value>30</param-value>
</init-param>
<init-param>
<param-name>cookie.domain</param-name>
<param-value>.foo.com</param-value>
</init-param>
<init-param>
<param-name>cookie.path</param-name>
<param-value>/</param-value>
</init-param>
<init-param>
<param-name>kerberos.principal</param-name>
<param-value>HTTP/localhost@LOCALHOST</param-value>
</init-param>
<init-param>
<param-name>kerberos.keytab</param-name>
<param-value>/tmp/auth.keytab</param-value>
</init-param>
</filter>
<filter-mapping>
<filter-name>kerberosFilter</filter-name>
<url-pattern>/kerberos/*</url-pattern>
</filter-mapping>
...
</web-app>
+---+
\[ {{{./index.html}Go Back}} \]

View File

@ -24,6 +24,11 @@ Hadoop Auth, Java HTTP SPNEGO ${project.version}
Hadoop Auth also supports additional authentication mechanisms on the client
and the server side via 2 simple interfaces.
Additionally, it provides a partially implemented derivative of the Kerberos
SPNEGO authentication to allow a "mixed" form of authentication where Kerberos
SPNEGO is used by non-browsers while an alternate form of authentication
(to be implemented by the user) is used for browsers.
* License
Hadoop Auth is distributed under {{{http://www.apache.org/licenses/}Apache

View File

@ -0,0 +1,110 @@
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. See accompanying LICENSE file.
*/
package org.apache.hadoop.security.authentication.server;
import java.io.IOException;
import java.util.Properties;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.mockito.Mockito;
public class TestAltKerberosAuthenticationHandler
extends TestKerberosAuthenticationHandler {
@Override
protected KerberosAuthenticationHandler getNewAuthenticationHandler() {
// AltKerberosAuthenticationHandler is abstract; a subclass would normally
// perform some other authentication when alternateAuthenticate() is called.
// For the test, we'll just return an AuthenticationToken as the other
// authentication is left up to the developer of the subclass
return new AltKerberosAuthenticationHandler() {
@Override
public AuthenticationToken alternateAuthenticate(
HttpServletRequest request,
HttpServletResponse response)
throws IOException, AuthenticationException {
return new AuthenticationToken("A", "B", getType());
}
};
}
@Override
protected String getExpectedType() {
return AltKerberosAuthenticationHandler.TYPE;
}
public void testAlternateAuthenticationAsBrowser() throws Exception {
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
// By default, a User-Agent without "java", "curl", "wget", or "perl" in it
// is considered a browser
Mockito.when(request.getHeader("User-Agent")).thenReturn("Some Browser");
AuthenticationToken token = handler.authenticate(request, response);
assertEquals("A", token.getUserName());
assertEquals("B", token.getName());
assertEquals(getExpectedType(), token.getType());
}
public void testNonDefaultNonBrowserUserAgentAsBrowser() throws Exception {
HttpServletRequest request = Mockito.mock(HttpServletRequest.class);
HttpServletResponse response = Mockito.mock(HttpServletResponse.class);
if (handler != null) {
handler.destroy();
handler = null;
}
handler = getNewAuthenticationHandler();
Properties props = getDefaultProperties();
props.setProperty("alt-kerberos.non-browser.user-agents", "foo, bar");
try {
handler.init(props);
} catch (Exception ex) {
handler = null;
throw ex;
}
// Pretend we're something that will not match with "foo" (or "bar")
Mockito.when(request.getHeader("User-Agent")).thenReturn("blah");
// Should use alt authentication
AuthenticationToken token = handler.authenticate(request, response);
assertEquals("A", token.getUserName());
assertEquals("B", token.getName());
assertEquals(getExpectedType(), token.getType());
}
public void testNonDefaultNonBrowserUserAgentAsNonBrowser() throws Exception {
if (handler != null) {
handler.destroy();
handler = null;
}
handler = getNewAuthenticationHandler();
Properties props = getDefaultProperties();
props.setProperty("alt-kerberos.non-browser.user-agents", "foo, bar");
try {
handler.init(props);
} catch (Exception ex) {
handler = null;
throw ex;
}
// Run the kerberos tests again
testRequestWithoutAuthorization();
testRequestWithInvalidAuthorization();
testRequestWithAuthorization();
testRequestWithInvalidKerberosAuthorization();
}
}

View File

@ -28,23 +28,37 @@ import org.ietf.jgss.Oid;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.lang.reflect.Field;
import java.util.Properties;
import java.util.concurrent.Callable;
public class TestKerberosAuthenticationHandler extends TestCase {
private KerberosAuthenticationHandler handler;
protected KerberosAuthenticationHandler handler;
protected KerberosAuthenticationHandler getNewAuthenticationHandler() {
return new KerberosAuthenticationHandler();
}
protected String getExpectedType() {
return KerberosAuthenticationHandler.TYPE;
}
protected Properties getDefaultProperties() {
Properties props = new Properties();
props.setProperty(KerberosAuthenticationHandler.PRINCIPAL,
KerberosTestUtils.getServerPrincipal());
props.setProperty(KerberosAuthenticationHandler.KEYTAB,
KerberosTestUtils.getKeytabFile());
props.setProperty(KerberosAuthenticationHandler.NAME_RULES,
"RULE:[1:$1@$0](.*@" + KerberosTestUtils.getRealm()+")s/@.*//\n");
return props;
}
@Override
protected void setUp() throws Exception {
super.setUp();
handler = new KerberosAuthenticationHandler();
Properties props = new Properties();
props.setProperty(KerberosAuthenticationHandler.PRINCIPAL, KerberosTestUtils.getServerPrincipal());
props.setProperty(KerberosAuthenticationHandler.KEYTAB, KerberosTestUtils.getKeytabFile());
props.setProperty(KerberosAuthenticationHandler.NAME_RULES,
"RULE:[1:$1@$0](.*@" + KerberosTestUtils.getRealm()+")s/@.*//\n");
handler = getNewAuthenticationHandler();
Properties props = getDefaultProperties();
try {
handler.init(props);
} catch (Exception ex) {
@ -71,10 +85,8 @@ public class TestKerberosAuthenticationHandler extends TestCase {
KerberosName.setRules("RULE:[1:$1@$0](.*@FOO)s/@.*//\nDEFAULT");
handler = new KerberosAuthenticationHandler();
Properties props = new Properties();
props.setProperty(KerberosAuthenticationHandler.PRINCIPAL, KerberosTestUtils.getServerPrincipal());
props.setProperty(KerberosAuthenticationHandler.KEYTAB, KerberosTestUtils.getKeytabFile());
handler = getNewAuthenticationHandler();
Properties props = getDefaultProperties();
props.setProperty(KerberosAuthenticationHandler.NAME_RULES, "RULE:[1:$1@$0](.*@BAR)s/@.*//\nDEFAULT");
try {
handler.init(props);
@ -97,8 +109,7 @@ public class TestKerberosAuthenticationHandler extends TestCase {
}
public void testType() throws Exception {
KerberosAuthenticationHandler handler = new KerberosAuthenticationHandler();
assertEquals(KerberosAuthenticationHandler.TYPE, handler.getType());
assertEquals(getExpectedType(), handler.getType());
}
public void testRequestWithoutAuthorization() throws Exception {
@ -182,7 +193,7 @@ public class TestKerberosAuthenticationHandler extends TestCase {
assertEquals(KerberosTestUtils.getClientPrincipal(), authToken.getName());
assertTrue(KerberosTestUtils.getClientPrincipal().startsWith(authToken.getUserName()));
assertEquals(KerberosAuthenticationHandler.TYPE, authToken.getType());
assertEquals(getExpectedType(), authToken.getType());
} else {
Mockito.verify(response).setHeader(Mockito.eq(KerberosAuthenticator.WWW_AUTHENTICATE),
Mockito.matches(KerberosAuthenticator.NEGOTIATE + " .*"));

View File

@ -143,6 +143,9 @@ Trunk (Unreleased)
BUG FIXES
HADOOP-8418. Update UGI Principal classes name for running with
IBM JDK on 64 bits Windows. (Yu Gao via eyang)
HADOOP-8177. MBeans shouldn't try to register when it fails to create MBeanName.
(Devaraj K via umamahesh)
@ -289,6 +292,9 @@ Trunk (Unreleased)
HADOOP-9037. Bug in test-patch.sh and precommit build process (Kihwal Lee
via jlowe)
HADOOP-9121. InodeTree.java has redundant check for vName while
throwing exception. (Arup Malakar via suresh)
OPTIMIZATIONS
HADOOP-7761. Improve the performance of raw comparisons. (todd)
@ -306,6 +312,12 @@ Release 2.0.3-alpha - Unreleased
HADOOP-9020. Add a SASL PLAIN server (daryn via bobby)
HADOOP-9090. Support on-demand publish of metrics. (Mostafa Elhemali via
suresh)
HADOOP-9054. Add AuthenticationHandler that uses Kerberos but allows for
an alternate form of authentication for browsers. (rkanter via tucu)
IMPROVEMENTS
HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
@ -456,6 +468,11 @@ Release 2.0.3-alpha - Unreleased
HADOOP-8958. ViewFs:Non absolute mount name failures when running
multiple tests on Windows. (Chris Nauroth via suresh)
HADOOP-9103. UTF8 class does not properly decode Unicode characters
outside the basic multilingual plane. (todd)
HADOOP-9070. Kerberos SASL server cannot find kerberos key. (daryn via atm)
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES

View File

@ -118,8 +118,7 @@ abstract class InodeTree<T> {
return result;
}
INode<T> resolveInternal(final String pathComponent)
throws FileNotFoundException {
INode<T> resolveInternal(final String pathComponent) {
return children.get(pathComponent);
}
@ -337,7 +336,7 @@ abstract class InodeTree<T> {
if (!gotMountTableEntry) {
throw new IOException(
"ViewFs: Cannot initialize: Empty Mount table in config for " +
vName == null ? "viewfs:///" : ("viewfs://" + vName + "/"));
"viewfs://" + vName + "/");
}
}

View File

@ -1858,10 +1858,10 @@ public class SequenceFile {
UTF8 className = new UTF8();
className.readFields(in);
keyClassName = className.toString(); // key class name
keyClassName = className.toStringChecked(); // key class name
className.readFields(in);
valClassName = className.toString(); // val class name
valClassName = className.toStringChecked(); // val class name
} else {
keyClassName = Text.readString(in);
valClassName = Text.readString(in);

View File

@ -21,7 +21,9 @@ package org.apache.hadoop.io;
import java.io.IOException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.UTFDataFormatException;
import org.apache.hadoop.util.StringUtils;
import org.apache.commons.logging.*;
import org.apache.hadoop.classification.InterfaceAudience;
@ -31,6 +33,9 @@ import org.apache.hadoop.classification.InterfaceStability;
*
* <p>Also includes utilities for efficiently reading and writing UTF-8.
*
* Note that this decodes UTF-8 but actually encodes CESU-8, a variant of
* UTF-8: see http://en.wikipedia.org/wiki/CESU-8
*
* @deprecated replaced by Text
*/
@Deprecated
@ -152,6 +157,21 @@ public class UTF8 implements WritableComparable<UTF8> {
return buffer.toString();
}
/**
* Convert to a string, checking for valid UTF8.
* @return the converted string
* @throws UTFDataFormatException if the underlying bytes contain invalid
* UTF8 data.
*/
public String toStringChecked() throws IOException {
StringBuilder buffer = new StringBuilder(length);
synchronized (IBUF) {
IBUF.reset(bytes, length);
readChars(IBUF, buffer, length);
}
return buffer.toString();
}
/** Returns true iff <code>o</code> is a UTF8 with the same contents. */
@Override
public boolean equals(Object o) {
@ -209,6 +229,19 @@ public class UTF8 implements WritableComparable<UTF8> {
return result;
}
/**
* Convert a UTF-8 encoded byte array back into a string.
*
* @throws IOException if the byte array is invalid UTF8
*/
public static String fromBytes(byte[] bytes) throws IOException {
DataInputBuffer dbuf = new DataInputBuffer();
dbuf.reset(bytes, 0, bytes.length);
StringBuilder buf = new StringBuilder(bytes.length);
readChars(dbuf, buf, bytes.length);
return buf.toString();
}
/** Read a UTF-8 encoded string.
*
* @see DataInput#readUTF()
@ -221,7 +254,7 @@ public class UTF8 implements WritableComparable<UTF8> {
}
private static void readChars(DataInput in, StringBuilder buffer, int nBytes)
throws IOException {
throws UTFDataFormatException, IOException {
DataOutputBuffer obuf = OBUF_FACTORY.get();
obuf.reset();
obuf.write(in, nBytes);
@ -230,18 +263,60 @@ public class UTF8 implements WritableComparable<UTF8> {
while (i < nBytes) {
byte b = bytes[i++];
if ((b & 0x80) == 0) {
// 0b0xxxxxxx: 1-byte sequence
buffer.append((char)(b & 0x7F));
} else if ((b & 0xE0) != 0xE0) {
} else if ((b & 0xE0) == 0xC0) {
if (i >= nBytes) {
throw new UTFDataFormatException("Truncated UTF8 at " +
StringUtils.byteToHexString(bytes, i - 1, 1));
}
// 0b110xxxxx: 2-byte sequence
buffer.append((char)(((b & 0x1F) << 6)
| (bytes[i++] & 0x3F)));
} else {
} else if ((b & 0xF0) == 0xE0) {
// 0b1110xxxx: 3-byte sequence
if (i + 1 >= nBytes) {
throw new UTFDataFormatException("Truncated UTF8 at " +
StringUtils.byteToHexString(bytes, i - 1, 2));
}
buffer.append((char)(((b & 0x0F) << 12)
| ((bytes[i++] & 0x3F) << 6)
| (bytes[i++] & 0x3F)));
} else if ((b & 0xF8) == 0xF0) {
if (i + 2 >= nBytes) {
throw new UTFDataFormatException("Truncated UTF8 at " +
StringUtils.byteToHexString(bytes, i - 1, 3));
}
// 0b11110xxx: 4-byte sequence
int codepoint =
((b & 0x07) << 18)
| ((bytes[i++] & 0x3F) << 12)
| ((bytes[i++] & 0x3F) << 6)
| ((bytes[i++] & 0x3F));
buffer.append(highSurrogate(codepoint))
.append(lowSurrogate(codepoint));
} else {
// The UTF8 standard describes 5-byte and 6-byte sequences, but
// these are no longer allowed as of 2003 (see RFC 3629)
// Only show the next 6 bytes max in the error message - in case the
// buffer is large, this prevents an exceedingly large message.
int endForError = Math.min(i + 5, nBytes);
throw new UTFDataFormatException("Invalid UTF8 at " +
StringUtils.byteToHexString(bytes, i - 1, endForError));
}
}
}
private static char highSurrogate(int codePoint) {
return (char) ((codePoint >>> 10)
+ (Character.MIN_HIGH_SURROGATE - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
}
private static char lowSurrogate(int codePoint) {
return (char) ((codePoint & 0x3ff) + Character.MIN_LOW_SURROGATE);
}
/** Write a UTF-8 encoded string.
*
* @see DataOutput#writeUTF(String)
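As a worked example of the 4-byte decoding and surrogate arithmetic above (a sketch for illustration only): the UTF-8 sequence f0 9f 90 b1 decodes to code point U+1F431, which highSurrogate() and lowSurrogate() split into the char pair \uD83D \uDC31.
// Illustration: the same computation readChars()/highSurrogate()/lowSurrogate()
// perform for the 4-byte sequence f0 9f 90 b1 (U+1F431).
int codepoint = ((0xF0 & 0x07) << 18)
    | ((0x9F & 0x3F) << 12)
    | ((0x90 & 0x3F) << 6)
    | (0xB1 & 0x3F);                                                   // 0x1F431
char hi = (char) ((codepoint >>> 10)
    + (Character.MIN_HIGH_SURROGATE
        - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));          // '\uD83D'
char lo = (char) ((codepoint & 0x3ff) + Character.MIN_LOW_SURROGATE); // '\uDC31'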

View File

@ -199,7 +199,8 @@ public abstract class Server {
// in ObjectWritable to efficiently transmit arrays of primitives
// 6 : Made RPC payload header explicit
// 7 : Changed Ipc Connection Header to use Protocol buffers
public static final byte CURRENT_VERSION = 7;
// 8 : SASL server always sends a final response
public static final byte CURRENT_VERSION = 8;
/**
* Initial and max size of response buffer
@ -1220,8 +1221,8 @@ public abstract class Server {
AUDITLOG.warn(AUTH_FAILED_FOR + clientIP + ":" + attemptingUser);
throw e;
}
if (replyToken == null && authMethod == AuthMethod.PLAIN) {
// client needs at least response to know if it should use SIMPLE
if (saslServer.isComplete() && replyToken == null) {
// send final response for success
replyToken = new byte[0];
}
if (replyToken != null) {
@ -1392,7 +1393,7 @@ public abstract class Server {
}
private AuthMethod initializeAuthContext(AuthMethod authMethod)
throws IOException {
throws IOException, InterruptedException {
try {
if (enabledAuthMethods.contains(authMethod)) {
saslServer = createSaslServer(authMethod);
@ -1425,8 +1426,7 @@ public abstract class Server {
}
private SaslServer createSaslServer(AuthMethod authMethod)
throws IOException {
SaslServer saslServer = null;
throws IOException, InterruptedException {
String hostname = null;
String saslProtocol = null;
CallbackHandler saslCallback = null;
@ -1462,10 +1462,23 @@ public abstract class Server {
"Server does not support SASL " + authMethod);
}
String mechanism = authMethod.getMechanismName();
saslServer = Sasl.createSaslServer(
mechanism, saslProtocol, hostname,
SaslRpcServer.SASL_PROPS, saslCallback);
return createSaslServer(authMethod.getMechanismName(), saslProtocol,
hostname, saslCallback);
}
private SaslServer createSaslServer(final String mechanism,
final String protocol,
final String hostname,
final CallbackHandler callback
) throws IOException, InterruptedException {
SaslServer saslServer = UserGroupInformation.getCurrentUser().doAs(
new PrivilegedExceptionAction<SaslServer>() {
@Override
public SaslServer run() throws SaslException {
return Sasl.createSaslServer(mechanism, protocol, hostname,
SaslRpcServer.SASL_PROPS, callback);
}
});
if (saslServer == null) {
throw new AccessControlException(
"Unable to find SASL server implementation for " + mechanism);

View File

@ -90,6 +90,17 @@ public abstract class MetricsSystem implements MetricsSystemMXBean {
*/
public abstract void register(Callback callback);
/**
* Requests an immediate publish of all metrics from sources to sinks.
*
* This is a "soft" request: the expectation is that a best effort will be
* done to synchronously snapshot the metrics from all the sources and put
* them in all the sinks (including flushing the sinks) before returning to
* the caller. If this can't be accomplished in reasonable time it's OK to
* return to the caller before everything is done.
*/
public abstract void publishMetricsNow();
/**
* Shut down the metrics system completely (usually during server shutdown).
* The MetricsSystemMXBean will be unregistered.
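A brief usage sketch of the new method (the wrapper class below is assumed, not part of this change): a caller that needs the current values pushed out immediately, such as a test or a shutdown path, can request a synchronous best-effort publish instead of waiting for the next timer period.
// Hypothetical caller; only publishMetricsNow() is the API added here.
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
public class FlushMetricsExample {
  public static void main(String[] args) {
    // Sources and sinks are assumed to have been registered elsewhere.
    MetricsSystem ms = DefaultMetricsSystem.instance();
    ms.publishMetricsNow(); // best-effort synchronous snapshot and flush to all sinks
  }
}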

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.metrics2.impl;
import java.util.Random;
import java.util.concurrent.*;
import static com.google.common.base.Preconditions.*;
@ -48,6 +49,7 @@ class MetricsSinkAdapter implements SinkQueue.Consumer<MetricsBuffer> {
private volatile boolean stopping = false;
private volatile boolean inError = false;
private final int period, firstRetryDelay, retryCount;
private final long oobPutTimeout;
private final float retryBackoff;
private final MetricsRegistry registry = new MetricsRegistry("sinkadapter");
private final MutableStat latency;
@ -69,6 +71,8 @@ class MetricsSinkAdapter implements SinkQueue.Consumer<MetricsBuffer> {
this.period = checkArg(period, period > 0, "period");
firstRetryDelay = checkArg(retryDelay, retryDelay > 0, "retry delay");
this.retryBackoff = checkArg(retryBackoff, retryBackoff>1, "retry backoff");
oobPutTimeout = (long)
(firstRetryDelay * Math.pow(retryBackoff, retryCount) * 1000);
this.retryCount = retryCount;
this.queue = new SinkQueue<MetricsBuffer>(checkArg(queueCapacity,
queueCapacity > 0, "queue capacity"));
@ -96,6 +100,23 @@ class MetricsSinkAdapter implements SinkQueue.Consumer<MetricsBuffer> {
return true; // OK
}
public boolean putMetricsImmediate(MetricsBuffer buffer) {
WaitableMetricsBuffer waitableBuffer =
new WaitableMetricsBuffer(buffer);
if (!queue.enqueue(waitableBuffer)) {
LOG.warn(name + " has a full queue and can't consume the given metrics.");
dropped.incr();
return false;
}
if (!waitableBuffer.waitTillNotified(oobPutTimeout)) {
LOG.warn(name +
" couldn't fulfill an immediate putMetrics request in time." +
" Abandoning.");
return false;
}
return true;
}
void publishMetricsFromQueue() {
int retryDelay = firstRetryDelay;
int n = retryCount;
@ -158,6 +179,9 @@ class MetricsSinkAdapter implements SinkQueue.Consumer<MetricsBuffer> {
sink.flush();
latency.add(Time.now() - ts);
}
if (buffer instanceof WaitableMetricsBuffer) {
((WaitableMetricsBuffer)buffer).notifyAnyWaiters();
}
LOG.debug("Done");
}
@ -191,4 +215,26 @@ class MetricsSinkAdapter implements SinkQueue.Consumer<MetricsBuffer> {
MetricsSink sink() {
return sink;
}
static class WaitableMetricsBuffer extends MetricsBuffer {
private final Semaphore notificationSemaphore =
new Semaphore(0);
public WaitableMetricsBuffer(MetricsBuffer metricsBuffer) {
super(metricsBuffer);
}
public boolean waitTillNotified(long millisecondsToWait) {
try {
return notificationSemaphore.tryAcquire(millisecondsToWait,
TimeUnit.MILLISECONDS);
} catch (InterruptedException e) {
return false;
}
}
public void notifyAnyWaiters() {
notificationSemaphore.release();
}
}
}
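To make the out-of-band timeout computed in the constructor concrete, here is a small sketch with assumed values (not the shipped defaults): a retry delay of 10 seconds, backoff 2.0 and retry count 1 give putMetricsImmediate() a 20-second window before it abandons the request.
// Assumed configuration values, for illustration only.
int firstRetryDelay = 10;     // seconds
float retryBackoff = 2.0f;
int retryCount = 1;
long oobPutTimeout =
    (long) (firstRetryDelay * Math.pow(retryBackoff, retryCount) * 1000);
// oobPutTimeout == 20000 ms: the wait used in WaitableMetricsBuffer.waitTillNotified().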

View File

@ -344,7 +344,17 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource {
synchronized void onTimerEvent() {
logicalTime += period;
if (sinks.size() > 0) {
publishMetrics(sampleMetrics());
publishMetrics(sampleMetrics(), false);
}
}
/**
* Requests an immediate publish of all metrics from sources to sinks.
*/
@Override
public void publishMetricsNow() {
if (sinks.size() > 0) {
publishMetrics(sampleMetrics(), true);
}
}
@ -380,12 +390,20 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource {
/**
* Publish a metrics snapshot to all the sinks
* @param buffer the metrics snapshot to publish
* @param immediate indicates that we should publish metrics immediately
* instead of using a separate thread.
*/
synchronized void publishMetrics(MetricsBuffer buffer) {
synchronized void publishMetrics(MetricsBuffer buffer, boolean immediate) {
int dropped = 0;
for (MetricsSinkAdapter sa : sinks.values()) {
long startTime = Time.now();
dropped += sa.putMetrics(buffer, logicalTime) ? 0 : 1;
boolean result;
if (immediate) {
result = sa.putMetricsImmediate(buffer);
} else {
result = sa.putMetrics(buffer, logicalTime);
}
dropped += result ? 0 : 1;
publishStat.add(Time.now() - startTime);
}
droppedPubAll.incr(dropped);

View File

@ -301,10 +301,14 @@ public class UserGroupInformation {
private static Class<? extends Principal> OS_PRINCIPAL_CLASS;
private static final boolean windows =
System.getProperty("os.name").startsWith("Windows");
private static final boolean is64Bit =
System.getProperty("os.arch").contains("64");
/* Return the OS login module class name */
private static String getOSLoginModuleName() {
if (System.getProperty("java.vendor").contains("IBM")) {
return windows ? "com.ibm.security.auth.module.NTLoginModule"
return windows ? (is64Bit
? "com.ibm.security.auth.module.Win64LoginModule"
: "com.ibm.security.auth.module.NTLoginModule")
: "com.ibm.security.auth.module.LinuxLoginModule";
} else {
return windows ? "com.sun.security.auth.module.NTLoginModule"
@ -319,11 +323,11 @@ public class UserGroupInformation {
try {
if (System.getProperty("java.vendor").contains("IBM")) {
if (windows) {
return (Class<? extends Principal>)
cl.loadClass("com.ibm.security.auth.UsernamePrincipal");
return (Class<? extends Principal>) (is64Bit
? cl.loadClass("com.ibm.security.auth.UsernamePrincipal")
: cl.loadClass("com.ibm.security.auth.NTUserPrincipal"));
} else {
return (Class<? extends Principal>)
(System.getProperty("os.arch").contains("64")
return (Class<? extends Principal>) (is64Bit
? cl.loadClass("com.ibm.security.auth.UsernamePrincipal")
: cl.loadClass("com.ibm.security.auth.LinuxPrincipal"));
}

View File

@ -32,7 +32,7 @@ import org.junit.Assert;
*/
public final class FileContextTestHelper {
// The test root is relative to the <wd>/build/test/data by default
public static final String TEST_ROOT_DIR =
public static String TEST_ROOT_DIR =
System.getProperty("test.build.data", "build/test/data") + "/test";
private static final int DEFAULT_BLOCK_SIZE = 1024;
private static final int DEFAULT_NUM_BLOCKS = 2;

View File

@ -34,7 +34,7 @@ import static org.mockito.Mockito.mock;
*/
public final class FileSystemTestHelper {
// The test root is relative to the <wd>/build/test/data by default
public static final String TEST_ROOT_DIR =
public static String TEST_ROOT_DIR =
System.getProperty("test.build.data", "target/test/data") + "/test";
private static final int DEFAULT_BLOCK_SIZE = 1024;
private static final int DEFAULT_NUM_BLOCKS = 2;

View File

@ -19,8 +19,13 @@
package org.apache.hadoop.io;
import junit.framework.TestCase;
import java.io.IOException;
import java.io.UTFDataFormatException;
import java.util.Random;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.util.StringUtils;
/** Unit tests for UTF8. */
@SuppressWarnings("deprecation")
public class TestUTF8 extends TestCase {
@ -93,4 +98,72 @@ public class TestUTF8 extends TestCase {
assertEquals(s, new String(dob.getData(), 2, dob.getLength()-2, "UTF-8"));
}
/**
* Test encoding and decoding of UTF8 outside the basic multilingual plane.
*
* This is a regression test for HADOOP-9103.
*/
public void testNonBasicMultilingualPlane() throws Exception {
// Test using the "CAT FACE" character (U+1F431)
// See http://www.fileformat.info/info/unicode/char/1f431/index.htm
String catFace = "\uD83D\uDC31";
// This encodes to 4 bytes in UTF-8:
byte[] encoded = catFace.getBytes("UTF-8");
assertEquals(4, encoded.length);
assertEquals("f09f90b1", StringUtils.byteToHexString(encoded));
// Decode back to String using our own decoder
String roundTrip = UTF8.fromBytes(encoded);
assertEquals(catFace, roundTrip);
}
/**
* Test that decoding invalid UTF8 throws an appropriate error message.
*/
public void testInvalidUTF8() throws Exception {
byte[] invalid = new byte[] {
0x01, 0x02, (byte)0xff, (byte)0xff, 0x01, 0x02, 0x03, 0x04, 0x05 };
try {
UTF8.fromBytes(invalid);
fail("did not throw an exception");
} catch (UTFDataFormatException utfde) {
GenericTestUtils.assertExceptionContains(
"Invalid UTF8 at ffff01020304", utfde);
}
}
/**
* Test for a 5-byte UTF8 sequence, which is now considered illegal.
*/
public void test5ByteUtf8Sequence() throws Exception {
byte[] invalid = new byte[] {
0x01, 0x02, (byte)0xf8, (byte)0x88, (byte)0x80,
(byte)0x80, (byte)0x80, 0x04, 0x05 };
try {
UTF8.fromBytes(invalid);
fail("did not throw an exception");
} catch (UTFDataFormatException utfde) {
GenericTestUtils.assertExceptionContains(
"Invalid UTF8 at f88880808004", utfde);
}
}
/**
* Test that decoding invalid UTF8 due to truncation yields the correct
* exception type.
*/
public void testInvalidUTF8Truncated() throws Exception {
// Truncated CAT FACE character -- this is a 4-byte sequence, but we
// only have the first three bytes.
byte[] truncated = new byte[] {
(byte)0xF0, (byte)0x9F, (byte)0x90 };
try {
UTF8.fromBytes(truncated);
fail("did not throw an exception");
} catch (UTFDataFormatException utfde) {
GenericTestUtils.assertExceptionContains(
"Truncated UTF8 at f09f90", utfde);
}
}
}

View File

@ -29,8 +29,6 @@ import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -115,31 +113,23 @@ public class TestGangliaMetrics {
final int expectedCountFromGanglia30 = expectedMetrics.length;
final int expectedCountFromGanglia31 = 2 * expectedMetrics.length;
// use latch to make sure we received required records before shutting
// down the MetricSystem
CountDownLatch latch = new CountDownLatch(
expectedCountFromGanglia30 + expectedCountFromGanglia31);
// Setup test for GangliaSink30
AbstractGangliaSink gsink30 = new GangliaSink30();
gsink30.init(cb.subset("test"));
MockDatagramSocket mockds30 = new MockDatagramSocket(latch);
MockDatagramSocket mockds30 = new MockDatagramSocket();
GangliaMetricsTestHelper.setDatagramSocket(gsink30, mockds30);
// Setup test for GangliaSink31
AbstractGangliaSink gsink31 = new GangliaSink31();
gsink31.init(cb.subset("test"));
MockDatagramSocket mockds31 = new MockDatagramSocket(latch);
MockDatagramSocket mockds31 = new MockDatagramSocket();
GangliaMetricsTestHelper.setDatagramSocket(gsink31, mockds31);
// register the sinks
ms.register("gsink30", "gsink30 desc", gsink30);
ms.register("gsink31", "gsink31 desc", gsink31);
ms.onTimerEvent(); // trigger something interesting
ms.publishMetricsNow(); // publish the metrics
// wait for all records and the stop MetricSystem. Without this
// sometime the ms gets shutdown before all the sinks have consumed
latch.await(200, TimeUnit.MILLISECONDS);
ms.stop();
// check GangliaSink30 data
@ -198,7 +188,6 @@ public class TestGangliaMetrics {
*/
private class MockDatagramSocket extends DatagramSocket {
private ArrayList<byte[]> capture;
private CountDownLatch latch;
/**
* @throws SocketException
@ -207,15 +196,6 @@ public class TestGangliaMetrics {
capture = new ArrayList<byte[]>();
}
/**
* @param latch
* @throws SocketException
*/
public MockDatagramSocket(CountDownLatch latch) throws SocketException {
this();
this.latch = latch;
}
/* (non-Javadoc)
* @see java.net.DatagramSocket#send(java.net.DatagramPacket)
*/
@ -225,9 +205,6 @@ public class TestGangliaMetrics {
byte[] bytes = new byte[p.getLength()];
System.arraycopy(p.getData(), p.getOffset(), bytes, 0, p.getLength());
capture.add(bytes);
// decrement the latch
latch.countDown();
}
/**

View File

@ -18,7 +18,11 @@
package org.apache.hadoop.metrics2.impl;
import java.util.List;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.*;
import javax.annotation.Nullable;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -26,9 +30,11 @@ import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.runners.MockitoJUnitRunner;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import org.apache.commons.configuration.SubsetConfiguration;
@ -36,6 +42,8 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.metrics2.MetricsException;
import static org.apache.hadoop.test.MoreAsserts.*;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricsRecord;
import org.apache.hadoop.metrics2.MetricsSink;
import org.apache.hadoop.metrics2.MetricsSource;
@ -47,6 +55,7 @@ import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableCounterLong;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
import org.apache.hadoop.util.StringUtils;
/**
* Test the MetricsSystemImpl class
@ -72,7 +81,7 @@ public class TestMetricsSystemImpl {
}
@Test public void testInitFirst() throws Exception {
ConfigBuilder cb = new ConfigBuilder().add("*.period", 8)
new ConfigBuilder().add("*.period", 8)
//.add("test.sink.plugin.urls", getPluginUrlsAsString())
.add("test.sink.test.class", TestSink.class.getName())
.add("test.*.source.filter.exclude", "s0")
@ -93,8 +102,9 @@ public class TestMetricsSystemImpl {
MetricsSink sink2 = mock(MetricsSink.class);
ms.registerSink("sink1", "sink1 desc", sink1);
ms.registerSink("sink2", "sink2 desc", sink2);
ms.onTimerEvent(); // trigger something interesting
ms.publishMetricsNow(); // publish the metrics
ms.stop();
ms.shutdown();
verify(sink1, times(2)).putMetrics(r1.capture());
List<MetricsRecord> mr1 = r1.getAllValues();
@ -104,6 +114,177 @@ public class TestMetricsSystemImpl {
assertEquals("output", mr1, mr2);
}
@Test public void testMultiThreadedPublish() throws Exception {
new ConfigBuilder().add("*.period", 80)
.add("test.sink.Collector.queue.capacity", "20")
.save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test"));
final MetricsSystemImpl ms = new MetricsSystemImpl("Test");
ms.start();
final int numThreads = 10;
final CollectingSink sink = new CollectingSink(numThreads);
ms.registerSink("Collector",
"Collector of values from all threads.", sink);
final TestSource[] sources = new TestSource[numThreads];
final Thread[] threads = new Thread[numThreads];
final String[] results = new String[numThreads];
final CyclicBarrier barrier1 = new CyclicBarrier(numThreads),
barrier2 = new CyclicBarrier(numThreads);
for (int i = 0; i < numThreads; i++) {
sources[i] = ms.register("threadSource" + i,
"A source of my threaded goodness.",
new TestSource("threadSourceRec" + i));
threads[i] = new Thread(new Runnable() {
private boolean safeAwait(int mySource, CyclicBarrier barrier) {
try {
barrier.await(2, TimeUnit.SECONDS);
} catch (InterruptedException e) {
results[mySource] = "Interrupted";
return false;
} catch (BrokenBarrierException e) {
results[mySource] = "Broken Barrier";
return false;
} catch (TimeoutException e) {
results[mySource] = "Timed out on barrier";
return false;
}
return true;
}
@Override
public void run() {
int mySource = Integer.parseInt(Thread.currentThread().getName());
if (sink.collected[mySource].get() != 0L) {
results[mySource] = "Someone else collected my metric!";
return;
}
// Wait for all the threads to come here so we can hammer
// the system at the same time
if (!safeAwait(mySource, barrier1)) return;
sources[mySource].g1.set(230);
ms.publishMetricsNow();
// Since some other thread may have snatched my metric,
// I need to wait for the threads to finish before checking.
if (!safeAwait(mySource, barrier2)) return;
if (sink.collected[mySource].get() != 230L) {
results[mySource] = "Metric not collected!";
return;
}
results[mySource] = "Passed";
}
}, "" + i);
}
for (Thread t : threads)
t.start();
for (Thread t : threads)
t.join();
assertEquals(0L, ms.droppedPubAll.value());
assertTrue(StringUtils.join("\n", Arrays.asList(results)),
Iterables.all(Arrays.asList(results), new Predicate<String>() {
@Override
public boolean apply(@Nullable String input) {
return input.equalsIgnoreCase("Passed");
}
}));
ms.stop();
ms.shutdown();
}
private static class CollectingSink implements MetricsSink {
private final AtomicLong[] collected;
public CollectingSink(int capacity) {
collected = new AtomicLong[capacity];
for (int i = 0; i < capacity; i++) {
collected[i] = new AtomicLong();
}
}
@Override
public void init(SubsetConfiguration conf) {
}
@Override
public void putMetrics(MetricsRecord record) {
final String prefix = "threadSourceRec";
if (record.name().startsWith(prefix)) {
final int recordNumber = Integer.parseInt(
record.name().substring(prefix.length()));
ArrayList<String> names = new ArrayList<String>();
for (AbstractMetric m : record.metrics()) {
if (m.name().equalsIgnoreCase("g1")) {
collected[recordNumber].set(m.value().longValue());
return;
}
names.add(m.name());
}
}
}
@Override
public void flush() {
}
}
@Test public void testHangingSink() {
new ConfigBuilder().add("*.period", 8)
.add("test.sink.test.class", TestSink.class.getName())
.add("test.sink.hanging.retry.delay", "1")
.add("test.sink.hanging.retry.backoff", "1.01")
.add("test.sink.hanging.retry.count", "0")
.save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test"));
MetricsSystemImpl ms = new MetricsSystemImpl("Test");
ms.start();
TestSource s = ms.register("s3", "s3 desc", new TestSource("s3rec"));
s.c1.incr();
HangingSink hanging = new HangingSink();
ms.registerSink("hanging", "Hang the sink!", hanging);
ms.publishMetricsNow();
assertEquals(1L, ms.droppedPubAll.value());
assertFalse(hanging.getInterrupted());
ms.stop();
ms.shutdown();
assertTrue(hanging.getInterrupted());
assertTrue("The sink didn't get called after its first hang " +
"for subsequent records.", hanging.getGotCalledSecondTime());
}
private static class HangingSink implements MetricsSink {
private volatile boolean interrupted;
private boolean gotCalledSecondTime;
private boolean firstTime = true;
public boolean getGotCalledSecondTime() {
return gotCalledSecondTime;
}
public boolean getInterrupted() {
return interrupted;
}
@Override
public void init(SubsetConfiguration conf) {
}
@Override
public void putMetrics(MetricsRecord record) {
// No need to hang every time, just the first record.
if (!firstTime) {
gotCalledSecondTime = true;
return;
}
firstTime = false;
try {
Thread.sleep(10 * 1000);
} catch (InterruptedException ex) {
interrupted = true;
}
}
@Override
public void flush() {
}
}
@Test public void testRegisterDups() {
MetricsSystem ms = new MetricsSystemImpl();
TestSource ts1 = new TestSource("ts1");
@ -116,6 +297,7 @@ public class TestMetricsSystemImpl {
MetricsSource s2 = ms.getSource("ts1");
assertNotNull(s2);
assertNotSame(s1, s2);
ms.shutdown();
}
@Test(expected=MetricsException.class) public void testRegisterDupError() {

View File

@ -53,10 +53,9 @@ public class UserProvider extends AbstractHttpContextInjectable<Principal> imple
public String parseParam(String str) {
if (str != null) {
int len = str.length();
if (len < 1 || len > 31) {
if (len < 1) {
throw new IllegalArgumentException(MessageFormat.format(
"Parameter [{0}], invalid value [{1}], it's length must be between 1 and 31",
getName(), str));
"Parameter [{0}], its length must be at least 1", getName()));
}
}
return super.parseParam(str);

View File

@ -108,13 +108,6 @@ public class TestUserProvider {
userParam.parseParam("");
}
@Test
@TestException(exception = IllegalArgumentException.class)
public void userNameTooLong() {
UserProvider.UserParam userParam = new UserProvider.UserParam("username");
userParam.parseParam("a123456789012345678901234567890x");
}
@Test
@TestException(exception = IllegalArgumentException.class)
public void userNameInvalidStart() {
@ -135,12 +128,6 @@ public class TestUserProvider {
assertNotNull(userParam.parseParam("a"));
}
@Test
public void userNameMaxLength() {
UserProvider.UserParam userParam = new UserProvider.UserParam("username");
assertNotNull(userParam.parseParam("a123456789012345678901234567890"));
}
@Test
public void userNameValidDollarSign() {
UserProvider.UserParam userParam = new UserProvider.UserParam("username");

View File

@ -92,17 +92,12 @@ Trunk (Unreleased)
HDFS-3040. TestMulitipleNNDataBlockScanner is misspelled. (Madhukara Phatak
via atm)
HDFS-3049. During the normal NN startup process, fall back on a different
edit log if we see one that is corrupt (Colin Patrick McCabe via todd)
HDFS-3478. Test quotas with Long.Max_Value. (Sujay Rau via eli)
HDFS-3498. Support replica removal in BlockPlacementPolicy and make
BlockPlacementPolicyDefault extensible for reusing code in subclasses.
(Junping Du via szetszwo)
HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd)
HDFS-3510. Editlog pre-allocation is performed prior to writing edits
to avoid partial edits case disk out of space.(Colin McCabe via suresh)
@ -146,8 +141,6 @@ Trunk (Unreleased)
HDFS-4052. BlockManager#invalidateWork should print log outside the lock.
(Jing Zhao via suresh)
HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh)
HDFS-4124. Refactor INodeDirectory#getExistingPathINodes() to enable
returning more than INode array. (Jing Zhao via suresh)
@ -160,10 +153,6 @@ Trunk (Unreleased)
HDFS-4152. Add a new class BlocksMapUpdateInfo for the parameter in
INode.collectSubtreeBlocksAndClear(..). (Jing Zhao via szetszwo)
HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm)
HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd)
HDFS-4206. Change the fields in INode and its subclasses to private.
(szetszwo)
@ -176,6 +165,11 @@ Trunk (Unreleased)
HDFS-4209. Clean up the addNode/addChild/addChildNoQuotaCheck methods in
FSDirectory and INodeDirectory. (szetszwo)
HDFS-3358. Specify explicitly that the NN UI status total is talking
of persistent objects on heap. (harsh)
HDFS-4234. Use generic code for choosing datanode in Balancer. (szetszwo)
OPTIMIZATIONS
BUG FIXES
@ -268,107 +262,12 @@ Trunk (Unreleased)
HDFS-4105. The SPNEGO user for secondary namenode should use the web
keytab. (Arpit Gupta via jitendra)
BREAKDOWN OF HDFS-3077 SUBTASKS
HDFS-4240. For nodegroup-aware block placement, when a node is excluded,
the nodes in the same nodegroup should also be excluded. (Junping Du
via szetszwo)
HDFS-3077. Quorum-based protocol for reading and writing edit logs.
(todd, Brandon Li, and Hari Mankude via todd)
HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd)
HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs
(todd)
HDFS-3693. JNStorage should read its storage info even before a writer
becomes active (todd)
HDFS-3725. Fix QJM startup when individual JNs have gaps (todd)
HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd)
HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm)
HDFS-3793. Implement genericized format() in QJM (todd)
HDFS-3795. QJM: validate journal dir at startup (todd)
HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid
segment (todd)
HDFS-3799. QJM: handle empty log segments during recovery (todd)
HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd)
HDFS-3800. improvements to QJM fault testing (todd)
HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed
setting of HTTP port. (todd and atm)
HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd
and atm)
HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode"
(eli)
HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd)
HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli)
HDFS-3863. Track last "committed" txid in QJM (todd)
HDFS-3869. Expose non-file journal manager details in web UI (todd)
HDFS-3884. Journal format() should reset cached values (todd)
HDFS-3870. Add metrics to JournalNode (todd)
HDFS-3891. Make selectInputStreams throw IOE instead of RTE (todd)
HDFS-3726. If a logger misses an RPC, don't retry that logger until next
segment (todd)
HDFS-3893. QJM: Make QJM work with security enabled. (atm)
HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are
out-of-sync (todd)
HDFS-3899. QJM: Add client-side metrics (todd)
HDFS-3914. QJM: acceptRecovery should abort current segment (todd)
HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd)
HDFS-3906. QJM: quorum timeout on failover with large log segment (todd)
HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error
before being formatted (todd)
HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC
client caching (todd)
HDFS-3926. QJM: Add user documentation for QJM. (atm)
HDFS-3943. QJM: remove currently-unused md5sum field (todd)
HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd)
HDFS-3955. QJM: Make acceptRecovery() atomic. (todd)
HDFS-3956. QJM: purge temporary files when no longer within retention
period (todd)
HDFS-4004. TestJournalNode#testJournal fails because of test case execution
order (Chao Shi via todd)
HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet
(Chao Shi via todd)
HDFS-4260 Fix HDFS tests to set test dir to a valid HDFS path as opposed
to the local build path (Chris Nauroth via Sanjay)
Release 2.0.3-alpha - Unreleased
@ -493,6 +392,24 @@ Release 2.0.3-alpha - Unreleased
HDFS-4214. OfflineEditsViewer should print out the offset at which it
encountered an error. (Colin Patrick McCabe via atm)
HDFS-4199. Provide test for HdfsVolumeId. (Ivan A. Veselovsky via atm)
HDFS-3049. During the normal NN startup process, fall back on a different
edit log if we see one that is corrupt (Colin Patrick McCabe via todd)
HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd)
HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh)
HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm)
HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd)
HDFS-4268. Remove redundant enum NNHAStatusHeartbeat.State. (shv)
HDFS-3680. Allow customized audit logging in HDFS FSNamesystem. (Marcelo
Vanzin via atm)
OPTIMIZATIONS
BUG FIXES
@ -646,6 +563,127 @@ Release 2.0.3-alpha - Unreleased
of it is undefined after the iteration or modifications of the map.
(szetszwo)
HDFS-4231. BackupNode: Introduce BackupState. (shv)
HDFS-4243. When replacing an INodeDirectory, the parent pointers of the
children of the child have to be updated to the new child. (Jing Zhao
via szetszwo)
HDFS-4238. Standby namenode should not do purging of shared
storage edits. (todd)
HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test
(todd)
HDFS-4236. Remove artificial limit on username length introduced in
HDFS-4171. (tucu via suresh)
HDFS-4279. NameNode does not initialize generic conf keys when started
with -recover. (Colin Patrick McCabe via atm)
BREAKDOWN OF HDFS-3077 SUBTASKS
HDFS-3077. Quorum-based protocol for reading and writing edit logs.
(todd, Brandon Li, and Hari Mankude via todd)
HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd)
HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs
(todd)
HDFS-3693. JNStorage should read its storage info even before a writer
becomes active (todd)
HDFS-3725. Fix QJM startup when individual JNs have gaps (todd)
HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd)
HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm)
HDFS-3793. Implement genericized format() in QJM (todd)
HDFS-3795. QJM: validate journal dir at startup (todd)
HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid
segment (todd)
HDFS-3799. QJM: handle empty log segments during recovery (todd)
HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd)
HDFS-3800. improvements to QJM fault testing (todd)
HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed
setting of HTTP port. (todd and atm)
HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd
and atm)
HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode"
(eli)
HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd)
HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli)
HDFS-3863. Track last "committed" txid in QJM (todd)
HDFS-3869. Expose non-file journal manager details in web UI (todd)
HDFS-3884. Journal format() should reset cached values (todd)
HDFS-3870. Add metrics to JournalNode (todd)
HDFS-3891. Make selectInputStreams throw IOE instead of RTE (todd)
HDFS-3726. If a logger misses an RPC, don't retry that logger until next
segment (todd)
HDFS-3893. QJM: Make QJM work with security enabled. (atm)
HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm)
HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd)
HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd)
HDFS-3900. QJM: avoid validating log segments on log rolls (todd)
HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are
out-of-sync (todd)
HDFS-3899. QJM: Add client-side metrics (todd)
HDFS-3914. QJM: acceptRecovery should abort current segment (todd)
HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd)
HDFS-3906. QJM: quorum timeout on failover with large log segment (todd)
HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error
before being formatted (todd)
HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC
client caching (todd)
HDFS-3926. QJM: Add user documentation for QJM. (atm)
HDFS-3943. QJM: remove currently-unused md5sum field (todd)
HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd)
HDFS-3955. QJM: Make acceptRecovery() atomic. (todd)
HDFS-3956. QJM: purge temporary files when no longer within retention
period (todd)
HDFS-4004. TestJournalNode#testJournal fails because of test case execution
order (Chao Shi via todd)
HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet
(Chao Shi via todd)
Release 2.0.2-alpha - 2012-09-07
INCOMPATIBLE CHANGES
@ -2035,6 +2073,11 @@ Release 0.23.6 - UNRELEASED
BUG FIXES
HDFS-4247. saveNamespace should be tolerant of dangling lease (daryn)
HDFS-4248. Renaming directories may incorrectly remove the paths in leases
under the tree. (daryn via szetszwo)
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -27,26 +27,38 @@ import org.apache.hadoop.classification.InterfaceStability;
* HDFS-specific volume identifier which implements {@link VolumeId}. Can be
* used to differentiate between the data directories on a single datanode. This
* identifier is only unique on a per-datanode basis.
*
* Note that invalid IDs are represented by {@link VolumeId#INVALID_VOLUME_ID}.
*/
@InterfaceStability.Unstable
@InterfaceAudience.Public
public class HdfsVolumeId implements VolumeId {
private final byte[] id;
private final boolean isValid;
public HdfsVolumeId(byte[] id, boolean isValid) {
public HdfsVolumeId(byte[] id) {
if (id == null) {
throw new NullPointerException("A valid Id can only be constructed " +
"with a non-null byte array.");
}
this.id = id;
this.isValid = isValid;
}
@Override
public boolean isValid() {
return isValid;
public final boolean isValid() {
return true;
}
@Override
public int compareTo(VolumeId arg0) {
if (arg0 == null) {
return 1;
}
if (!arg0.isValid()) {
// any valid ID is greater
// than any invalid ID:
return 1;
}
return hashCode() - arg0.hashCode();
}
@ -63,8 +75,10 @@ public class HdfsVolumeId implements VolumeId {
if (obj == this) {
return true;
}
HdfsVolumeId that = (HdfsVolumeId) obj;
// NB: if (!obj.isValid()) { return false; } check is not necessary
// because we have class identity checking above, and for this class
// isValid() is always true.
return new EqualsBuilder().append(this.id, that.id).isEquals();
}
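A small sketch of the resulting ordering contract between valid and invalid IDs (the byte contents below are arbitrary; VolumeId.INVALID_VOLUME_ID is introduced in the next file):
// Illustration of compareTo()/isValid() after this change.
VolumeId valid = new HdfsVolumeId("volume-0".getBytes());
VolumeId invalid = VolumeId.INVALID_VOLUME_ID;
assert valid.isValid();
assert !invalid.isValid();
assert valid.compareTo(invalid) > 0;    // any valid ID is greater than any invalid ID
assert invalid.compareTo(valid) < 0;
assert invalid.compareTo(invalid) == 0; // the invalid ID equals only itself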

View File

@ -28,6 +28,48 @@ import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Public
public interface VolumeId extends Comparable<VolumeId> {
/**
* Represents an invalid Volume ID (ID for unknown content).
*/
public static final VolumeId INVALID_VOLUME_ID = new VolumeId() {
@Override
public int compareTo(VolumeId arg0) {
// This object is equal only to itself;
// It is greater than null, and
// is always less than any other VolumeId:
if (arg0 == null) {
return 1;
}
if (arg0 == this) {
return 0;
} else {
return -1;
}
}
@Override
public boolean equals(Object obj) {
// this object is equal only to itself:
return (obj == this);
}
@Override
public int hashCode() {
return Integer.MIN_VALUE;
}
@Override
public boolean isValid() {
return false;
}
@Override
public String toString() {
return "Invalid VolumeId";
}
};
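The two files above make HdfsVolumeId always valid and introduce the shared INVALID_VOLUME_ID sentinel on VolumeId. A minimal sketch, not part of this commit, of how a caller might observe those semantics; the byte contents and class name are made up, and the org.apache.hadoop.fs package is assumed for the two types.

import org.apache.hadoop.fs.HdfsVolumeId;
import org.apache.hadoop.fs.VolumeId;

public class VolumeIdSketch {
  public static void main(String[] args) {
    // A constructed id is always valid now; validity is no longer a constructor flag.
    VolumeId valid = new HdfsVolumeId(new byte[] {1, 2, 3});   // contents are made up
    VolumeId invalid = VolumeId.INVALID_VOLUME_ID;

    System.out.println(valid.isValid());            // true
    System.out.println(invalid.isValid());          // false
    System.out.println(valid.compareTo(invalid));   // positive: valid ids sort after the sentinel
    System.out.println(invalid.compareTo(valid));   // negative: the sentinel sorts before any other id
    System.out.println(invalid.equals(valid));      // false: the sentinel only equals itself
  }
}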
/**
* Indicates if the disk identifier is valid. Invalid identifiers indicate
* that the block was not present, or the location could otherwise not be

View File

@ -202,7 +202,7 @@ class BlockStorageLocationUtil {
ArrayList<VolumeId> l = new ArrayList<VolumeId>(b.getLocations().length);
// Start off all IDs as invalid, fill it in later with results from RPCs
for (int i = 0; i < b.getLocations().length; i++) {
l.add(new HdfsVolumeId(null, false));
l.add(VolumeId.INVALID_VOLUME_ID);
}
blockVolumeIds.put(b, l);
}
@ -236,7 +236,7 @@ class BlockStorageLocationUtil {
// Get the VolumeId by indexing into the list of VolumeIds
// provided by the datanode
byte[] volumeId = metaVolumeIds.get(volumeIndex);
HdfsVolumeId id = new HdfsVolumeId(volumeId, true);
HdfsVolumeId id = new HdfsVolumeId(volumeId);
// Find out which index we are in the LocatedBlock's replicas
LocatedBlock locBlock = extBlockToLocBlock.get(extBlock);
DatanodeInfo[] dnInfos = locBlock.getLocations();

View File

@ -246,6 +246,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_HOSTS = "dfs.hosts";
public static final String DFS_HOSTS_EXCLUDE = "dfs.hosts.exclude";
public static final String DFS_CLIENT_LOCAL_INTERFACES = "dfs.client.local.interfaces";
public static final String DFS_NAMENODE_AUDIT_LOGGERS_KEY = "dfs.namenode.audit.loggers";
public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default";
// Much code in hdfs is not yet updated to use these keys.
public static final String DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY = "dfs.client.block.write.locateFollowingBlock.retries";

View File

@ -26,6 +26,7 @@ import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
@ -1232,9 +1233,9 @@ public class PBHelper {
if (s == null) return null;
switch (s.getState()) {
case ACTIVE:
return new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.ACTIVE, s.getTxid());
return new NNHAStatusHeartbeat(HAServiceState.ACTIVE, s.getTxid());
case STANDBY:
return new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.STANDBY, s.getTxid());
return new NNHAStatusHeartbeat(HAServiceState.STANDBY, s.getTxid());
default:
throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" + s.getState());
}

View File

@ -75,6 +75,7 @@ import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocat
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time;
@ -557,7 +558,7 @@ public class Balancer {
}
/** Decide if still need to move more bytes */
protected boolean isMoveQuotaFull() {
protected boolean hasSpaceForScheduling() {
return scheduledSize<maxSize2Move;
}
@ -922,23 +923,53 @@ public class Balancer {
LOG.info(nodes.size() + " " + name + ": " + nodes);
}
/* Decide all <source, target> pairs and
/** A matcher interface for matching nodes. */
private interface Matcher {
/** Given the cluster topology, does the left node match the right node? */
boolean match(NetworkTopology cluster, Node left, Node right);
}
/** Match datanodes in the same node group. */
static final Matcher SAME_NODE_GROUP = new Matcher() {
@Override
public boolean match(NetworkTopology cluster, Node left, Node right) {
return cluster.isOnSameNodeGroup(left, right);
}
};
/** Match datanodes in the same rack. */
static final Matcher SAME_RACK = new Matcher() {
@Override
public boolean match(NetworkTopology cluster, Node left, Node right) {
return cluster.isOnSameRack(left, right);
}
};
/** Match any datanode with any other datanode. */
static final Matcher ANY_OTHER = new Matcher() {
@Override
public boolean match(NetworkTopology cluster, Node left, Node right) {
return left != right;
}
};
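A toy illustration of what these matchers test, not taken from the Balancer itself: the Matcher constants are private to Balancer, so the same-rack check is restated locally against a small NodeBase topology with made-up host and rack names.

import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.net.NodeBase;

public class MatcherSketch {
  public static void main(String[] args) {
    NetworkTopology cluster = new NetworkTopology();
    Node a = new NodeBase("host-a", "/rack1");
    Node b = new NodeBase("host-b", "/rack1");
    Node c = new NodeBase("host-c", "/rack2");
    cluster.add(a);
    cluster.add(b);
    cluster.add(c);

    // Same shape as the SAME_RACK matcher: two nodes sharing a rack.
    System.out.println(cluster.isOnSameRack(a, b));   // true
    System.out.println(cluster.isOnSameRack(a, c));   // false

    // ANY_OTHER only requires the nodes to be distinct.
    System.out.println(a != c);                        // true
  }
}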
/**
* Decide all <source, target> pairs and
* the number of bytes to move from a source to a target
* Maximum bytes to be moved per node is
* Min(1 Band worth of bytes, MAX_SIZE_TO_MOVE).
* Return total number of bytes to move in this iteration
*/
private long chooseNodes() {
// First, match nodes on the same node group if cluster has nodegroup
// awareness
// First, match nodes on the same node group if cluster is node group aware
if (cluster.isNodeGroupAware()) {
chooseNodesOnSameNodeGroup();
chooseNodes(SAME_NODE_GROUP);
}
// Then, match nodes on the same rack
chooseNodes(true);
// At last, match nodes on different racks
chooseNodes(false);
chooseNodes(SAME_RACK);
// At last, match all remaining nodes
chooseNodes(ANY_OTHER);
assert (datanodes.size() >= sources.size()+targets.size())
: "Mismatched number of datanodes (" +
@ -953,56 +984,54 @@ public class Balancer {
return bytesToMove;
}
/**
* Decide all <source, target> pairs where source and target are
* on the same NodeGroup
*/
private void chooseNodesOnSameNodeGroup() {
/** Decide all <source, target> pairs according to the matcher. */
private void chooseNodes(final Matcher matcher) {
/* first step: match each overUtilized datanode (source) to
* one or more underUtilized datanodes within same NodeGroup(targets).
* one or more underUtilized datanodes (targets).
*/
chooseOnSameNodeGroup(overUtilizedDatanodes, underUtilizedDatanodes);
chooseDatanodes(overUtilizedDatanodes, underUtilizedDatanodes, matcher);
/* match each remaining overutilized datanode (source) to below average
* utilized datanodes within the same NodeGroup(targets).
/* match each remaining overutilized datanode (source) to
* below average utilized datanodes (targets).
* Note only overutilized datanodes that haven't had that max bytes to move
* satisfied in step 1 are selected
*/
chooseOnSameNodeGroup(overUtilizedDatanodes, belowAvgUtilizedDatanodes);
chooseDatanodes(overUtilizedDatanodes, belowAvgUtilizedDatanodes, matcher);
/* match each remaining underutilized datanode to above average utilized
* datanodes within the same NodeGroup.
/* match each remaining underutilized datanode (target) to
* above average utilized datanodes (source).
* Note only underutilized datanodes that have not had that max bytes to
* move satisfied in step 1 are selected.
*/
chooseOnSameNodeGroup(underUtilizedDatanodes, aboveAvgUtilizedDatanodes);
chooseDatanodes(underUtilizedDatanodes, aboveAvgUtilizedDatanodes, matcher);
}
/**
* Match two sets of nodes within the same NodeGroup, one should be source
* nodes (utilization > Avg), and the other should be destination nodes
* (utilization < Avg).
* @param datanodes
* @param candidates
* For each datanode, choose matching nodes from the candidates. Either the
* datanodes or the candidates are source nodes with (utilization > Avg), and
* the others are target nodes with (utilization < Avg).
*/
private <D extends BalancerDatanode, C extends BalancerDatanode> void
chooseOnSameNodeGroup(Collection<D> datanodes, Collection<C> candidates) {
chooseDatanodes(Collection<D> datanodes, Collection<C> candidates,
Matcher matcher) {
for (Iterator<D> i = datanodes.iterator(); i.hasNext();) {
final D datanode = i.next();
for(; chooseOnSameNodeGroup(datanode, candidates.iterator()); );
if (!datanode.isMoveQuotaFull()) {
for(; chooseForOneDatanode(datanode, candidates, matcher); );
if (!datanode.hasSpaceForScheduling()) {
i.remove();
}
}
}
/**
* Match one datanode with a set of candidates nodes within the same NodeGroup.
* For the given datanode, choose a candidate and then schedule it.
* @return true if a candidate is chosen; false if no candidate is chosen.
*/
private <T extends BalancerDatanode> boolean chooseOnSameNodeGroup(
BalancerDatanode dn, Iterator<T> candidates) {
final T chosen = chooseCandidateOnSameNodeGroup(dn, candidates);
private <C extends BalancerDatanode> boolean chooseForOneDatanode(
BalancerDatanode dn, Collection<C> candidates, Matcher matcher) {
final Iterator<C> i = candidates.iterator();
final C chosen = chooseCandidate(dn, i, matcher);
if (chosen == null) {
return false;
}
@ -1011,8 +1040,8 @@ public class Balancer {
} else {
matchSourceWithTargetToMove((Source)chosen, dn);
}
if (!chosen.isMoveQuotaFull()) {
candidates.remove();
if (!chosen.hasSpaceForScheduling()) {
i.remove();
}
return true;
}
@ -1029,19 +1058,15 @@ public class Balancer {
+source.datanode.getName() + " to " + target.datanode.getName());
}
/** choose a datanode from <code>candidates</code> within the same NodeGroup
* of <code>dn</code>.
*/
private <T extends BalancerDatanode> T chooseCandidateOnSameNodeGroup(
BalancerDatanode dn, Iterator<T> candidates) {
if (dn.isMoveQuotaFull()) {
/** Choose a candidate for the given datanode. */
private <D extends BalancerDatanode, C extends BalancerDatanode>
C chooseCandidate(D dn, Iterator<C> candidates, Matcher matcher) {
if (dn.hasSpaceForScheduling()) {
for(; candidates.hasNext(); ) {
final T c = candidates.next();
if (!c.isMoveQuotaFull()) {
final C c = candidates.next();
if (!c.hasSpaceForScheduling()) {
candidates.remove();
continue;
}
if (cluster.isOnSameNodeGroup(dn.getDatanode(), c.getDatanode())) {
} else if (matcher.match(cluster, dn.getDatanode(), c.getDatanode())) {
return c;
}
}
@ -1049,148 +1074,6 @@ public class Balancer {
return null;
}
/* if onRack is true, decide all <source, target> pairs
* where source and target are on the same rack; Otherwise
* decide all <source, target> pairs where source and target are
* on different racks
*/
private void chooseNodes(boolean onRack) {
/* first step: match each overUtilized datanode (source) to
* one or more underUtilized datanodes (targets).
*/
chooseTargets(underUtilizedDatanodes, onRack);
/* match each remaining overutilized datanode (source) to
* below average utilized datanodes (targets).
* Note only overutilized datanodes that haven't had that max bytes to move
* satisfied in step 1 are selected
*/
chooseTargets(belowAvgUtilizedDatanodes, onRack);
/* match each remaining underutilized datanode (target) to
* above average utilized datanodes (source).
* Note only underutilized datanodes that have not had that max bytes to
* move satisfied in step 1 are selected.
*/
chooseSources(aboveAvgUtilizedDatanodes, onRack);
}
/* choose targets from the target candidate list for each over utilized
* source datanode. OnRackTarget determines if the chosen target
* should be on the same rack as the source
*/
private void chooseTargets(
Collection<BalancerDatanode> targetCandidates, boolean onRackTarget ) {
for (Iterator<Source> srcIterator = overUtilizedDatanodes.iterator();
srcIterator.hasNext();) {
Source source = srcIterator.next();
while (chooseTarget(source, targetCandidates.iterator(), onRackTarget)) {
}
if (!source.isMoveQuotaFull()) {
srcIterator.remove();
}
}
return;
}
/* choose sources from the source candidate list for each under utilized
* target datanode. onRackSource determines if the chosen source
* should be on the same rack as the target
*/
private void chooseSources(
Collection<Source> sourceCandidates, boolean onRackSource) {
for (Iterator<BalancerDatanode> targetIterator =
underUtilizedDatanodes.iterator(); targetIterator.hasNext();) {
BalancerDatanode target = targetIterator.next();
while (chooseSource(target, sourceCandidates.iterator(), onRackSource)) {
}
if (!target.isMoveQuotaFull()) {
targetIterator.remove();
}
}
return;
}
/* For the given source, choose targets from the target candidate list.
* OnRackTarget determines if the chosen target
* should be on the same rack as the source
*/
private boolean chooseTarget(Source source,
Iterator<BalancerDatanode> targetCandidates, boolean onRackTarget) {
if (!source.isMoveQuotaFull()) {
return false;
}
boolean foundTarget = false;
BalancerDatanode target = null;
while (!foundTarget && targetCandidates.hasNext()) {
target = targetCandidates.next();
if (!target.isMoveQuotaFull()) {
targetCandidates.remove();
continue;
}
if (onRackTarget) {
// choose from on-rack nodes
if (cluster.isOnSameRack(source.datanode, target.datanode)) {
foundTarget = true;
}
} else {
// choose from off-rack nodes
if (!cluster.isOnSameRack(source.datanode, target.datanode)) {
foundTarget = true;
}
}
}
if (foundTarget) {
assert(target != null):"Choose a null target";
matchSourceWithTargetToMove(source, target);
if (!target.isMoveQuotaFull()) {
targetCandidates.remove();
}
return true;
}
return false;
}
/* For the given target, choose sources from the source candidate list.
* OnRackSource determines if the chosen source
* should be on the same rack as the target
*/
private boolean chooseSource(BalancerDatanode target,
Iterator<Source> sourceCandidates, boolean onRackSource) {
if (!target.isMoveQuotaFull()) {
return false;
}
boolean foundSource = false;
Source source = null;
while (!foundSource && sourceCandidates.hasNext()) {
source = sourceCandidates.next();
if (!source.isMoveQuotaFull()) {
sourceCandidates.remove();
continue;
}
if (onRackSource) {
// choose from on-rack nodes
if ( cluster.isOnSameRack(source.getDatanode(), target.getDatanode())) {
foundSource = true;
}
} else {
// choose from off-rack nodes
if (!cluster.isOnSameRack(source.datanode, target.datanode)) {
foundSource = true;
}
}
}
if (foundSource) {
assert(source != null):"Choose a null source";
matchSourceWithTargetToMove(source, target);
if ( !source.isMoveQuotaFull()) {
sourceCandidates.remove();
}
return true;
}
return false;
}
private static class BytesMoved {
private long bytesMoved = 0L;
private synchronized void inc( long bytes ) {

View File

@ -152,8 +152,9 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
List<DatanodeDescriptor> results =
new ArrayList<DatanodeDescriptor>(chosenNodes);
for (Node node:chosenNodes) {
excludedNodes.put(node, node);
for (DatanodeDescriptor node:chosenNodes) {
// add localMachine and related nodes to excludedNodes
addToExcludedNodes(node, excludedNodes);
adjustExcludedNodes(excludedNodes, node);
}
@ -235,7 +236,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
+ totalReplicasExpected + "\n"
+ e.getMessage());
if (avoidStaleNodes) {
// ecxludedNodes now has - initial excludedNodes, any nodes that were
// excludedNodes now has - initial excludedNodes, any nodes that were
// chosen and nodes that were tried but were not chosen because they
// were stale, decommissioned or for any other reason a node is not
// chosen for write. Retry again now not avoiding stale node
@ -273,6 +274,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
if (isGoodTarget(localMachine, blocksize, maxNodesPerRack, false,
results, avoidStaleNodes)) {
results.add(localMachine);
// add localMachine and related nodes to excludedNode
addToExcludedNodes(localMachine, excludedNodes);
return localMachine;
}
}
@ -282,6 +285,18 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
maxNodesPerRack, results, avoidStaleNodes);
}
/**
* Add <i>localMachine</i> and related nodes to <i>excludedNodes</i>
* for the next replica choice. Subclasses may add more nodes within
* the same failure domain as localMachine.
* @return number of new excluded nodes
*/
protected int addToExcludedNodes(DatanodeDescriptor localMachine,
HashMap<Node, Node> excludedNodes) {
Node node = excludedNodes.put(localMachine, localMachine);
return node == null?1:0;
}
/* choose one node from the rack that <i>localMachine</i> is on.
* if no such node is available, choose one node from the rack where
* a second replica is on.
@ -392,6 +407,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
if (isGoodTarget(chosenNode, blocksize,
maxNodesPerRack, results, avoidStaleNodes)) {
results.add(chosenNode);
// add chosenNode and related nodes to excludedNode
addToExcludedNodes(chosenNode, excludedNodes);
adjustExcludedNodes(excludedNodes, chosenNode);
return chosenNode;
} else {
@ -441,6 +458,9 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
maxNodesPerRack, results, avoidStaleNodes)) {
numOfReplicas--;
results.add(chosenNode);
// add chosenNode and related nodes to excludedNode
int newExcludedNodes = addToExcludedNodes(chosenNode, excludedNodes);
numOfAvailableNodes -= newExcludedNodes;
adjustExcludedNodes(excludedNodes, chosenNode);
} else {
badTarget = true;

View File

@ -241,6 +241,27 @@ public class BlockPlacementPolicyWithNodeGroup extends BlockPlacementPolicyDefau
return NetworkTopology.getFirstHalf(nodeGroupString);
}
/**
* Find the other nodes in the same nodegroup as <i>localMachine</i> and add them
* to <i>excludedNodes</i>, since replicas should not be duplicated on nodes
* within the same nodegroup
* @return number of new excluded nodes
*/
protected int addToExcludedNodes(DatanodeDescriptor localMachine,
HashMap<Node, Node> excludedNodes) {
int countOfExcludedNodes = 0;
String nodeGroupScope = localMachine.getNetworkLocation();
List<Node> leafNodes = clusterMap.getLeaves(nodeGroupScope);
for (Node leafNode : leafNodes) {
Node node = excludedNodes.put(leafNode, leafNode);
if (node == null) {
// not an existing node in excludedNodes
countOfExcludedNodes++;
}
}
return countOfExcludedNodes;
}
/**
* Pick up replica node set for deleting replica as over-replicated.
* First set contains replica nodes on rack with more than one

View File

@ -26,6 +26,7 @@ import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.commons.logging.Log;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@ -411,7 +412,7 @@ class BPOfferService {
final long txid = nnHaState.getTxId();
final boolean nnClaimsActive =
nnHaState.getState() == NNHAStatusHeartbeat.State.ACTIVE;
nnHaState.getState() == HAServiceState.ACTIVE;
final boolean bposThinksActive = bpServiceToActive == actor;
final boolean isMoreRecentClaim = txid > lastActiveClaimTxId;

View File

@ -0,0 +1,61 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.net.InetAddress;
import java.security.Principal;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
/**
* Interface defining an audit logger.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public interface AuditLogger {
/**
* Called during initialization of the logger.
*
* @param conf The configuration object.
*/
void initialize(Configuration conf);
/**
* Called to log an audit event.
* <p>
* This method must return as quickly as possible, since it's called
* in a critical section of the NameNode's operation.
*
* @param succeeded Whether authorization succeeded.
* @param userName Name of the user executing the request.
* @param addr Remote address of the request.
* @param cmd The requested command.
* @param src Path of affected source file.
* @param dst Path of affected destination file (if any).
* @param stat File information for operations that change the file's
* metadata (permissions, owner, times, etc).
*/
void logAuditEvent(boolean succeeded, String userName,
InetAddress addr, String cmd, String src, String dst,
FileStatus stat);
}
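Together with the new dfs.namenode.audit.loggers key added to DFSConfigKeys above, this interface lets deployments plug in their own audit sinks. A hypothetical implementation, not part of this commit: the class and package names are invented, and it would be enabled by listing the class name under dfs.namenode.audit.loggers; since FSNamesystem instantiates loggers reflectively, a public no-arg constructor is assumed.

// Hypothetical plug-in example; package and class name are invented.
package org.example.audit;

import java.net.InetAddress;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.hdfs.server.namenode.AuditLogger;

public class DenialCountingAuditLogger implements AuditLogger {
  private final AtomicLong denied = new AtomicLong();

  @Override
  public void initialize(Configuration conf) {
    // Nothing to configure for this sketch; custom keys could be read here.
  }

  @Override
  public void logAuditEvent(boolean succeeded, String userName,
      InetAddress addr, String cmd, String src, String dst,
      FileStatus stat) {
    // Keep this cheap: the contract above says the call sits on the
    // NameNode's critical path.
    if (!succeeded) {
      denied.incrementAndGet();
    }
  }

  public long getDeniedCount() {
    return denied.get();
  }
}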

View File

@ -24,6 +24,7 @@ import java.net.SocketTimeoutException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.NameNodeProxies;
@ -35,6 +36,7 @@ import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
import org.apache.hadoop.hdfs.protocolPB.JournalProtocolServerSideTranslatorPB;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.protocol.FenceResponse;
import org.apache.hadoop.hdfs.server.protocol.JournalInfo;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
@ -416,12 +418,21 @@ public class BackupNode extends NameNode {
}
@Override
protected String getNameServiceId(Configuration conf) {
return DFSUtil.getBackupNameServiceId(conf);
}
protected HAState createHAState() {
return new BackupState();
}
@Override // NameNode
protected NameNodeHAContext createHAContext() {
return new BNHAContext();
}
private class BNHAContext extends NameNodeHAContext {
@Override // NameNode
@Override // NameNodeHAContext
public void checkOperation(OperationCategory op)
throws StandbyException {
if (op == OperationCategory.UNCHECKED ||
@ -435,10 +446,42 @@ public class BackupNode extends NameNode {
throw new StandbyException(msg);
}
}
@Override // NameNodeHAContext
public void prepareToStopStandbyServices() throws ServiceFailedException {
}
/**
* Start services for BackupNode.
* <p>
* The following services should be muted
* (not run or not pass any control commands to DataNodes)
* on BackupNode:
* {@link LeaseManager.Monitor} protected by SafeMode.
* {@link BlockManager.ReplicationMonitor} protected by SafeMode.
* {@link HeartbeatManager.Monitor} protected by SafeMode.
* {@link DecommissionManager.Monitor} needs to prohibit refreshNodes().
* {@link PendingReplicationBlocks.PendingReplicationMonitor} harmless,
* because ReplicationMonitor is muted.
*/
@Override
public void startActiveServices() throws IOException {
try {
namesystem.startActiveServices();
} catch (Throwable t) {
doImmediateShutdown(t);
}
}
@Override
protected String getNameServiceId(Configuration conf) {
return DFSUtil.getBackupNameServiceId(conf);
public void stopActiveServices() throws IOException {
try {
if (namesystem != null) {
namesystem.stopActiveServices();
}
} catch (Throwable t) {
doImmediateShutdown(t);
}
}
}
}

View File

@ -0,0 +1,53 @@
package org.apache.hadoop.hdfs.server.namenode;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.ipc.StandbyException;
@InterfaceAudience.Private
public class BackupState extends HAState {
public BackupState() {
super(HAServiceState.STANDBY);
}
@Override // HAState
public void checkOperation(HAContext context, OperationCategory op)
throws StandbyException {
context.checkOperation(op);
}
@Override // HAState
public boolean shouldPopulateReplQueues() {
return false;
}
@Override // HAState
public void enterState(HAContext context) throws ServiceFailedException {
try {
context.startActiveServices();
} catch (IOException e) {
throw new ServiceFailedException("Failed to start backup services", e);
}
}
@Override // HAState
public void exitState(HAContext context) throws ServiceFailedException {
try {
context.stopActiveServices();
} catch (IOException e) {
throw new ServiceFailedException("Failed to stop backup services", e);
}
}
@Override // HAState
public void prepareToExitState(HAContext context) throws ServiceFailedException {
context.prepareToStopStandbyServices();
}
}

View File

@ -587,6 +587,8 @@ public class FSDirectory implements Closeable {
// update modification time of dst and the parent of src
srcInodes[srcInodes.length-2].setModificationTime(timestamp);
dstInodes[dstInodes.length-2].setModificationTime(timestamp);
// update moved leases with new filename
getFSNamesystem().unprotectedChangeLease(src, dst);
return true;
}
} finally {
@ -752,6 +754,8 @@ public class FSDirectory implements Closeable {
}
srcInodes[srcInodes.length - 2].setModificationTime(timestamp);
dstInodes[dstInodes.length - 2].setModificationTime(timestamp);
// update moved lease with new filename
getFSNamesystem().unprotectedChangeLease(src, dst);
// Collect the blocks and remove the lease for previous dst
int filesDeleted = 0;
@ -1204,19 +1208,38 @@ public class FSDirectory implements Closeable {
) throws IOException {
writeLock();
try {
replaceINodeUnsynced(path, oldnode, newnode);
unprotectedReplaceNode(path, oldnode, newnode);
} finally {
writeUnlock();
}
}
//Currently, oldnode and newnode are assumed to contain the same blocks.
//Otherwise, blocks need to be removed from the blocksMap.
void unprotectedReplaceNode(String path, INodeFile oldnode, INodeFile newnode)
throws IOException, UnresolvedLinkException {
assert hasWriteLock();
INodeDirectory parent = oldnode.parent;
// Remove the node from the namespace
if (!oldnode.removeNode()) {
NameNode.stateChangeLog.warn("DIR* FSDirectory.replaceNode: " +
"failed to remove " + path);
throw new IOException("FSDirectory.replaceNode: " +
"failed to remove " + path);
}
// Parent should be non-null, otherwise oldnode.removeNode() will return
// false
newnode.setLocalName(oldnode.getLocalNameBytes());
parent.addChild(newnode, true);
/* Currently oldnode and newnode are assumed to contain the same
* blocks. Otherwise, blocks need to be removed from the blocksMap.
*/
int index = 0;
for (BlockInfo b : newnode.getBlocks()) {
BlockInfo info = getBlockManager().addBlockCollection(b, newnode);
newnode.setBlock(index, info); // inode refers to the block in BlocksMap
index++;
}
} finally {
writeUnlock();
}
}
/**

View File

@ -914,6 +914,11 @@ public class FSEditLog implements LogsPurgeable {
return journalSet;
}
@VisibleForTesting
synchronized void setJournalSetForTesting(JournalSet js) {
this.journalSet = js;
}
/**
* Used only by tests.
*/
@ -1067,9 +1072,18 @@ public class FSEditLog implements LogsPurgeable {
/**
* Archive any log files that are older than the given txid.
*
* If the edit log is not open for write, then this call returns with no
* effect.
*/
@Override
public synchronized void purgeLogsOlderThan(final long minTxIdToKeep) {
// Should not purge logs unless they are open for write.
// This prevents the SBN from purging logs on shared storage, for example.
if (!isOpenForWrite()) {
return;
}
assert curSegmentTxId == HdfsConstants.INVALID_TXID || // on format this is no-op
minTxIdToKeep <= curSegmentTxId :
"cannot purge logs older than txid " + minTxIdToKeep +

View File

@ -31,7 +31,6 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
@ -322,7 +321,7 @@ public class FSEditLogLoader {
INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile;
fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path);
INodeFile newFile = ucFile.convertToInodeFile();
fsDir.replaceNode(addCloseOp.path, ucFile, newFile);
fsDir.unprotectedReplaceNode(addCloseOp.path, ucFile, newFile);
}
break;
}
@ -360,10 +359,8 @@ public class FSEditLogLoader {
}
case OP_RENAME_OLD: {
RenameOldOp renameOp = (RenameOldOp)op;
HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false);
fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
renameOp.timestamp);
fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo);
break;
}
case OP_DELETE: {
@ -433,11 +430,8 @@ public class FSEditLogLoader {
}
case OP_RENAME: {
RenameOp renameOp = (RenameOp)op;
HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false);
fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst,
renameOp.timestamp, renameOp.options);
fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo);
break;
}
case OP_GET_DELEGATION_TOKEN: {

View File

@ -197,7 +197,7 @@ public class FSImageSerialization {
public static String readString(DataInputStream in) throws IOException {
DeprecatedUTF8 ustr = TL_DATA.get().U_STR;
ustr.readFields(in);
return ustr.toString();
return ustr.toStringChecked();
}
static String readString_EmptyAsNull(DataInputStream in) throws IOException {

View File

@ -34,6 +34,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT;
@ -111,6 +113,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FsServerDefaults;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.Options;
@ -121,6 +124,7 @@ import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
@ -165,12 +169,10 @@ import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.INodesInPath;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease;
import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory;
import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState;
import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer;
import org.apache.hadoop.hdfs.server.namenode.ha.HAContext;
import org.apache.hadoop.hdfs.server.namenode.ha.HAState;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer;
import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState;
import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean;
import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithLink;
@ -250,32 +252,32 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
};
private static final void logAuditEvent(UserGroupInformation ugi,
private boolean isAuditEnabled() {
return !isDefaultAuditLogger || auditLog.isInfoEnabled();
}
private void logAuditEvent(UserGroupInformation ugi,
InetAddress addr, String cmd, String src, String dst,
HdfsFileStatus stat) {
logAuditEvent(true, ugi, addr, cmd, src, dst, stat);
}
private static final void logAuditEvent(boolean succeeded,
private void logAuditEvent(boolean succeeded,
UserGroupInformation ugi, InetAddress addr, String cmd, String src,
String dst, HdfsFileStatus stat) {
final StringBuilder sb = auditBuffer.get();
sb.setLength(0);
sb.append("allowed=").append(succeeded).append("\t");
sb.append("ugi=").append(ugi).append("\t");
sb.append("ip=").append(addr).append("\t");
sb.append("cmd=").append(cmd).append("\t");
sb.append("src=").append(src).append("\t");
sb.append("dst=").append(dst).append("\t");
if (null == stat) {
sb.append("perm=null");
} else {
sb.append("perm=");
sb.append(stat.getOwner()).append(":");
sb.append(stat.getGroup()).append(":");
sb.append(stat.getPermission());
FileStatus status = null;
if (stat != null) {
Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null;
Path path = dst != null ? new Path(dst) : new Path(src);
status = new FileStatus(stat.getLen(), stat.isDir(),
stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(),
stat.getAccessTime(), stat.getPermission(), stat.getOwner(),
stat.getGroup(), symlink, path);
}
for (AuditLogger logger : auditLoggers) {
logger.logAuditEvent(succeeded, ugi.toString(), addr,
cmd, src, dst, status);
}
auditLog.info(sb);
}
/**
@ -308,6 +310,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
final DelegationTokenSecretManager dtSecretManager;
private final boolean alwaysUseDelegationTokensForTests;
// Tracks whether the default audit logger is the only configured audit
// logger; this allows isAuditEnabled() to return false in case the
// underlying logger is disabled, and avoid some unnecessary work.
private final boolean isDefaultAuditLogger;
private final List<AuditLogger> auditLoggers;
/** The namespace tree. */
FSDirectory dir;
@ -542,14 +549,50 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
this.dir = new FSDirectory(fsImage, this, conf);
this.snapshotManager = new SnapshotManager(this, dir);
this.safeMode = new SafeModeInfo(conf);
this.auditLoggers = initAuditLoggers(conf);
this.isDefaultAuditLogger = auditLoggers.size() == 1 &&
auditLoggers.get(0) instanceof DefaultAuditLogger;
} catch(IOException e) {
LOG.error(getClass().getSimpleName() + " initialization failed.", e);
close();
throw e;
} catch (RuntimeException re) {
LOG.error(getClass().getSimpleName() + " initialization failed.", re);
close();
throw re;
}
}
private List<AuditLogger> initAuditLoggers(Configuration conf) {
// Initialize the custom access loggers if configured.
Collection<String> alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY);
List<AuditLogger> auditLoggers = Lists.newArrayList();
if (alClasses != null && !alClasses.isEmpty()) {
for (String className : alClasses) {
try {
AuditLogger logger;
if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) {
logger = new DefaultAuditLogger();
} else {
logger = (AuditLogger) Class.forName(className).newInstance();
}
logger.initialize(conf);
auditLoggers.add(logger);
} catch (RuntimeException re) {
throw re;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
// Make sure there is at least one logger installed.
if (auditLoggers.isEmpty()) {
auditLoggers.add(new DefaultAuditLogger());
}
return auditLoggers;
}
void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled)
throws IOException {
// format before starting up if requested
@ -1010,7 +1053,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return haEnabled;
}
return haContext.getState() instanceof StandbyState;
return HAServiceState.STANDBY == haContext.getState().getServiceState();
}
/**
@ -1036,7 +1079,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
long totalInodes = this.dir.totalInodes();
long totalBlocks = this.getBlocksTotal();
out.println(totalInodes + " files and directories, " + totalBlocks
+ " blocks = " + (totalInodes + totalBlocks) + " total");
+ " blocks = " + (totalInodes + totalBlocks)
+ " total filesystem objects");
blockManager.metaSave(out);
}
@ -1082,7 +1126,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
setPermissionInt(src, permission);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setPermission", src, null, null);
@ -1104,14 +1148,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
checkOwner(src);
dir.setPermission(src, permission);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(src, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setPermission", src, null, resultingStat);
@ -1128,7 +1172,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
setOwnerInt(src, username, group);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setOwner", src, null, null);
@ -1159,14 +1203,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
}
dir.setOwner(src, username, group);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(src, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setOwner", src, null, resultingStat);
@ -1209,7 +1253,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return getBlockLocationsInt(src, offset, length, doAccessTime,
needBlockToken, checkSafeMode);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"open", src, null, null);
@ -1235,7 +1279,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
final LocatedBlocks ret = getBlockLocationsUpdateTimes(src,
offset, length, doAccessTime, needBlockToken);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"open", src, null, null);
@ -1316,7 +1360,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
concatInt(target, srcs);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getLoginUser(),
getRemoteIp(),
"concat", Arrays.toString(srcs), target, null);
@ -1359,14 +1403,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
throw new SafeModeException("Cannot concat " + target, safeMode);
}
concatInternal(target, srcs);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(target, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getLoginUser(),
getRemoteIp(),
"concat", Arrays.toString(srcs), target, resultingStat);
@ -1492,7 +1536,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
setTimesInt(src, mtime, atime);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setTimes", src, null, null);
@ -1518,7 +1562,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
INode inode = dir.getMutableINode(src);
if (inode != null) {
dir.setTimes(src, inode, mtime, atime, true);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
final HdfsFileStatus stat = dir.getFileInfo(src, false);
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
@ -1541,7 +1585,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
createSymlinkInt(target, link, dirPerms, createParent);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"createSymlink", link, target, null);
@ -1562,14 +1606,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
verifyParentDir(link);
}
createSymlinkInternal(target, link, dirPerms, createParent);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(link, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"createSymlink", link, target, resultingStat);
@ -1625,7 +1669,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
return setReplicationInt(src, replication);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setReplication", src, null, null);
@ -1661,7 +1705,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
getEditLog().logSync();
if (isFile && auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isFile && isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"setReplication", src, null, null);
@ -1717,7 +1761,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
startFileInt(src, permissions, holder, clientMachine, flag, createParent,
replication, blockSize);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"create", src, null, null);
@ -1750,7 +1794,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
}
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
final HdfsFileStatus stat = dir.getFileInfo(src, false);
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
@ -2052,7 +2096,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
return appendFileInt(src, holder, clientMachine);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"append", src, null, null);
@ -2098,7 +2142,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
+" block size " + lb.getBlock().getNumBytes());
}
}
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"append", src, null, null);
@ -2544,7 +2588,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
return renameToInt(src, dst);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"rename", src, dst, null);
@ -2566,14 +2610,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
checkOperation(OperationCategory.WRITE);
status = renameToInternal(src, dst);
if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
if (status && isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(dst, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
if (status && isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"rename", src, dst, resultingStat);
@ -2595,15 +2639,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
if (isPermissionEnabled) {
//We should not be doing this. This is move() not renameTo().
//but for now,
//NOTE: yes, this is bad! it's assuming much lower level behavior
// of rewriting the dst
String actualdst = dir.isDir(dst)?
dst + Path.SEPARATOR + new Path(src).getName(): dst;
checkParentAccess(src, FsAction.WRITE);
checkAncestorAccess(actualdst, FsAction.WRITE);
}
HdfsFileStatus dinfo = dir.getFileInfo(dst, false);
if (dir.renameTo(src, dst)) {
unprotectedChangeLease(src, dst, dinfo); // update lease with new filename
return true;
}
return false;
@ -2623,14 +2667,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
checkOperation(OperationCategory.WRITE);
renameToInternal(src, dst, options);
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
resultingStat = dir.getFileInfo(dst, false);
}
} finally {
writeUnlock();
}
getEditLog().logSync();
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
StringBuilder cmd = new StringBuilder("rename options=");
for (Rename option : options) {
cmd.append(option.value()).append(" ");
@ -2654,9 +2698,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
checkAncestorAccess(dst, FsAction.WRITE);
}
HdfsFileStatus dinfo = dir.getFileInfo(dst, false);
dir.renameTo(src, dst, options);
unprotectedChangeLease(src, dst, dinfo); // update lease with new filename
}
/**
@ -2671,7 +2713,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
return deleteInt(src, recursive);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"delete", src, null, null);
@ -2687,7 +2729,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
}
boolean status = deleteInternal(src, recursive, true);
if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
if (status && isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"delete", src, null, null);
@ -2885,7 +2927,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
}
stat = dir.getFileInfo(src, resolveLink);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"getfileinfo", src, null, null);
@ -2894,7 +2936,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
} finally {
readUnlock();
}
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"getfileinfo", src, null, null);
@ -2910,7 +2952,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
return mkdirsInt(src, permissions, createParent);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"mkdirs", src, null, null);
@ -2934,7 +2976,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
writeUnlock();
}
getEditLog().logSync();
if (status && auditLog.isInfoEnabled() && isExternalInvocation()) {
if (status && isAuditEnabled() && isExternalInvocation()) {
final HdfsFileStatus stat = dir.getFileInfo(src, false);
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
@ -3368,7 +3410,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
try {
return getListingInt(src, startAfter, needLocation);
} catch (AccessControlException e) {
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(false, UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"listStatus", src, null, null);
@ -3392,7 +3434,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
checkTraverse(src);
}
}
if (auditLog.isInfoEnabled() && isExternalInvocation()) {
if (isAuditEnabled() && isExternalInvocation()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
getRemoteIp(),
"listStatus", src, null, null);
@ -3482,15 +3524,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private NNHAStatusHeartbeat createHaStatusHeartbeat() {
HAState state = haContext.getState();
NNHAStatusHeartbeat.State hbState;
if (state instanceof ActiveState) {
hbState = NNHAStatusHeartbeat.State.ACTIVE;
} else if (state instanceof StandbyState) {
hbState = NNHAStatusHeartbeat.State.STANDBY;
} else {
throw new AssertionError("Invalid state: " + state.getClass());
}
return new NNHAStatusHeartbeat(hbState,
return new NNHAStatusHeartbeat(state.getServiceState(),
getFSImage().getLastAppliedOrWrittenTxId());
}
@ -3929,7 +3963,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
private synchronized void leave() {
// if not done yet, initialize replication queues.
// In the standby, do not populate repl queues
if (!isPopulatingReplQueues() && !isInStandbyState()) {
if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) {
initializeReplQueues();
}
long timeInSafemode = now() - startTime;
@ -3972,7 +4006,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* initializing replication queues.
*/
private synchronized boolean canInitializeReplQueues() {
return !isInStandbyState() && blockSafe >= blockReplQueueThreshold;
return shouldPopulateReplQueues()
&& blockSafe >= blockReplQueueThreshold;
}
/**
@ -4312,7 +4347,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
@Override
public boolean isPopulatingReplQueues() {
if (isInStandbyState()) {
if (!shouldPopulateReplQueues()) {
return false;
}
// safeMode is volatile, and may be set to null at any time
@ -4322,6 +4357,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
return safeMode.isPopulatingReplQueues();
}
private boolean shouldPopulateReplQueues() {
if(haContext == null || haContext.getState() == null)
return false;
return haContext.getState().shouldPopulateReplQueues();
}
@Override
public void incrementSafeBlockCount(int replication) {
// safeMode is volatile, and may be set to null at any time
@ -4939,31 +4980,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// rename was successful. If any part of the renamed subtree had
// files that were being written to, update with new filename.
void unprotectedChangeLease(String src, String dst, HdfsFileStatus dinfo) {
String overwrite;
String replaceBy;
void unprotectedChangeLease(String src, String dst) {
assert hasWriteLock();
boolean destinationExisted = true;
if (dinfo == null) {
destinationExisted = false;
}
if (destinationExisted && dinfo.isDir()) {
Path spath = new Path(src);
Path parent = spath.getParent();
if (parent.isRoot()) {
overwrite = parent.toString();
} else {
overwrite = parent.toString() + Path.SEPARATOR;
}
replaceBy = dst + Path.SEPARATOR;
} else {
overwrite = src;
replaceBy = dst;
}
leaseManager.changeLease(src, dst, overwrite, replaceBy);
leaseManager.changeLease(src, dst);
}
/**
@ -4974,19 +4993,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
// lock on our behalf. If we took the read lock here, we could block
// for fairness if a writer is waiting on the lock.
synchronized (leaseManager) {
out.writeInt(leaseManager.countPath()); // write the size
for (Lease lease : leaseManager.getSortedLeases()) {
for(String path : lease.getPaths()) {
// verify that path exists in namespace
final INodeFileUnderConstruction cons;
try {
cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path);
} catch (UnresolvedLinkException e) {
throw new AssertionError("Lease files should reside on this FS");
}
FSImageSerialization.writeINodeUnderConstruction(out, cons, path);
}
Map<String, INodeFileUnderConstruction> nodes =
leaseManager.getINodesUnderConstruction();
out.writeInt(nodes.size()); // write the size
for (Map.Entry<String, INodeFileUnderConstruction> entry
: nodes.entrySet()) {
FSImageSerialization.writeINodeUnderConstruction(
out, entry.getValue(), entry.getKey());
}
}
}
@ -5345,7 +5358,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
* Log fsck event in the audit log
*/
void logFsckEvent(String src, InetAddress remoteAddress) throws IOException {
if (auditLog.isInfoEnabled()) {
if (isAuditEnabled()) {
logAuditEvent(UserGroupInformation.getCurrentUser(),
remoteAddress,
"fsck", src, null, null);
@ -5729,4 +5742,44 @@ public class FSNamesystem implements Namesystem, FSClusterStats,
newSnapshotRoot.toString(), null);
}
}
/**
* Default AuditLogger implementation; used when no access logger is
* defined in the config file. It can also be explicitly listed in the
* config file.
*/
private static class DefaultAuditLogger implements AuditLogger {
@Override
public void initialize(Configuration conf) {
// Nothing to do.
}
@Override
public void logAuditEvent(boolean succeeded, String userName,
InetAddress addr, String cmd, String src, String dst,
FileStatus status) {
if (auditLog.isInfoEnabled()) {
final StringBuilder sb = auditBuffer.get();
sb.setLength(0);
sb.append("allowed=").append(succeeded).append("\t");
sb.append("ugi=").append(userName).append("\t");
sb.append("ip=").append(addr).append("\t");
sb.append("cmd=").append(cmd).append("\t");
sb.append("src=").append(src).append("\t");
sb.append("dst=").append(dst).append("\t");
if (null == status) {
sb.append("perm=null");
} else {
sb.append("perm=");
sb.append(status.getOwner()).append(":");
sb.append(status.getGroup()).append(":");
sb.append(status.getPermission());
}
auditLog.info(sb);
}
}
}
}

View File

@ -80,6 +80,11 @@ public class INodeDirectory extends INode {
public INodeDirectory(INodeDirectory other) {
super(other);
this.children = other.children;
if (this.children != null) {
for (INode child : children) {
child.parent = this;
}
}
}
/** @return true unconditionally. */
@ -118,6 +123,7 @@ public class INodeDirectory extends INode {
final int low = searchChildren(newChild);
if (low>=0) { // an old child exists so replace by the newChild
children.get(low).parent = null;
children.set(low, newChild);
} else {
throw new IllegalArgumentException("No child exists to be replaced");

View File

@ -331,22 +331,19 @@ public class LeaseManager {
}
}
synchronized void changeLease(String src, String dst,
String overwrite, String replaceBy) {
synchronized void changeLease(String src, String dst) {
if (LOG.isDebugEnabled()) {
LOG.debug(getClass().getSimpleName() + ".changelease: " +
" src=" + src + ", dest=" + dst +
", overwrite=" + overwrite +
", replaceBy=" + replaceBy);
" src=" + src + ", dest=" + dst);
}
final int len = overwrite.length();
final int len = src.length();
for(Map.Entry<String, Lease> entry
: findLeaseWithPrefixPath(src, sortedLeasesByPath).entrySet()) {
final String oldpath = entry.getKey();
final Lease lease = entry.getValue();
//overwrite must be a prefix of oldpath
final String newpath = replaceBy + oldpath.substring(len);
// replace stem of src with new destination
final String newpath = dst + oldpath.substring(len);
if (LOG.isDebugEnabled()) {
LOG.debug("changeLease: replacing " + oldpath + " with " + newpath);
}
@ -429,6 +426,26 @@ public class LeaseManager {
}
}
/**
* Get the list of inodes corresponding to valid leases.
* @return list of inodes
* @throws UnresolvedLinkException
*/
Map<String, INodeFileUnderConstruction> getINodesUnderConstruction() {
Map<String, INodeFileUnderConstruction> inodes =
new TreeMap<String, INodeFileUnderConstruction>();
for (String p : sortedLeasesByPath.keySet()) {
// verify that path exists in namespace
try {
INode node = fsnamesystem.dir.getINode(p);
inodes.put(p, INodeFileUnderConstruction.valueOf(node, p));
} catch (IOException ioe) {
LOG.error(ioe);
}
}
return inodes;
}
/** Check the leases beginning from the oldest.
* @return true if sync is needed.
*/

View File

@ -598,11 +598,7 @@ public class NameNode {
String nsId = getNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
this.haEnabled = HAUtil.isHAEnabled(conf, nsId);
if (!haEnabled) {
state = ACTIVE_STATE;
} else {
state = STANDBY_STATE;
}
state = createHAState();
this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf);
this.haContext = createHAContext();
try {
@ -619,6 +615,10 @@ public class NameNode {
}
}
protected HAState createHAState() {
return !haEnabled ? ACTIVE_STATE : STANDBY_STATE;
}
protected HAContext createHAContext() {
return new NameNodeHAContext();
}
@ -1050,6 +1050,9 @@ public class NameNode {
private static void doRecovery(StartupOption startOpt, Configuration conf)
throws IOException {
String nsId = DFSUtil.getNamenodeNameServiceId(conf);
String namenodeId = HAUtil.getNameNodeId(conf, nsId);
initializeGenericKeys(conf, nsId, namenodeId);
if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) {
if (!confirmPrompt("You have selected Metadata Recovery mode. " +
"This mode is intended to recover lost metadata on a corrupt " +
@ -1298,7 +1301,7 @@ public class NameNode {
* before exit.
* @throws ExitException thrown only for testing.
*/
private synchronized void doImmediateShutdown(Throwable t)
protected synchronized void doImmediateShutdown(Throwable t)
throws ExitException {
String message = "Error encountered requiring NN shutdown. " +
"Shutting down immediately.";

View File

@ -102,7 +102,7 @@ class NamenodeJspHelper {
long usedNonHeap = (totalNonHeap * 100) / commitedNonHeap;
String str = "<div>" + inodes + " files and directories, " + blocks + " blocks = "
+ (inodes + blocks) + " total";
+ (inodes + blocks) + " total filesystem objects";
if (maxobjects != 0) {
long pct = ((inodes + blocks) * 100) / maxobjects;
str += " / " + maxobjects + " (" + pct + "%)";

View File

@ -19,31 +19,26 @@ package org.apache.hadoop.hdfs.server.protocol;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class NNHAStatusHeartbeat {
private State state;
private HAServiceState state;
private long txid = HdfsConstants.INVALID_TXID;
public NNHAStatusHeartbeat(State state, long txid) {
public NNHAStatusHeartbeat(HAServiceState state, long txid) {
this.state = state;
this.txid = txid;
}
public State getState() {
public HAServiceState getState() {
return state;
}
public long getTxId() {
return txid;
}
@InterfaceAudience.Private
public enum State {
ACTIVE,
STANDBY;
}
}

View File

@ -38,10 +38,9 @@ public class UserParam extends StringParam {
MessageFormat.format("Parameter [{0}], cannot be NULL", NAME));
}
int len = str.length();
if (len < 1 || len > 31) {
if (len < 1) {
throw new IllegalArgumentException(MessageFormat.format(
"Parameter [{0}], invalid value [{1}], it's length must be between 1 and 31",
NAME, str));
"Parameter [{0}], its length must be at least 1", NAME));
}
return str;
}

View File

@ -1184,4 +1184,17 @@
</description>
</property>
<property>
<name>dfs.namenode.audit.loggers</name>
<value>default</value>
<description>
List of classes implementing audit loggers that will receive audit events.
These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger.
The special value "default" can be used to reference the default audit
logger, which uses the configured log system. Installing custom audit loggers
may affect the performance and stability of the NameNode. Refer to the custom
logger's documentation for more details.
</description>
</property>
</configuration>
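A hedged usage sketch (the org.example.MyAuditLogger class name is hypothetical and not part of this change): an additional logger can be registered next to the built-in one, mirroring what TestAuditLogger does further below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

// Sketch: keep the default logger and add a custom implementation of
// org.apache.hadoop.hdfs.server.namenode.AuditLogger (hypothetical class).
Configuration conf = new HdfsConfiguration();
conf.set(DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY,
    "default,org.example.MyAuditLogger");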

View File

@ -41,6 +41,7 @@ public class TestFcHdfsCreateMkdir extends
@BeforeClass
public static void clusterSetupAtBegining()
throws IOException, LoginException, URISyntaxException {
FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsCreateMkdir";
Configuration conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
fc = FileContext.getFileContext(cluster.getURI(0), conf);

View File

@ -41,6 +41,7 @@ public class TestFcHdfsPermission extends FileContextPermissionBase {
@BeforeClass
public static void clusterSetupAtBegining()
throws IOException, LoginException, URISyntaxException {
FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsPermission";
Configuration conf = new HdfsConfiguration();
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
fc = FileContext.getFileContext(cluster.getURI(0), conf);

View File

@ -82,6 +82,7 @@ public class TestFcHdfsSetUMask {
@BeforeClass
public static void clusterSetupAtBegining()
throws IOException, LoginException, URISyntaxException {
FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsSetUMask";
Configuration conf = new HdfsConfiguration();
// set permissions very restrictive
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");

View File

@ -86,6 +86,7 @@ public class TestFcHdfsSymlink extends FileContextSymlinkBaseTest {
@BeforeClass
public static void testSetUp() throws Exception {
FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsSymlink";
Configuration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
conf.set(FsPermission.UMASK_LABEL, "000");

View File

@ -49,6 +49,8 @@ public class TestHDFSFileContextMainOperations extends
@BeforeClass
public static void clusterSetupAtBegining() throws IOException,
LoginException, URISyntaxException {
FileContextTestHelper.TEST_ROOT_DIR =
"/tmp/TestHDFSFileContextMainOperations";
cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(2).build();
cluster.waitClusterUp();
fc = FileContext.getFileContext(cluster.getURI(0), CONF);

View File

@ -0,0 +1,183 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import org.junit.Test;
import static org.junit.Assert.*;
public class TestVolumeId {
@Test
public void testEquality() {
final VolumeId id1 = new HdfsVolumeId(new byte[] { (byte)0, (byte)0 });
testEq(true, id1, id1);
final VolumeId id2 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 });
testEq(true, id2, id2);
testEq(false, id1, id2);
final VolumeId id3 = new HdfsVolumeId(new byte[] { (byte)1, (byte)0 });
testEq(true, id3, id3);
testEq(false, id1, id3);
// another instance equal to 2:
final VolumeId id2copy1 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 });
testEq(true, id2, id2copy1);
// same as 2copy1:
final VolumeId id2copy2 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 });
testEq(true, id2, id2copy2);
testEqMany(true, new VolumeId[] { id2, id2copy1, id2copy2 });
testEqMany(false, new VolumeId[] { id1, id2, id3 });
}
@SuppressWarnings("unchecked")
private <T> void testEq(final boolean eq, Comparable<? super T> id1, Comparable<? super T> id2) {
final int h1 = id1.hashCode();
final int h2 = id2.hashCode();
// eq reflexivity:
assertTrue(id1.equals(id1));
assertTrue(id2.equals(id2));
assertEquals(0, id1.compareTo((T)id1));
assertEquals(0, id2.compareTo((T)id2));
// eq symmetry:
assertEquals(eq, id1.equals(id2));
assertEquals(eq, id2.equals(id1));
// null comparison:
assertFalse(id1.equals(null));
assertFalse(id2.equals(null));
// compareTo:
assertEquals(eq, 0 == id1.compareTo((T)id2));
assertEquals(eq, 0 == id2.compareTo((T)id1));
// compareTo must be antisymmetric:
assertEquals(sign(id1.compareTo((T)id2)), -sign(id2.compareTo((T)id1)));
// compare with null should never return 0 to be consistent with #equals():
assertTrue(id1.compareTo(null) != 0);
assertTrue(id2.compareTo(null) != 0);
// check that hash codes did not change:
assertEquals(h1, id1.hashCode());
assertEquals(h2, id2.hashCode());
if (eq) {
// in this case the hash codes must be the same:
assertEquals(h1, h2);
}
}
private static int sign(int x) {
if (x == 0) {
return 0;
} else if (x > 0) {
return 1;
} else {
return -1;
}
}
@SuppressWarnings("unchecked")
private <T> void testEqMany(final boolean eq, Comparable<? super T>... volumeIds) {
Comparable<? super T> vidNext;
int sum = 0;
for (int i=0; i<volumeIds.length; i++) {
if (i == volumeIds.length - 1) {
vidNext = volumeIds[0];
} else {
vidNext = volumeIds[i + 1];
}
testEq(eq, volumeIds[i], vidNext);
sum += sign(volumeIds[i].compareTo((T)vidNext));
}
// the comparison relationship must always be acyclic:
assertTrue(sum < volumeIds.length);
}
/*
* Test HdfsVolumeId(new byte[0]) instances: show that such objects are
* permitted, are still valid, and obey the same equality rules that
* other objects do.
*/
@Test
public void testIdEmptyBytes() {
final VolumeId idEmpty1 = new HdfsVolumeId(new byte[0]);
assertTrue(idEmpty1.isValid());
final VolumeId idEmpty2 = new HdfsVolumeId(new byte[0]);
assertTrue(idEmpty2.isValid());
final VolumeId idNotEmpty = new HdfsVolumeId(new byte[] { (byte)1 });
assertTrue(idNotEmpty.isValid());
testEq(true, idEmpty1, idEmpty2);
testEq(false, idEmpty1, idNotEmpty);
testEq(false, idEmpty2, idNotEmpty);
}
/*
* Test the VolumeId.INVALID_VOLUME_ID singleton.
*/
@Test
public void testInvalidId() {
try {
new HdfsVolumeId(null);
assertTrue("NPE expected.", false);
} catch (NullPointerException npe) {
// okay
}
final VolumeId idEmpty = new HdfsVolumeId(new byte[] {});
final VolumeId idNotEmpty = new HdfsVolumeId(new byte[] { (byte)1 });
testEq(false, VolumeId.INVALID_VOLUME_ID, idNotEmpty);
testEq(false, VolumeId.INVALID_VOLUME_ID, idEmpty);
testEqMany(true,
new VolumeId[] {
VolumeId.INVALID_VOLUME_ID,
VolumeId.INVALID_VOLUME_ID,
VolumeId.INVALID_VOLUME_ID } );
testEqMany(false,
new VolumeId[] {
VolumeId.INVALID_VOLUME_ID,
idEmpty,
idNotEmpty });
}
/*
* test #toString() for typical VolumeId equality classes
*/
@Test
public void testToString() {
// The #toString() return value is only checked for != null.
// We cannot assert more.
String strInvalid = VolumeId.INVALID_VOLUME_ID.toString();
assertNotNull(strInvalid);
String strEmpty = new HdfsVolumeId(new byte[] {}).toString();
assertNotNull(strEmpty);
String strNotEmpty = new HdfsVolumeId(new byte[] { (byte)1 }).toString();
assertNotNull(strNotEmpty);
}
}

View File

@ -25,6 +25,7 @@ import javax.security.auth.login.LoginException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystemTestHelper;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
@ -45,6 +46,7 @@ public class TestViewFileSystemAtHdfsRoot extends ViewFileSystemBaseTest {
@BeforeClass
public static void clusterSetupAtBegining() throws IOException,
LoginException, URISyntaxException {
FileSystemTestHelper.TEST_ROOT_DIR = "/tmp/TestViewFileSystemAtHdfsRoot";
SupportsBlocks = true;
CONF.setBoolean(
DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);

View File

@ -51,6 +51,7 @@ public class TestViewFileSystemHdfs extends ViewFileSystemBaseTest {
@BeforeClass
public static void clusterSetupAtBegining() throws IOException,
LoginException, URISyntaxException {
FileSystemTestHelper.TEST_ROOT_DIR = "/tmp/TestViewFileSystemHdfs";
SupportsBlocks = true;
CONF.setBoolean(
DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);

View File

@ -23,6 +23,7 @@ import java.net.URISyntaxException;
import javax.security.auth.login.LoginException;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileContextTestHelper;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
@ -46,6 +47,7 @@ public class TestViewFsAtHdfsRoot extends ViewFsBaseTest {
@BeforeClass
public static void clusterSetupAtBegining() throws IOException,
LoginException, URISyntaxException {
FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestViewFsAtHdfsRoot";
SupportsBlocks = true;
CONF.setBoolean(
DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);

View File

@ -24,6 +24,7 @@ import java.net.URISyntaxException;
import javax.security.auth.login.LoginException;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileContextTestHelper;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
@ -42,6 +43,7 @@ public class TestViewFsHdfs extends ViewFsBaseTest {
@BeforeClass
public static void clusterSetupAtBegining() throws IOException,
LoginException, URISyntaxException {
FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestViewFsHdfs";
SupportsBlocks = true;
CONF.setBoolean(
DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);

View File

@ -30,7 +30,9 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@ -49,6 +51,10 @@ public class TestLease {
).getLeaseByPath(src.toString()) != null;
}
static int leaseCount(MiniDFSCluster cluster) {
return NameNodeAdapter.getLeaseManager(cluster.getNamesystem()).countLease();
}
static final String dirString = "/test/lease";
final Path dir = new Path(dirString);
static final Log LOG = LogFactory.getLog(TestLease.class);
@ -126,6 +132,96 @@ public class TestLease {
}
}
@Test
public void testLeaseAfterRename() throws Exception {
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
try {
Path p = new Path("/test-file");
Path d = new Path("/test-d");
Path d2 = new Path("/test-d-other");
// open a file to get a lease
FileSystem fs = cluster.getFileSystem();
FSDataOutputStream out = fs.create(p);
out.writeBytes("something");
//out.hsync();
Assert.assertTrue(hasLease(cluster, p));
Assert.assertEquals(1, leaseCount(cluster));
// just to ensure first fs doesn't have any logic to twiddle leases
DistributedFileSystem fs2 = (DistributedFileSystem) FileSystem.newInstance(fs.getUri(), fs.getConf());
// rename the file into an existing dir
LOG.info("DMS: rename file into dir");
Path pRenamed = new Path(d, p.getName());
fs2.mkdirs(d);
fs2.rename(p, pRenamed);
Assert.assertFalse(p+" exists", fs2.exists(p));
Assert.assertTrue(pRenamed+" not found", fs2.exists(pRenamed));
Assert.assertFalse("has lease for "+p, hasLease(cluster, p));
Assert.assertTrue("no lease for "+pRenamed, hasLease(cluster, pRenamed));
Assert.assertEquals(1, leaseCount(cluster));
// rename the parent dir to a new non-existent dir
LOG.info("DMS: rename parent dir");
Path pRenamedAgain = new Path(d2, pRenamed.getName());
fs2.rename(d, d2);
// src gone
Assert.assertFalse(d+" exists", fs2.exists(d));
Assert.assertFalse("has lease for "+pRenamed, hasLease(cluster, pRenamed));
// dst checks
Assert.assertTrue(d2+" not found", fs2.exists(d2));
Assert.assertTrue(pRenamedAgain+" not found", fs2.exists(pRenamedAgain));
Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
Assert.assertEquals(1, leaseCount(cluster));
// rename the parent dir to existing dir
// NOTE: rename w/o options moves paths into existing dir
LOG.info("DMS: rename parent again");
pRenamed = pRenamedAgain;
pRenamedAgain = new Path(new Path(d, d2.getName()), p.getName());
fs2.mkdirs(d);
fs2.rename(d2, d);
// src gone
Assert.assertFalse(d2+" exists", fs2.exists(d2));
Assert.assertFalse("no lease for "+pRenamed, hasLease(cluster, pRenamed));
// dst checks
Assert.assertTrue(d+" not found", fs2.exists(d));
Assert.assertTrue(pRenamedAgain +" not found", fs2.exists(pRenamedAgain));
Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
Assert.assertEquals(1, leaseCount(cluster));
// rename with opts to non-existent dir
pRenamed = pRenamedAgain;
pRenamedAgain = new Path(d2, p.getName());
fs2.rename(pRenamed.getParent(), d2, Options.Rename.OVERWRITE);
// src gone
Assert.assertFalse(pRenamed.getParent() +" not found", fs2.exists(pRenamed.getParent()));
Assert.assertFalse("has lease for "+pRenamed, hasLease(cluster, pRenamed));
// dst checks
Assert.assertTrue(d2+" not found", fs2.exists(d2));
Assert.assertTrue(pRenamedAgain+" not found", fs2.exists(pRenamedAgain));
Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
Assert.assertEquals(1, leaseCount(cluster));
// rename with opts to existing dir
// NOTE: rename with options will not move paths into the existing dir
pRenamed = pRenamedAgain;
pRenamedAgain = new Path(d, p.getName());
fs2.rename(pRenamed.getParent(), d, Options.Rename.OVERWRITE);
// src gone
Assert.assertFalse(pRenamed.getParent() +" not found", fs2.exists(pRenamed.getParent()));
Assert.assertFalse("has lease for "+pRenamed, hasLease(cluster, pRenamed));
// dst checks
Assert.assertTrue(d+" not found", fs2.exists(d));
Assert.assertTrue(pRenamedAgain+" not found", fs2.exists(pRenamedAgain));
Assert.assertTrue("no lease for "+pRenamedAgain, hasLease(cluster, pRenamedAgain));
Assert.assertEquals(1, leaseCount(cluster));
} finally {
cluster.shutdown();
}
}
@Test
public void testLease() throws Exception {
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();

View File

@ -17,13 +17,15 @@
*/
package org.apache.hadoop.hdfs.server.balancer;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeoutException;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -43,7 +45,7 @@ import org.junit.Test;
/**
* This class tests if a balancer schedules tasks correctly.
*/
public class TestBalancerWithNodeGroup extends TestCase {
public class TestBalancerWithNodeGroup {
private static final Log LOG = LogFactory.getLog(
"org.apache.hadoop.hdfs.TestBalancerWithNodeGroup");

View File

@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -45,6 +46,8 @@ import org.junit.Test;
public class TestReplicationPolicyWithNodeGroup {
private static final int BLOCK_SIZE = 1024;
private static final int NUM_OF_DATANODES = 8;
private static final int NUM_OF_DATANODES_BOUNDARY = 6;
private static final int NUM_OF_DATANODES_MORE_TARGETS = 12;
private static final Configuration CONF = new HdfsConfiguration();
private static final NetworkTopology cluster;
private static final NameNode namenode;
@ -62,6 +65,32 @@ public class TestReplicationPolicyWithNodeGroup {
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/d2/r3/n6")
};
private final static DatanodeDescriptor dataNodesInBoundaryCase[] =
new DatanodeDescriptor[] {
DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/d1/r1/n1"),
DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/d1/r1/n1"),
DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/d1/r1/n1"),
DFSTestUtil.getDatanodeDescriptor("4.4.4.4", "/d1/r1/n2"),
DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/d1/r2/n3"),
DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/d1/r2/n3")
};
private final static DatanodeDescriptor dataNodesInMoreTargetsCase[] =
new DatanodeDescriptor[] {
DFSTestUtil.getDatanodeDescriptor("1.1.1.1", "/r1/n1"),
DFSTestUtil.getDatanodeDescriptor("2.2.2.2", "/r1/n1"),
DFSTestUtil.getDatanodeDescriptor("3.3.3.3", "/r1/n2"),
DFSTestUtil.getDatanodeDescriptor("4.4.4.4", "/r1/n2"),
DFSTestUtil.getDatanodeDescriptor("5.5.5.5", "/r1/n3"),
DFSTestUtil.getDatanodeDescriptor("6.6.6.6", "/r1/n3"),
DFSTestUtil.getDatanodeDescriptor("7.7.7.7", "/r2/n4"),
DFSTestUtil.getDatanodeDescriptor("8.8.8.8", "/r2/n4"),
DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/r2/n5"),
DFSTestUtil.getDatanodeDescriptor("10.10.10.10", "/r2/n5"),
DFSTestUtil.getDatanodeDescriptor("11.11.11.11", "/r2/n6"),
DFSTestUtil.getDatanodeDescriptor("12.12.12.12", "/r2/n6"),
};
private final static DatanodeDescriptor NODE =
new DatanodeDescriptor(DFSTestUtil.getDatanodeDescriptor("9.9.9.9", "/d2/r4/n7"));
@ -74,6 +103,12 @@ public class TestReplicationPolicyWithNodeGroup {
"org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyWithNodeGroup");
CONF.set(CommonConfigurationKeysPublic.NET_TOPOLOGY_IMPL_KEY,
"org.apache.hadoop.net.NetworkTopologyWithNodeGroup");
File baseDir = new File(System.getProperty(
"test.build.data", "build/test/data"), "dfs/");
CONF.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
new File(baseDir, "name").getPath());
DFSTestUtil.formatNameNode(CONF);
namenode = new NameNode(CONF);
} catch (IOException e) {
@ -98,6 +133,26 @@ public class TestReplicationPolicyWithNodeGroup {
}
}
/**
* Scan the targets list: all targets should be on different NodeGroups.
* Return false if two targets are found on the same NodeGroup.
*/
private static boolean checkTargetsOnDifferentNodeGroup(
DatanodeDescriptor[] targets) {
if(targets.length == 0)
return true;
Set<String> targetSet = new HashSet<String>();
for(DatanodeDescriptor node:targets) {
String nodeGroup = NetworkTopology.getLastHalf(node.getNetworkLocation());
if(targetSet.contains(nodeGroup)) {
return false;
} else {
targetSet.add(nodeGroup);
}
}
return true;
}
/**
* In this testcase, client is dataNodes[0]. So the 1st replica should be
* placed on dataNodes[0], the 2nd replica should be placed on
@ -498,4 +553,121 @@ public class TestReplicationPolicyWithNodeGroup {
assertEquals(chosenNode, dataNodes[5]);
}
/**
* Test replica placement policy in case of boundary topology.
* Rack 2 has only one node group, so it cannot hold two replicas.
* The 1st replica will be placed on the writer.
* The 2nd replica should be placed on a different rack.
* The 3rd replica should be placed on the same rack as the writer, but in a
* different node group.
*/
@Test
public void testChooseTargetsOnBoundaryTopology() throws Exception {
for(int i=0; i<NUM_OF_DATANODES; i++) {
cluster.remove(dataNodes[i]);
}
for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
cluster.add(dataNodesInBoundaryCase[i]);
}
for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
dataNodes[0].updateHeartbeat(
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
(HdfsConstants.MIN_BLOCKS_FOR_WRITE-1)*BLOCK_SIZE, 0L, 0, 0);
dataNodesInBoundaryCase[i].updateHeartbeat(
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
}
DatanodeDescriptor[] targets;
targets = replicator.chooseTarget(filename, 0, dataNodesInBoundaryCase[0],
new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
assertEquals(targets.length, 0);
targets = replicator.chooseTarget(filename, 1, dataNodesInBoundaryCase[0],
new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
assertEquals(targets.length, 1);
targets = replicator.chooseTarget(filename, 2, dataNodesInBoundaryCase[0],
new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
assertEquals(targets.length, 2);
assertFalse(cluster.isOnSameRack(targets[0], targets[1]));
targets = replicator.chooseTarget(filename, 3, dataNodesInBoundaryCase[0],
new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
assertEquals(targets.length, 3);
assertTrue(checkTargetsOnDifferentNodeGroup(targets));
}
/**
* Test re-replication policy in boundary case.
* Rack 2 has only one node group and its node is already chosen.
* Rack 1 has two node groups and one of them is already chosen.
* The replication policy should choose a node from Rack 1, but not from the
* same node group as the nodes already chosen.
*/
@Test
public void testRereplicateOnBoundaryTopology() throws Exception {
for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
dataNodesInBoundaryCase[i].updateHeartbeat(
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
}
List<DatanodeDescriptor> chosenNodes = new ArrayList<DatanodeDescriptor>();
chosenNodes.add(dataNodesInBoundaryCase[0]);
chosenNodes.add(dataNodesInBoundaryCase[5]);
DatanodeDescriptor[] targets;
targets = replicator.chooseTarget(filename, 1, dataNodesInBoundaryCase[0],
chosenNodes, BLOCK_SIZE);
assertFalse(cluster.isOnSameNodeGroup(targets[0],
dataNodesInBoundaryCase[0]));
assertFalse(cluster.isOnSameNodeGroup(targets[0],
dataNodesInBoundaryCase[5]));
assertTrue(checkTargetsOnDifferentNodeGroup(targets));
}
/**
* Test replica placement policy when more targets are requested than the
* number of NodeGroups.
* The 12-node cluster has only 6 NodeGroups, but in some cases, such as
* placing a submitted job file, more targets (10) are requested for the
* replicas. Verify that only 6 targets are returned.
*/
@Test
public void testChooseMoreTargetsThanNodeGroups() throws Exception {
// Cleanup nodes in previous tests
for(int i=0; i<NUM_OF_DATANODES_BOUNDARY; i++) {
DatanodeDescriptor node = dataNodesInBoundaryCase[i];
if (cluster.contains(node)) {
cluster.remove(node);
}
}
for(int i=0; i<NUM_OF_DATANODES_MORE_TARGETS; i++) {
cluster.add(dataNodesInMoreTargetsCase[i]);
}
for(int i=0; i<NUM_OF_DATANODES_MORE_TARGETS; i++) {
dataNodesInMoreTargetsCase[i].updateHeartbeat(
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0, 0);
}
DatanodeDescriptor[] targets;
// Test normal case -- 3 replicas
targets = replicator.chooseTarget(filename, 3, dataNodesInMoreTargetsCase[0],
new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
assertEquals(targets.length, 3);
assertTrue(checkTargetsOnDifferentNodeGroup(targets));
// Test special case -- replica number over node groups.
targets = replicator.chooseTarget(filename, 10, dataNodesInMoreTargetsCase[0],
new ArrayList<DatanodeDescriptor>(), BLOCK_SIZE);
assertTrue(checkTargetsOnDifferentNodeGroup(targets));
// Verify it only can find 6 targets for placing replicas.
assertEquals(targets.length, 6);
}
}

View File

@ -29,6 +29,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@ -41,7 +42,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
@ -123,7 +123,7 @@ public class TestBPOfferService {
Mockito.anyInt(),
Mockito.anyInt(),
Mockito.anyInt());
mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(State.STANDBY, 0);
mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0);
return mock;
}
@ -255,12 +255,12 @@ public class TestBPOfferService {
assertNull(bpos.getActiveNN());
// Have NN1 claim active at txid 1
mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 1);
mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1);
bpos.triggerHeartbeatForTests();
assertSame(mockNN1, bpos.getActiveNN());
// NN2 claims active at a higher txid
mockHaStatuses[1] = new NNHAStatusHeartbeat(State.ACTIVE, 2);
mockHaStatuses[1] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 2);
bpos.triggerHeartbeatForTests();
assertSame(mockNN2, bpos.getActiveNN());
@ -272,12 +272,12 @@ public class TestBPOfferService {
// Even if NN2 goes to standby, DN shouldn't reset to talking to NN1,
// because NN1's txid is lower than the last active txid. Instead,
// it should consider neither active.
mockHaStatuses[1] = new NNHAStatusHeartbeat(State.STANDBY, 2);
mockHaStatuses[1] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 2);
bpos.triggerHeartbeatForTests();
assertNull(bpos.getActiveNN());
// Now if NN1 goes back to a higher txid, it should be considered active
mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 3);
mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 3);
bpos.triggerHeartbeatForTests();
assertSame(mockNN1, bpos.getActiveNN());

View File

@ -49,6 +49,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
@ -72,7 +73,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse;
import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat;
import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State;
import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.StorageReport;
@ -157,7 +157,7 @@ public class TestBlockRecovery {
Mockito.anyInt()))
.thenReturn(new HeartbeatResponse(
new DatanodeCommand[0],
new NNHAStatusHeartbeat(State.ACTIVE, 1)));
new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1)));
dn = new DataNode(conf, dirs, null) {
@Override

View File

@ -179,6 +179,13 @@ public class NameNodeAdapter {
return spy;
}
public static JournalSet spyOnJournalSet(NameNode nn) {
FSEditLog editLog = nn.getFSImage().getEditLog();
JournalSet js = Mockito.spy(editLog.getJournalSet());
editLog.setJournalSetForTesting(js);
return js;
}
public static String getMkdirOpPath(FSEditLogOp op) {
if (op.opCode == FSEditLogOpCodes.OP_MKDIR) {
return ((MkdirOp) op).path;

View File

@ -0,0 +1,123 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.net.InetAddress;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.Test;
/**
* Tests for the {@link AuditLogger} custom audit logging interface.
*/
public class TestAuditLogger {
/**
* Tests that AuditLogger works as expected.
*/
@Test
public void testAuditLogger() throws IOException {
Configuration conf = new HdfsConfiguration();
conf.set(DFS_NAMENODE_AUDIT_LOGGERS_KEY,
DummyAuditLogger.class.getName());
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
cluster.waitClusterUp();
assertTrue(DummyAuditLogger.initialized);
FileSystem fs = cluster.getFileSystem();
long time = System.currentTimeMillis();
fs.setTimes(new Path("/"), time, time);
assertEquals(1, DummyAuditLogger.logCount);
} finally {
cluster.shutdown();
}
}
/**
* Tests that a broken audit logger causes requests to fail.
*/
@Test
public void testBrokenLogger() throws IOException {
Configuration conf = new HdfsConfiguration();
conf.set(DFS_NAMENODE_AUDIT_LOGGERS_KEY,
BrokenAuditLogger.class.getName());
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
cluster.waitClusterUp();
FileSystem fs = cluster.getFileSystem();
long time = System.currentTimeMillis();
fs.setTimes(new Path("/"), time, time);
fail("Expected exception due to broken audit logger.");
} catch (RemoteException re) {
// Expected.
} finally {
cluster.shutdown();
}
}
public static class DummyAuditLogger implements AuditLogger {
static boolean initialized;
static int logCount;
public void initialize(Configuration conf) {
initialized = true;
}
public void logAuditEvent(boolean succeeded, String userName,
InetAddress addr, String cmd, String src, String dst,
FileStatus stat) {
logCount++;
}
}
public static class BrokenAuditLogger implements AuditLogger {
public void initialize(Configuration conf) {
// No op.
}
public void logAuditEvent(boolean succeeded, String userName,
InetAddress addr, String cmd, String src, String dst,
FileStatus stat) {
throw new RuntimeException("uh oh");
}
}
}

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HAUtil;
@ -103,6 +104,9 @@ public class TestBackupNode {
BackupNode bn = (BackupNode)NameNode.createNameNode(
new String[]{startupOpt.getName()}, c);
assertTrue(bn.getRole() + " must be in SafeMode.", bn.isInSafeMode());
assertTrue(bn.getRole() + " must be in StandbyState",
bn.getNamesystem().getHAState()
.equalsIgnoreCase(HAServiceState.STANDBY.name()));
return bn;
}

View File

@ -25,10 +25,15 @@ import static org.junit.Assert.fail;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathIsNotDirectoryException;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.junit.Test;
@ -157,6 +162,48 @@ public class TestINodeFile {
}
/**
* FSDirectory#unprotectedSetQuota creates a new INodeDirectoryWithQuota to
* replace the original INodeDirectory. Before HDFS-4243, the parent field of
* all the children INodes of the target INodeDirectory was not updated to
* point to the new INodeDirectoryWithQuota. This test case covers that
* scenario.
*/
@Test
public void testGetFullPathNameAfterSetQuota() throws Exception {
long fileLen = 1024;
replication = 3;
Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
replication).build();
cluster.waitActive();
FSNamesystem fsn = cluster.getNamesystem();
FSDirectory fsdir = fsn.getFSDirectory();
DistributedFileSystem dfs = cluster.getFileSystem();
// Create a file for test
final Path dir = new Path("/dir");
final Path file = new Path(dir, "file");
DFSTestUtil.createFile(dfs, file, fileLen, replication, 0L);
// Check the full path name of the INode associating with the file
INode fnode = fsdir.getINode(file.toString());
assertEquals(file.toString(), fnode.getFullPathName());
// Call FSDirectory#unprotectedSetQuota which calls
// INodeDirectory#replaceChild
dfs.setQuota(dir, Long.MAX_VALUE - 1, replication * fileLen * 10);
final Path newDir = new Path("/newdir");
final Path newFile = new Path(newDir, "file");
// Also rename dir
dfs.rename(dir, newDir, Options.Rename.OVERWRITE);
// /dir/file now should be renamed to /newdir/file
fnode = fsdir.getINode(newFile.toString());
// getFullPathName can return correct result only if the parent field of
// child node is set correctly
assertEquals(newFile.toString(), fnode.getFullPathName());
}
@Test
public void testAppendBlocks() {
INodeFile origFile = createINodeFiles(1, "origfile")[0];

View File

@ -92,7 +92,8 @@ public class TestMetaSave {
DataInputStream in = new DataInputStream(fstream);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line = reader.readLine();
assertTrue(line.equals("3 files and directories, 2 blocks = 5 total"));
assertTrue(line.equals(
"3 files and directories, 2 blocks = 5 total filesystem objects"));
line = reader.readLine();
assertTrue(line.equals("Live Datanodes: 1"));
line = reader.readLine();

View File

@ -30,11 +30,16 @@ import java.io.RandomAccessFile;
import java.util.HashSet;
import java.util.Set;
import junit.framework.Assert;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
@ -438,6 +443,39 @@ public class TestNameNodeRecovery {
}
}
/**
* Create a test configuration that will exercise the initializeGenericKeys
* code path. This is a regression test for HDFS-4279.
*/
static void setupRecoveryTestConf(Configuration conf) throws IOException {
conf.set(DFSConfigKeys.DFS_NAMESERVICES, "ns1");
conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");
conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX,
"ns1"), "nn1,nn2");
String baseDir = System.getProperty(
MiniDFSCluster.PROP_TEST_BUILD_DATA, "build/test/data") + "/dfs/";
File nameDir = new File(baseDir, "nameR");
File secondaryDir = new File(baseDir, "namesecondaryR");
conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.
DFS_NAMENODE_NAME_DIR_KEY, "ns1", "nn1"),
nameDir.getCanonicalPath());
conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.
DFS_NAMENODE_CHECKPOINT_DIR_KEY, "ns1", "nn1"),
secondaryDir.getCanonicalPath());
conf.unset(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY);
conf.unset(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY);
FileUtils.deleteQuietly(nameDir);
if (!nameDir.mkdirs()) {
throw new RuntimeException("failed to make directory " +
nameDir.getAbsolutePath());
}
FileUtils.deleteQuietly(secondaryDir);
if (!secondaryDir.mkdirs()) {
throw new RuntimeException("failed to make directory " +
secondaryDir.getAbsolutePath());
}
}
static void testNameNodeRecoveryImpl(Corruptor corruptor, boolean finalize)
throws IOException {
final String TEST_PATH = "/test/path/dir";
@ -446,12 +484,13 @@ public class TestNameNodeRecovery {
// start a cluster
Configuration conf = new HdfsConfiguration();
setupRecoveryTestConf(conf);
MiniDFSCluster cluster = null;
FileSystem fileSys = null;
StorageDirectory sd = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.enableManagedDfsDirsRedundancy(false).build();
.manageNameDfsDirs(false).build();
cluster.waitActive();
if (!finalize) {
// Normally, the in-progress edit log would be finalized by

View File

@ -603,6 +603,24 @@ public class TestSaveNamespace {
}
}
@Test
public void testSaveNamespaceWithDanglingLease() throws Exception {
MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
.numDataNodes(1).build();
cluster.waitActive();
DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();
try {
cluster.getNamesystem().leaseManager.addLease("me", "/non-existent");
fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
cluster.getNameNodeRpc().saveNamespace();
fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
private void doAnEdit(FSNamesystem fsn, int id) throws IOException {
// Make an edit
fsn.mkdirs(

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.JournalSet;
import org.apache.hadoop.hdfs.server.namenode.NNStorage;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
@ -66,6 +67,12 @@ public class TestStandbyCheckpoints {
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
// Dial down the retention of extra edits and checkpoints. This is to
// help catch regressions of HDFS-4238 (SBN should not purge shared edits)
conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0);
conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true);
conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY,
SlowCodec.class.getCanonicalName());
@ -99,15 +106,20 @@ public class TestStandbyCheckpoints {
@Test
public void testSBNCheckpoints() throws Exception {
doEdits(0, 10);
JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nn1);
doEdits(0, 10);
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
// Once the standby catches up, it should notice that it needs to
// do a checkpoint and save one to its local directories.
HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12));
HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
// It should also upload it back to the active.
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12));
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
// The standby should never try to purge edit logs on shared storage.
Mockito.verify(standbyJournalSet, Mockito.never()).
purgeLogsOlderThan(Mockito.anyLong());
}
/**
@ -129,8 +141,8 @@ public class TestStandbyCheckpoints {
// so the standby will catch up. Then, both will be in standby mode
// with enough uncheckpointed txns to cause a checkpoint, and they
// will each try to take a checkpoint and upload to each other.
HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12));
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12));
HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));
assertEquals(12, nn0.getNamesystem().getFSImage()
.getMostRecentCheckpointTxId());

View File

@ -28,6 +28,7 @@ import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSMainOperationsBaseTest;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystemTestHelper;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
@ -53,6 +54,10 @@ public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest {
@BeforeClass
public static void setupCluster() {
// Initialize the test root directory to a DFS-like path
// since we are testing based on the MiniDFSCluster.
FileSystemTestHelper.TEST_ROOT_DIR = "/tmp/TestFSMainOperationsWebHdfs";
final Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
try {

View File

@ -244,11 +244,6 @@ public class TestParam {
assertNull(userParam.getValue());
}
@Test(expected = IllegalArgumentException.class)
public void userNameTooLong() {
new UserParam("a123456789012345678901234567890x");
}
@Test(expected = IllegalArgumentException.class)
public void userNameInvalidStart() {
new UserParam("1x");
@ -265,12 +260,6 @@ public class TestParam {
assertNotNull(userParam.getValue());
}
@Test
public void userNameMaxLength() {
UserParam userParam = new UserParam("a123456789012345678901234567890");
assertNotNull(userParam.getValue());
}
@Test
public void userNameValidDollarSign() {
UserParam userParam = new UserParam("a$");

View File

@ -11,6 +11,9 @@ Trunk (Unreleased)
MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation.
(Plamen Jeliazkov via shv)
MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins.
(Avner BenHanoch via acmurthy)
IMPROVEMENTS
MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for
@ -604,6 +607,9 @@ Release 0.23.6 - UNRELEASED
MAPREDUCE-4817. Hardcoded task ping timeout kills tasks localizing large
amounts of data (tgraves)
MAPREDUCE-4836. Elapsed time for running tasks on AM web UI tasks page is 0
(Ravi Prakash via jeagles)
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -59,11 +59,12 @@ public class TaskInfo {
TaskReport report = task.getReport();
this.startTime = report.getStartTime();
this.finishTime = report.getFinishTime();
this.elapsedTime = Times.elapsed(this.startTime, this.finishTime, false);
this.state = report.getTaskState();
this.elapsedTime = Times.elapsed(this.startTime, this.finishTime,
this.state == TaskState.RUNNING);
if (this.elapsedTime == -1) {
this.elapsedTime = 0;
}
this.state = report.getTaskState();
this.progress = report.getProgress() * 100;
this.id = MRApps.toString(task.getID());
this.taskNum = task.getID().getId();

View File

@ -340,6 +340,7 @@ public class ReduceTask extends Task {
// Initialize the codec
codec = initCodec();
RawKeyValueIterator rIter = null;
ShuffleConsumerPlugin shuffleConsumerPlugin = null;
boolean isLocal = false;
// local if
@ -358,8 +359,14 @@ public class ReduceTask extends Task {
(null != combinerClass) ?
new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null;
Shuffle shuffle =
new Shuffle(getTaskID(), job, FileSystem.getLocal(job), umbilical,
Class<? extends ShuffleConsumerPlugin> clazz =
job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job);
LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin);
ShuffleConsumerPlugin.Context shuffleContext =
new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical,
super.lDirAlloc, reporter, codec,
combinerClass, combineCollector,
spilledRecordsCounter, reduceCombineInputCounter,
@ -368,7 +375,8 @@ public class ReduceTask extends Task {
mergedMapOutputsCounter,
taskStatus, copyPhase, sortPhase, this,
mapOutputFile);
rIter = shuffle.run();
shuffleConsumerPlugin.init(shuffleContext);
rIter = shuffleConsumerPlugin.run();
} else {
// local job runner doesn't have a copy phase
copyPhase.complete();
@ -399,6 +407,10 @@ public class ReduceTask extends Task {
runOldReducer(job, umbilical, reporter, rIter, comparator,
keyClass, valueClass);
}
if (shuffleConsumerPlugin != null) {
shuffleConsumerPlugin.close();
}
done(umbilical, reporter);
}

View File

@ -0,0 +1,168 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import java.io.IOException;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* ShuffleConsumerPlugin for serving reducers. It may shuffle map output
* files (MOF) from either the built-in ShuffleHandler or from a third-party
* AuxiliaryService.
*/
@InterfaceAudience.LimitedPrivate("mapreduce")
@InterfaceStability.Unstable
public interface ShuffleConsumerPlugin<K, V> {
public void init(Context<K, V> context);
public RawKeyValueIterator run() throws IOException, InterruptedException;
public void close();
@InterfaceAudience.LimitedPrivate("mapreduce")
@InterfaceStability.Unstable
public static class Context<K,V> {
private final org.apache.hadoop.mapreduce.TaskAttemptID reduceId;
private final JobConf jobConf;
private final FileSystem localFS;
private final TaskUmbilicalProtocol umbilical;
private final LocalDirAllocator localDirAllocator;
private final Reporter reporter;
private final CompressionCodec codec;
private final Class<? extends Reducer> combinerClass;
private final CombineOutputCollector<K, V> combineCollector;
private final Counters.Counter spilledRecordsCounter;
private final Counters.Counter reduceCombineInputCounter;
private final Counters.Counter shuffledMapsCounter;
private final Counters.Counter reduceShuffleBytes;
private final Counters.Counter failedShuffleCounter;
private final Counters.Counter mergedMapOutputsCounter;
private final TaskStatus status;
private final Progress copyPhase;
private final Progress mergePhase;
private final Task reduceTask;
private final MapOutputFile mapOutputFile;
public Context(org.apache.hadoop.mapreduce.TaskAttemptID reduceId,
JobConf jobConf, FileSystem localFS,
TaskUmbilicalProtocol umbilical,
LocalDirAllocator localDirAllocator,
Reporter reporter, CompressionCodec codec,
Class<? extends Reducer> combinerClass,
CombineOutputCollector<K,V> combineCollector,
Counters.Counter spilledRecordsCounter,
Counters.Counter reduceCombineInputCounter,
Counters.Counter shuffledMapsCounter,
Counters.Counter reduceShuffleBytes,
Counters.Counter failedShuffleCounter,
Counters.Counter mergedMapOutputsCounter,
TaskStatus status, Progress copyPhase, Progress mergePhase,
Task reduceTask, MapOutputFile mapOutputFile) {
this.reduceId = reduceId;
this.jobConf = jobConf;
this.localFS = localFS;
this.umbilical = umbilical;
this.localDirAllocator = localDirAllocator;
this.reporter = reporter;
this.codec = codec;
this.combinerClass = combinerClass;
this.combineCollector = combineCollector;
this.spilledRecordsCounter = spilledRecordsCounter;
this.reduceCombineInputCounter = reduceCombineInputCounter;
this.shuffledMapsCounter = shuffledMapsCounter;
this.reduceShuffleBytes = reduceShuffleBytes;
this.failedShuffleCounter = failedShuffleCounter;
this.mergedMapOutputsCounter = mergedMapOutputsCounter;
this.status = status;
this.copyPhase = copyPhase;
this.mergePhase = mergePhase;
this.reduceTask = reduceTask;
this.mapOutputFile = mapOutputFile;
}
public org.apache.hadoop.mapreduce.TaskAttemptID getReduceId() {
return reduceId;
}
public JobConf getJobConf() {
return jobConf;
}
public FileSystem getLocalFS() {
return localFS;
}
public TaskUmbilicalProtocol getUmbilical() {
return umbilical;
}
public LocalDirAllocator getLocalDirAllocator() {
return localDirAllocator;
}
public Reporter getReporter() {
return reporter;
}
public CompressionCodec getCodec() {
return codec;
}
public Class<? extends Reducer> getCombinerClass() {
return combinerClass;
}
public CombineOutputCollector<K, V> getCombineCollector() {
return combineCollector;
}
public Counters.Counter getSpilledRecordsCounter() {
return spilledRecordsCounter;
}
public Counters.Counter getReduceCombineInputCounter() {
return reduceCombineInputCounter;
}
public Counters.Counter getShuffledMapsCounter() {
return shuffledMapsCounter;
}
public Counters.Counter getReduceShuffleBytes() {
return reduceShuffleBytes;
}
public Counters.Counter getFailedShuffleCounter() {
return failedShuffleCounter;
}
public Counters.Counter getMergedMapOutputsCounter() {
return mergedMapOutputsCounter;
}
public TaskStatus getStatus() {
return status;
}
public Progress getCopyPhase() {
return copyPhase;
}
public Progress getMergePhase() {
return mergePhase;
}
public Task getReduceTask() {
return reduceTask;
}
public MapOutputFile getMapOutputFile() {
return mapOutputFile;
}
} // end of public static class Context<K,V>
}
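As a rough sketch only (the class below is hypothetical and not part of this change), a third-party plugin implements the three lifecycle methods and pulls everything it needs from the Context:

import java.io.IOException;

import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.ShuffleConsumerPlugin;

// Hypothetical skeleton of a custom shuffle consumer; it only illustrates the
// init/run/close lifecycle defined by the interface above.
public class ExampleShufflePlugin<K, V> implements ShuffleConsumerPlugin<K, V> {
  private Context<K, V> context;

  @Override
  public void init(Context<K, V> context) {
    // Keep the per-reduce-task wiring (conf, local dirs, counters, phases).
    this.context = context;
  }

  @Override
  public RawKeyValueIterator run() throws IOException, InterruptedException {
    // A real plugin would fetch map output (for example from a custom
    // AuxiliaryService), merge it, and return an iterator over the result.
    throw new UnsupportedOperationException("sketch only");
  }

  @Override
  public void close() {
    // Release any resources acquired in init()/run().
  }
}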

View File

@ -85,6 +85,9 @@ public interface MRConfig {
public static final boolean SHUFFLE_SSL_ENABLED_DEFAULT = false;
public static final String SHUFFLE_CONSUMER_PLUGIN =
"mapreduce.job.reduce.shuffle.consumer.plugin.class";
/**
* Configuration key to enable/disable IFile readahead.
*/

View File

@ -34,73 +34,63 @@ import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapred.ShuffleConsumerPlugin;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.Progress;
@InterfaceAudience.Private
@InterfaceAudience.LimitedPrivate("mapreduce")
@InterfaceStability.Unstable
@SuppressWarnings({"unchecked", "rawtypes"})
public class Shuffle<K, V> implements ExceptionReporter {
public class Shuffle<K, V> implements ShuffleConsumerPlugin<K, V>, ExceptionReporter {
private static final int PROGRESS_FREQUENCY = 2000;
private static final int MAX_EVENTS_TO_FETCH = 10000;
private static final int MIN_EVENTS_TO_FETCH = 100;
private static final int MAX_RPC_OUTSTANDING_EVENTS = 3000000;
private final TaskAttemptID reduceId;
private final JobConf jobConf;
private final Reporter reporter;
private final ShuffleClientMetrics metrics;
private final TaskUmbilicalProtocol umbilical;
private ShuffleConsumerPlugin.Context context;
private final ShuffleScheduler<K,V> scheduler;
private final MergeManager<K, V> merger;
private TaskAttemptID reduceId;
private JobConf jobConf;
private Reporter reporter;
private ShuffleClientMetrics metrics;
private TaskUmbilicalProtocol umbilical;
private ShuffleScheduler<K,V> scheduler;
private MergeManager<K, V> merger;
private Throwable throwable = null;
private String throwingThreadName = null;
private final Progress copyPhase;
private final TaskStatus taskStatus;
private final Task reduceTask; //Used for status updates
private Progress copyPhase;
private TaskStatus taskStatus;
private Task reduceTask; //Used for status updates
public Shuffle(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS,
TaskUmbilicalProtocol umbilical,
LocalDirAllocator localDirAllocator,
Reporter reporter,
CompressionCodec codec,
Class<? extends Reducer> combinerClass,
CombineOutputCollector<K,V> combineCollector,
Counters.Counter spilledRecordsCounter,
Counters.Counter reduceCombineInputCounter,
Counters.Counter shuffledMapsCounter,
Counters.Counter reduceShuffleBytes,
Counters.Counter failedShuffleCounter,
Counters.Counter mergedMapOutputsCounter,
TaskStatus status,
Progress copyPhase,
Progress mergePhase,
Task reduceTask,
MapOutputFile mapOutputFile) {
this.reduceId = reduceId;
this.jobConf = jobConf;
this.umbilical = umbilical;
this.reporter = reporter;
@Override
public void init(ShuffleConsumerPlugin.Context context) {
this.context = context;
this.reduceId = context.getReduceId();
this.jobConf = context.getJobConf();
this.umbilical = context.getUmbilical();
this.reporter = context.getReporter();
this.metrics = new ShuffleClientMetrics(reduceId, jobConf);
this.copyPhase = copyPhase;
this.taskStatus = status;
this.reduceTask = reduceTask;
this.copyPhase = context.getCopyPhase();
this.taskStatus = context.getStatus();
this.reduceTask = context.getReduceTask();
scheduler =
new ShuffleScheduler<K,V>(jobConf, status, this, copyPhase,
shuffledMapsCounter,
reduceShuffleBytes, failedShuffleCounter);
merger = new MergeManager<K, V>(reduceId, jobConf, localFS,
localDirAllocator, reporter, codec,
combinerClass, combineCollector,
spilledRecordsCounter,
reduceCombineInputCounter,
mergedMapOutputsCounter,
this, mergePhase, mapOutputFile);
new ShuffleScheduler<K,V>(jobConf, taskStatus, this, copyPhase,
context.getShuffledMapsCounter(),
context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
merger = new MergeManager<K, V>(reduceId, jobConf, context.getLocalFS(),
context.getLocalDirAllocator(), reporter, context.getCodec(),
context.getCombinerClass(), context.getCombineCollector(),
context.getSpilledRecordsCounter(),
context.getReduceCombineInputCounter(),
context.getMergedMapOutputsCounter(),
this, context.getMergePhase(), context.getMapOutputFile());
}
@Override
public RawKeyValueIterator run() throws IOException, InterruptedException {
// Scale the maximum events we fetch per RPC call to mitigate OOM issues
// on the ApplicationMaster when a thundering herd of reducers fetch events
@@ -171,6 +161,10 @@ public class Shuffle<K, V> implements ExceptionReporter {
return kvIter;
}
@Override
public void close(){
}
public synchronized void reportException(Throwable t) {
if (throwable == null) {
throwable = t;

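For context, here is a minimal sketch of how a reduce task can drive a pluggable shuffle consumer after this refactoring. The names MRConfig.SHUFFLE_CONSUMER_PLUGIN, ShuffleConsumerPlugin, Shuffle, ReflectionUtils and RawKeyValueIterator all come from the surrounding diff; the enclosing method and the pre-built context object are assumptions made for illustration, not the actual ReduceTask code.

// Hedged sketch: load the configured shuffle consumer (defaulting to Shuffle)
// and run its init/run/close lifecycle. 'context' is assumed to be a
// ShuffleConsumerPlugin.Context already populated by the reduce task.
Class<? extends ShuffleConsumerPlugin> clazz =
    jobConf.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class,
                     ShuffleConsumerPlugin.class);
ShuffleConsumerPlugin<K, V> plugin = ReflectionUtils.newInstance(clazz, jobConf);
plugin.init(context);                       // hand over counters, phases, dirs, etc.
RawKeyValueIterator kvIter = plugin.run();  // fetch and merge the map outputs
plugin.close();                             // release any plugin-held resources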
View File

@@ -748,6 +748,16 @@
</description>
</property>
<property>
<name>mapreduce.job.reduce.shuffle.consumer.plugin.class</name>
<value>org.apache.hadoop.mapreduce.task.reduce.Shuffle</value>
<description>
Name of the class whose instance will be used
to send shuffle requests by the reduce tasks of this job.
The class must implement org.apache.hadoop.mapred.ShuffleConsumerPlugin.
</description>
</property>
<!-- MR YARN Application properties -->
<property>

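As a hedged usage example of the property above, a single job could also select its own shuffle consumer through the Java API; MyShufflePlugin is a hypothetical ShuffleConsumerPlugin implementation, not a class introduced by this patch.

// Hedged example: override the default shuffle consumer for one job.
// MyShufflePlugin is hypothetical and must implement ShuffleConsumerPlugin.
JobConf jobConf = new JobConf();
jobConf.setClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN,
                 MyShufflePlugin.class,
                 ShuffleConsumerPlugin.class);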
View File

@@ -0,0 +1,197 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce;
import org.junit.Test;
import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.mapreduce.task.reduce.Shuffle;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.MapOutputFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapred.ReduceTask;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapred.ShuffleConsumerPlugin;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.Reducer;
/**
* A JUnit test verifying the availability and accessibility of shuffle-related APIs.
* It is needed for maintaining compatibility with external sub-classes of
* ShuffleConsumerPlugin and AuxiliaryService(s) like ShuffleHandler.
*
* The importance of this test is in preserving the API for 3rd party plugins.
*/
public class TestShufflePlugin<K, V> {
static class TestShuffleConsumerPlugin<K, V> implements ShuffleConsumerPlugin<K, V> {
@Override
public void init(ShuffleConsumerPlugin.Context<K, V> context) {
// just verify that Context has kept its public interface
context.getReduceId();
context.getJobConf();
context.getLocalFS();
context.getUmbilical();
context.getLocalDirAllocator();
context.getReporter();
context.getCodec();
context.getCombinerClass();
context.getCombineCollector();
context.getSpilledRecordsCounter();
context.getReduceCombineInputCounter();
context.getShuffledMapsCounter();
context.getReduceShuffleBytes();
context.getFailedShuffleCounter();
context.getMergedMapOutputsCounter();
context.getStatus();
context.getCopyPhase();
context.getMergePhase();
context.getReduceTask();
context.getMapOutputFile();
}
@Override
public void close(){
}
@Override
public RawKeyValueIterator run() throws java.io.IOException, java.lang.InterruptedException{
return null;
}
}
@Test
/**
* A testing method instructing core Hadoop to load an external ShuffleConsumerPlugin
* as if it came from a 3rd party.
*/
public void testPluginAbility() {
try{
// create JobConf with mapreduce.job.reduce.shuffle.consumer.plugin.class=TestShuffleConsumerPlugin
JobConf jobConf = new JobConf();
jobConf.setClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN,
TestShufflePlugin.TestShuffleConsumerPlugin.class,
ShuffleConsumerPlugin.class);
ShuffleConsumerPlugin shuffleConsumerPlugin = null;
Class<? extends ShuffleConsumerPlugin> clazz =
jobConf.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class);
assertNotNull("Unable to get " + MRConfig.SHUFFLE_CONSUMER_PLUGIN, clazz);
// load 3rd party plugin through core's factory method
shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, jobConf);
assertNotNull("Unable to load " + MRConfig.SHUFFLE_CONSUMER_PLUGIN, shuffleConsumerPlugin);
}
catch (Exception e) {
assertTrue("Threw exception:" + e, false);
}
}
@Test
/**
* A testing method verifying the availability and accessibility of the API that is needed
* by sub-classes of ShuffleConsumerPlugin
*/
public void testConsumerApi() {
JobConf jobConf = new JobConf();
ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();
//mock creation
ReduceTask mockReduceTask = mock(ReduceTask.class);
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
@SuppressWarnings("unchecked") // needed for mock with generic
CombineOutputCollector<K, V> mockCombineOutputCollector =
(CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
try {
String [] dirs = jobConf.getLocalDirs();
// verify that these APIs are available through super class handler
ShuffleConsumerPlugin.Context<K, V> context =
new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem,
mockUmbilical, mockLocalDirAllocator,
mockReporter, mockCompressionCodec,
combinerClass, mockCombineOutputCollector,
mockCounter, mockCounter, mockCounter,
mockCounter, mockCounter, mockCounter,
mockTaskStatus, mockProgress, mockProgress,
mockTask, mockMapOutputFile);
shuffleConsumerPlugin.init(context);
shuffleConsumerPlugin.run();
shuffleConsumerPlugin.close();
}
catch (Exception e) {
assertTrue("Threw exception:" + e, false);
}
// verify that these APIs are available for 3rd party plugins
mockReduceTask.getTaskID();
mockReduceTask.getJobID();
mockReduceTask.getNumMaps();
mockReduceTask.getPartition();
mockReporter.progress();
}
@Test
/**
* A testing method verifying the availability and accessibility of the API needed by
* AuxiliaryService(s) that act as "Shuffle-Providers" (ShuffleHandler and 3rd party plugins)
*/
public void testProviderApi() {
ApplicationId mockApplicationId = mock(ApplicationId.class);
mockApplicationId.setClusterTimestamp(new Long(10));
mockApplicationId.setId(mock(JobID.class).getId());
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
JobConf mockJobConf = mock(JobConf.class);
try {
mockLocalDirAllocator.getLocalPathToRead("", mockJobConf);
}
catch (Exception e) {
assertTrue("Threw exception:" + e, false);
}
}
}

View File

@@ -117,7 +117,11 @@ Release 2.0.3-alpha - Unreleased
YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy)
YARN-187. Add hierarchical queues to the fair scheduler. (Sandy Ryza via tomwhite)
YARN-187. Add hierarchical queues to the fair scheduler.
(Sandy Ryza via tomwhite)
YARN-72. NM should handle cleaning up containers when it shuts down.
(Sandy Ryza via tomwhite)
Release 2.0.2-alpha - 2012-09-07
@@ -199,6 +203,9 @@ Release 0.23.6 - UNRELEASED
YARN-251. Proxy URI generation fails for blank tracking URIs (Tom White
via jlowe)
YARN-258. RM web page UI shows Invalid Date for start and finish times
(Ravi Prakash via jlowe)
Release 0.23.5 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@@ -28,17 +28,6 @@ import static org.apache.hadoop.yarn.util.StringHelper.*;
import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.HTML;
public class JQueryUI extends HtmlBlock {
// Render choices (mostly for dataTables)
public enum Render {
/** small (<~100 rows) table as html, most gracefully degradable */
HTML,
/** medium (<~2000 rows) table as js array */
JS_ARRAY,
/** large (<~10000 rows) table loading from server */
JS_LOAD,
/** huge (>~10000 rows) table processing from server */
JS_SERVER
};
// UI params
public static final String ACCORDION = "ui.accordion";
@@ -197,12 +186,4 @@ public class JQueryUI extends HtmlBlock {
append("sPaginationType: 'full_numbers', iDisplayLength:20, ").
append("aLengthMenu:[20, 40, 60, 80, 100]");
}
public static StringBuilder tableInitProgress(StringBuilder init,
long numCells) {
return init.append(", bProcessing:true, ").
append("oLanguage:{sProcessing:'Processing ").
append(numCells).append(" cells...").
append("<p><img src=\"/static/busy.gif\">'}");
}
}

View File

@@ -1,56 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.webapp.view;
import java.io.PrintWriter;
import static org.apache.hadoop.yarn.util.StringHelper.*;
import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*;
/**
* JSON helpers
*/
public class Jsons {
public static final String _SEP = "\",\"";
public static PrintWriter appendProgressBar(PrintWriter out, String pct) {
return out.append("<br title='").append(pct).append("'>").
append("<div class='").append(C_PROGRESSBAR).
append("' title='").append(pct).append('%').
append("'><div class='").append(C_PROGRESSBAR_VALUE).
append("' style='width: ").append(pct).
append("%'>").append("<\\/div><\\/div>");
}
public static PrintWriter appendProgressBar(PrintWriter out,
float progress) {
return appendProgressBar(out, String.format("%.1f", progress));
}
public static PrintWriter appendSortable(PrintWriter out, Object value) {
return out.append("<br title='").append(String.valueOf(value)).append("'>");
}
public static PrintWriter appendLink(PrintWriter out, Object anchor,
String prefix, String... parts) {
String anchorText = String.valueOf(anchor);
return out.append("<a href='").append(anchor == null ? "#" :
ujoin(prefix, parts)).append("'>").append(anchorText).append("<\\/a>");
}
}

View File

@@ -25,13 +25,23 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
public class CMgrCompletedContainersEvent extends ContainerManagerEvent {
private List<ContainerId> containerToCleanup;
private Reason reason;
public CMgrCompletedContainersEvent(List<ContainerId> containersToCleanup) {
public CMgrCompletedContainersEvent(List<ContainerId> containersToCleanup, Reason reason) {
super(ContainerManagerEventType.FINISH_CONTAINERS);
this.containerToCleanup = containersToCleanup;
this.reason = reason;
}
public List<ContainerId> getContainersToCleanup() {
return this.containerToCleanup;
}
public Reason getReason() {
return reason;
}
public static enum Reason {
ON_SHUTDOWN, BY_RESOURCEMANAGER
}
}

View File

@@ -19,6 +19,9 @@
package org.apache.hadoop.yarn.server.nodemanager;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
@@ -62,13 +65,23 @@ public class NodeManager extends CompositeService implements
*/
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
/**
* Extra duration to wait for containers to be killed on shutdown.
*/
private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000;
private static final Log LOG = LogFactory.getLog(NodeManager.class);
protected final NodeManagerMetrics metrics = NodeManagerMetrics.create();
private ApplicationACLsManager aclsManager;
private NodeHealthCheckerService nodeHealthChecker;
private LocalDirsHandlerService dirsHandler;
private Context context;
private AsyncDispatcher dispatcher;
private ContainerManagerImpl containerManager;
private static CompositeServiceShutdownHook nodeManagerShutdownHook;
private long waitForContainersOnShutdownMillis;
public NodeManager() {
super(NodeManager.class.getName());
}
@@ -115,7 +128,7 @@ public class NodeManager extends CompositeService implements
containerTokenSecretManager = new NMContainerTokenSecretManager(conf);
}
Context context = new NMContext(containerTokenSecretManager);
this.context = new NMContext(containerTokenSecretManager);
this.aclsManager = new ApplicationACLsManager(conf);
@@ -131,7 +144,7 @@ public class NodeManager extends CompositeService implements
addService(del);
// NodeManager level dispatcher
AsyncDispatcher dispatcher = new AsyncDispatcher();
this.dispatcher = new AsyncDispatcher();
nodeHealthChecker = new NodeHealthCheckerService();
addService(nodeHealthChecker);
@@ -144,7 +157,7 @@ public class NodeManager extends CompositeService implements
NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor();
addService(nodeResourceMonitor);
ContainerManagerImpl containerManager =
containerManager =
createContainerManager(context, exec, del, nodeStatusUpdater,
this.aclsManager, dirsHandler);
addService(containerManager);
@@ -162,6 +175,13 @@ public class NodeManager extends CompositeService implements
// so that we make sure everything is up before registering with RM.
addService(nodeStatusUpdater);
waitForContainersOnShutdownMillis =
conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS,
YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) +
conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS,
YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) +
SHUTDOWN_CLEANUP_SLOP_MS;
super.init(conf);
// TODO add local dirs to del
}
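To make the wait bound computed above concrete with a small worked example (the figures are purely illustrative, not defaults taken from this patch): if YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS were set to 250 ms and YarnConfiguration.NM_PROCESS_KILL_WAIT_MS to 2000 ms, the node manager would wait up to 250 + 2000 + 1000 (SHUTDOWN_CLEANUP_SLOP_MS) = 3250 ms for containers to exit before completing shutdown.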
@@ -178,10 +198,45 @@ public class NodeManager extends CompositeService implements
@Override
public void stop() {
cleanupContainers();
super.stop();
DefaultMetricsSystem.shutdown();
}
@SuppressWarnings("unchecked")
private void cleanupContainers() {
Map<ContainerId, Container> containers = context.getContainers();
if (containers.isEmpty()) {
return;
}
LOG.info("Containers still running on shutdown: " + containers.keySet());
List<ContainerId> containerIds = new ArrayList<ContainerId>(containers.keySet());
dispatcher.getEventHandler().handle(
new CMgrCompletedContainersEvent(containerIds,
CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN));
LOG.info("Waiting for containers to be killed");
long waitStartTime = System.currentTimeMillis();
while (!containers.isEmpty() &&
System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) {
try {
Thread.sleep(1000);
} catch (InterruptedException ex) {
LOG.warn("Interrupted while sleeping on container kill", ex);
}
}
// All containers killed
if (containers.isEmpty()) {
LOG.info("All containers in DONE state");
} else {
LOG.info("Done waiting for containers to be killed. Still alive: " +
containers.keySet());
}
}
public static class NMContext implements Context {
private final NodeId nodeId = Records.newRecord(NodeId.class);
@@ -283,6 +338,11 @@ public class NodeManager extends CompositeService implements
return new NodeManager();
}
// For testing
ContainerManagerImpl getContainerManager() {
return containerManager;
}
public static void main(String[] args) {
Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler());
StringUtils.startupShutdownMessage(NodeManager.class, args, LOG);

View File

@@ -363,7 +363,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
.getContainersToCleanupList();
if (containersToCleanup.size() != 0) {
dispatcher.getEventHandler().handle(
new CMgrCompletedContainersEvent(containersToCleanup));
new CMgrCompletedContainersEvent(containersToCleanup,
CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER));
}
List<ApplicationId> appsToCleanup =
response.getApplicationsToCleanupList();

View File

@@ -23,6 +23,8 @@ import static org.apache.hadoop.yarn.service.Service.STATE.STARTED;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -593,9 +595,16 @@ public class ContainerManagerImpl extends CompositeService implements
(CMgrCompletedContainersEvent) event;
for (ContainerId container : containersFinishedEvent
.getContainersToCleanup()) {
String diagnostic = "";
if (containersFinishedEvent.getReason() ==
CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) {
diagnostic = "Container Killed on Shutdown";
} else if (containersFinishedEvent.getReason() ==
CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) {
diagnostic = "Container Killed by ResourceManager";
}
this.dispatcher.getEventHandler().handle(
new ContainerKillEvent(container,
"Container Killed by ResourceManager"));
new ContainerKillEvent(container, diagnostic));
}
break;
default:

View File

@@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
/**
* This class allows a node manager to run without communicating with a
* real RM.
*/
public class MockNodeStatusUpdater extends NodeStatusUpdaterImpl {
static final Log LOG = LogFactory.getLog(MockNodeStatusUpdater.class);
private static final RecordFactory recordFactory = RecordFactoryProvider
.getRecordFactory(null);
private ResourceTracker resourceTracker;
public MockNodeStatusUpdater(Context context, Dispatcher dispatcher,
NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
super(context, dispatcher, healthChecker, metrics);
resourceTracker = new MockResourceTracker();
}
@Override
protected ResourceTracker getRMClient() {
return resourceTracker;
}
private static class MockResourceTracker implements ResourceTracker {
private int heartBeatID;
@Override
public RegisterNodeManagerResponse registerNodeManager(
RegisterNodeManagerRequest request) throws YarnRemoteException {
RegistrationResponse regResponse = recordFactory
.newRecordInstance(RegistrationResponse.class);
RegisterNodeManagerResponse response = recordFactory
.newRecordInstance(RegisterNodeManagerResponse.class);
response.setRegistrationResponse(regResponse);
return response;
}
@Override
public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request)
throws YarnRemoteException {
NodeStatus nodeStatus = request.getNodeStatus();
LOG.info("Got heartbeat number " + heartBeatID);
nodeStatus.setResponseId(heartBeatID++);
HeartbeatResponse response = recordFactory
.newRecordInstance(HeartbeatResponse.class);
response.setResponseId(heartBeatID);
NodeHeartbeatResponse nhResponse = recordFactory
.newRecordInstance(NodeHeartbeatResponse.class);
nhResponse.setHeartbeatResponse(response);
return nhResponse;
}
}
}

View File

@@ -0,0 +1,222 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import junit.framework.Assert;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class TestNodeManagerShutdown {
static final File basedir =
new File("target", TestNodeManagerShutdown.class.getName());
static final File tmpDir = new File(basedir, "tmpDir");
static final File logsDir = new File(basedir, "logs");
static final File remoteLogsDir = new File(basedir, "remotelogs");
static final File nmLocalDir = new File(basedir, "nm0");
static final File processStartFile = new File(tmpDir, "start_file.txt")
.getAbsoluteFile();
static final RecordFactory recordFactory = RecordFactoryProvider
.getRecordFactory(null);
static final String user = "nobody";
private FileContext localFS;
@Before
public void setup() throws UnsupportedFileSystemException {
localFS = FileContext.getLocalFSFileContext();
tmpDir.mkdirs();
logsDir.mkdirs();
remoteLogsDir.mkdirs();
nmLocalDir.mkdirs();
}
@After
public void tearDown() throws IOException, InterruptedException {
localFS.delete(new Path(basedir.getPath()), true);
}
@Test
public void testKillContainersOnShutdown() throws IOException {
NodeManager nm = getNodeManager();
nm.init(createNMConfig());
nm.start();
ContainerManagerImpl containerManager = nm.getContainerManager();
File scriptFile = createUnhaltingScriptFile();
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
// Construct the Container-id
ContainerId cId = createContainerId();
containerLaunchContext.setContainerId(cId);
containerLaunchContext.setUser(user);
URL localResourceUri =
ConverterUtils.getYarnUrlFromPath(localFS
.makeQualified(new Path(scriptFile.getAbsolutePath())));
LocalResource localResource =
recordFactory.newRecordInstance(LocalResource.class);
localResource.setResource(localResourceUri);
localResource.setSize(-1);
localResource.setVisibility(LocalResourceVisibility.APPLICATION);
localResource.setType(LocalResourceType.FILE);
localResource.setTimestamp(scriptFile.lastModified());
String destinationFile = "dest_file";
Map<String, LocalResource> localResources =
new HashMap<String, LocalResource>();
localResources.put(destinationFile, localResource);
containerLaunchContext.setLocalResources(localResources);
containerLaunchContext.setUser(containerLaunchContext.getUser());
List<String> commands = new ArrayList<String>();
commands.add("/bin/bash");
commands.add(scriptFile.getAbsolutePath());
containerLaunchContext.setCommands(commands);
containerLaunchContext.setResource(recordFactory
.newRecordInstance(Resource.class));
containerLaunchContext.getResource().setMemory(1024);
StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class);
startRequest.setContainerLaunchContext(containerLaunchContext);
containerManager.startContainer(startRequest);
GetContainerStatusRequest request =
recordFactory.newRecordInstance(GetContainerStatusRequest.class);
request.setContainerId(cId);
ContainerStatus containerStatus =
containerManager.getContainerStatus(request).getStatus();
Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState());
try {Thread.sleep(5000);} catch (InterruptedException ex) {ex.printStackTrace();}
nm.stop();
// Now verify the contents of the file
// Script generates a message when it receives a sigterm
// so we look for that
BufferedReader reader =
new BufferedReader(new FileReader(processStartFile));
boolean foundSigTermMessage = false;
while (true) {
String line = reader.readLine();
if (line == null) {
break;
}
if (line.contains("SIGTERM")) {
foundSigTermMessage = true;
break;
}
}
Assert.assertTrue("Did not find sigterm message", foundSigTermMessage);
reader.close();
}
private ContainerId createContainerId() {
ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class);
appId.setClusterTimestamp(0);
appId.setId(0);
ApplicationAttemptId appAttemptId =
recordFactory.newRecordInstance(ApplicationAttemptId.class);
appAttemptId.setApplicationId(appId);
appAttemptId.setAttemptId(1);
ContainerId containerId =
recordFactory.newRecordInstance(ContainerId.class);
containerId.setApplicationAttemptId(appAttemptId);
return containerId;
}
private YarnConfiguration createNMConfig() {
YarnConfiguration conf = new YarnConfiguration();
conf.setInt(YarnConfiguration.NM_PMEM_MB, 5*1024); // 5GB
conf.set(YarnConfiguration.NM_ADDRESS, "127.0.0.1:12345");
conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "127.0.0.1:12346");
conf.set(YarnConfiguration.NM_LOG_DIRS, logsDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath());
conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath());
return conf;
}
/**
* Creates a script to run a container that will run forever unless
* stopped by external means.
*/
private File createUnhaltingScriptFile() throws IOException {
File scriptFile = new File(tmpDir, "scriptFile.sh");
BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile));
fileWriter.write("#!/bin/bash\n\n");
fileWriter.write("echo \"Running testscript for delayed kill\"\n");
fileWriter.write("hello=\"Got SIGTERM\"\n");
fileWriter.write("umask 0\n");
fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n");
fileWriter.write("echo \"Writing pid to start file\"\n");
fileWriter.write("echo $$ >> " + processStartFile + "\n");
fileWriter.write("while true; do\nsleep 1s;\ndone\n");
fileWriter.close();
return scriptFile;
}
private NodeManager getNodeManager() {
return new NodeManager() {
@Override
protected NodeStatusUpdater createNodeStatusUpdater(Context context,
Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
MockNodeStatusUpdater myNodeStatusUpdater = new MockNodeStatusUpdater(
context, dispatcher, healthChecker, metrics);
return myNodeStatusUpdater;
}
};
}
}

View File

@@ -25,26 +25,27 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR_VALUE
import java.util.Collection;
import java.util.HashSet;
import java.util.concurrent.ConcurrentMap;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
import com.google.inject.Inject;
class AppsBlock extends HtmlBlock {
final AppsList list;
final ConcurrentMap<ApplicationId, RMApp> apps;
@Inject AppsBlock(AppsList list, ViewContext ctx) {
@Inject AppsBlock(RMContext rmContext, ViewContext ctx) {
super(ctx);
this.list = list;
apps = rmContext.getRMApps();
}
@Override public void render(Block html) {
@@ -63,7 +64,6 @@ class AppsBlock extends HtmlBlock {
th(".progress", "Progress").
th(".ui", "Tracking UI")._()._().
tbody();
int i = 0;
Collection<RMAppState> reqAppStates = null;
String reqStateString = $(APP_STATE);
if (reqStateString != null && !reqStateString.isEmpty()) {
@@ -74,7 +74,7 @@
}
}
StringBuilder appsTableData = new StringBuilder("[\n");
for (RMApp app : list.apps.values()) {
for (RMApp app : apps.values()) {
if (reqAppStates != null && !reqAppStates.contains(app.getState())) {
continue;
}
@@ -108,7 +108,6 @@ class AppsBlock extends HtmlBlock {
appsTableData.append(trackingURL).append("'>")
.append(appInfo.getTrackingUI()).append("</a>\"],\n");
if (list.rendering != Render.HTML && ++i >= 20) break;
}
if(appsTableData.charAt(appsTableData.length() - 2) == ',') {
appsTableData.delete(appsTableData.length()-2, appsTableData.length()-1);
@@ -118,12 +117,5 @@ class AppsBlock extends HtmlBlock {
_("var appsTableData=" + appsTableData)._();
tbody._()._();
if (list.rendering == Render.JS_ARRAY) {
echo("<script type='text/javascript'>\n",
"var appsData=");
list.toDataTableArrays(reqAppStates, writer());
echo("\n</script>\n");
}
}
}

View File

@@ -1,101 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;
import static org.apache.commons.lang.StringEscapeUtils.escapeJavaScript;
import static org.apache.hadoop.yarn.webapp.view.Jsons._SEP;
import static org.apache.hadoop.yarn.webapp.view.Jsons.appendLink;
import static org.apache.hadoop.yarn.webapp.view.Jsons.appendProgressBar;
import static org.apache.hadoop.yarn.webapp.view.Jsons.appendSortable;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.webapp.Controller.RequestContext;
import org.apache.hadoop.yarn.webapp.ToJSON;
import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
import com.google.inject.Inject;
import com.google.inject.servlet.RequestScoped;
// So we only need to do asm.getApplications once in a request
@RequestScoped
class AppsList implements ToJSON {
final RequestContext rc;
final ConcurrentMap<ApplicationId, RMApp> apps;
Render rendering;
@Inject AppsList(RequestContext ctx, RMContext rmContext) {
rc = ctx;
apps = rmContext.getRMApps();
}
void toDataTableArrays(Collection<RMAppState> requiredAppStates, PrintWriter out) {
out.append('[');
boolean first = true;
for (RMApp app : apps.values()) {
if (requiredAppStates != null &&
!requiredAppStates.contains(app.getState())) {
continue;
}
AppInfo appInfo = new AppInfo(app, true);
String startTime = Times.format(appInfo.getStartTime());
String finishTime = Times.format(appInfo.getFinishTime());
if (first) {
first = false;
} else {
out.append(",\n");
}
out.append("[\"");
appendSortable(out, appInfo.getAppIdNum());
appendLink(out, appInfo.getAppId(), rc.prefix(), "app",
appInfo.getAppId()).append(_SEP).
append(escapeHtml(appInfo.getUser())).append(_SEP).
append(escapeJavaScript(escapeHtml(appInfo.getName()))).append(_SEP).
append(escapeHtml(appInfo.getQueue())).append(_SEP);
appendSortable(out, appInfo.getStartTime()).
append(startTime).append(_SEP);
appendSortable(out, appInfo.getFinishTime()).
append(finishTime).append(_SEP).
append(appInfo.getState()).append(_SEP).
append(appInfo.getFinalStatus()).append(_SEP);
appendProgressBar(out, appInfo.getProgress()).append(_SEP);
appendLink(out, appInfo.getTrackingUI(), rc.prefix(),
!appInfo.isTrackingUrlReady() ?
"#" : appInfo.getTrackingUrlPretty()).
append("\"]");
}
out.append(']');
}
@Override
public void toJSON(PrintWriter out) {
out.print("{\"aaData\":");
toDataTableArrays(null, out);
out.print("}\n");
}
}

View File

@@ -25,8 +25,11 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._PROGRESSBAR_VALUE
import java.util.Collection;
import java.util.HashSet;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
@@ -38,7 +41,6 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
import com.google.inject.Inject;
@@ -47,15 +49,15 @@ import com.google.inject.Inject;
* scheduler as part of the fair scheduler page.
*/
public class FairSchedulerAppsBlock extends HtmlBlock {
final AppsList list;
final ConcurrentMap<ApplicationId, RMApp> apps;
final FairSchedulerInfo fsinfo;
@Inject public FairSchedulerAppsBlock(AppsList list,
@Inject public FairSchedulerAppsBlock(RMContext rmContext,
ResourceManager rm, ViewContext ctx) {
super(ctx);
this.list = list;
FairScheduler scheduler = (FairScheduler) rm.getResourceScheduler();
fsinfo = new FairSchedulerInfo(scheduler);
apps = rmContext.getRMApps();
}
@Override public void render(Block html) {
@@ -75,7 +77,6 @@ public class FairSchedulerAppsBlock extends HtmlBlock {
th(".progress", "Progress").
th(".ui", "Tracking UI")._()._().
tbody();
int i = 0;
Collection<RMAppState> reqAppStates = null;
String reqStateString = $(APP_STATE);
if (reqStateString != null && !reqStateString.isEmpty()) {
@@ -85,7 +86,7 @@
reqAppStates.add(RMAppState.valueOf(stateString));
}
}
for (RMApp app : list.apps.values()) {
for (RMApp app : apps.values()) {
if (reqAppStates != null && !reqAppStates.contains(app.getState())) {
continue;
}
@@ -122,15 +123,7 @@ public class FairSchedulerAppsBlock extends HtmlBlock {
td().
a(!appInfo.isTrackingUrlReady()?
"#" : appInfo.getTrackingUrlPretty(), appInfo.getTrackingUI())._()._();
if (list.rendering != Render.HTML && ++i >= 20) break;
}
tbody._()._();
if (list.rendering == Render.JS_ARRAY) {
echo("<script type='text/javascript'>\n",
"var appsData=");
list.toDataTableArrays(reqAppStates, writer());
echo("\n</script>\n");
}
}
}

View File

@@ -93,8 +93,4 @@ public class RmController extends Controller {
public void submit() {
setTitle("Application Submission Not Allowed");
}
public void json() {
renderJSON(AppsList.class);
}
}

View File

@@ -60,9 +60,8 @@ public class RmView extends TwoColumnLayout {
}
private String appsTableInit() {
AppsList list = getInstance(AppsList.class);
// id, user, name, queue, starttime, finishtime, state, status, progress, ui
StringBuilder init = tableInit()
return tableInit()
.append(", 'aaData': appsTableData")
.append(", bDeferRender: true")
.append(", bProcessing: true")
@@ -78,18 +77,6 @@ public class RmView extends TwoColumnLayout {
.append(", 'mRender': parseHadoopProgress }]")
// Sort by id upon page load
.append(", aaSorting: [[0, 'desc']]");
String rows = $("rowlimit");
int rowLimit = rows.isEmpty() ? MAX_DISPLAY_ROWS : Integer.parseInt(rows);
if (list.apps.size() < rowLimit) {
list.rendering = Render.HTML;
return init.append('}').toString();
}
if (list.apps.size() > MAX_FAST_ROWS) {
tableInitProgress(init, list.apps.size() * 6);
}
list.rendering = Render.JS_ARRAY;
return init.append(", aaData:appsData}").toString();
.append(", aaSorting: [[0, 'desc']]}").toString();
}
}